Merge remote-tracking branch 'origin/master' into failing-tests

Sefaria · Nov 9, 2023 · b034fee · b034fee
2 parents e5f8a3d + bbeb462
commit b034fee
Show file tree

Hide file tree

Showing 13 changed files with 172 additions and 218 deletions.
diff --git a/helm-chart/sefaria-project/templates/configmap/create-mongo-dumps.yaml b/helm-chart/sefaria-project/templates/configmap/create-mongo-dumps.yaml
@@ -67,8 +67,7 @@ data:
     sleep 2
     done
 
-    until mongodump --uri="$URI" -v -d $DATABASE --excludeCollection=history --excludeCollection=texts --excludeCollection=sheets --excludeCollection=links 
-    --excludeCollection=django_cache  --excludeCollection=user_history -o "${DATADIR}/dump"
+    until mongodump --uri="$URI" -v -d $DATABASE --excludeCollection=history --excludeCollection=texts --excludeCollection=sheets --excludeCollection=links --excludeCollection=django_cache  --excludeCollection=user_history -o "${DATADIR}/dump"
     do
     echo "trying to dump other stuff again"
     sleep 2

diff --git a/sefaria/model/count.py b/sefaria/model/count.py
diff --git a/sefaria/model/place.py b/sefaria/model/place.py
@@ -64,7 +64,11 @@ def create_new_place(cls, en, he=None):
     def city_to_coordinates(self, city):
+        """
+        Geocode `city` with Nominatim and pass the resulting coordinates to `point_location`.
+        :param city: the place name handed to the geocoder
+        :raises InputError: when the geocoder returns nothing, or the result's raw 'type' is not one of
+            'administrative', 'city', 'town' or 'municipality'
+        """
         geolocator = Nominatim(user_agent='hello@sefaria.org')
         location = geolocator.geocode(city)
-        self.point_location(lon=location.longitude, lat=location.latitude)
+        # NOTE(review): assumes every geocoder result's raw dict carries a 'type' key - confirm
+        if location and location.raw['type'] in ['administrative', 'city', 'town', 'municipality']:
+            self.point_location(lon=location.longitude, lat=location.latitude)
+        else:
+            raise InputError(f"{city} is not a real city.")
+
 
     def point_location(self, lon=None, lat=None):
         if lat is None and lon is None:

diff --git a/sefaria/model/schema.py b/sefaria/model/schema.py
@@ -1073,12 +1073,11 @@ def full_regex(self, title, lang, anchored=True, compiled=True, capture_title=Fa
     def address_regex(self, lang, **kwargs):
         group = "a0"
         reg = self._addressTypes[0].regex(lang, group, **kwargs)
-        if not self._addressTypes[0].stop_parsing(lang):
-            for i in range(1, self.depth):
-                group = "a{}".format(i)
-                reg += "(" + self.after_address_delimiter_ref + self._addressTypes[i].regex(lang, group, **kwargs) + ")"
-                if not kwargs.get("strict", False):
-                    reg += "?"
+        for i in range(1, self.depth):
+            group = "a{}".format(i)
+            reg += "(" + self.after_address_delimiter_ref + self._addressTypes[i].regex(lang, group, **kwargs) + ")"
+            if not kwargs.get("strict", False):
+                reg += "?"
 
         if kwargs.get("match_range"):
             # TODO there is a potential error with this regex. it fills in toSections starting from highest depth and going to lowest.
@@ -1089,14 +1088,13 @@ def address_regex(self, lang, **kwargs):
             reg += r"(?=\S)"  # must be followed by something (Lookahead)
             group = "ar0"
             reg += self._addressTypes[0].regex(lang, group, **kwargs)
-            if not self._addressTypes[0].stop_parsing(lang):
-                reg += "?"
-                for i in range(1, self.depth):
-                    reg += r"(?:(?:" + self.after_address_delimiter_ref + r")?"
-                    group = "ar{}".format(i)
-                    reg += "(" + self._addressTypes[i].regex(lang, group, **kwargs) + ")"
-                    # assuming strict isn't relevant on ranges  # if not kwargs.get("strict", False):
-                    reg += ")?"
+            reg += "?"
+            for i in range(1, self.depth):
+                reg += r"(?:(?:" + self.after_address_delimiter_ref + r")?"
+                group = "ar{}".format(i)
+                reg += "(" + self._addressTypes[i].regex(lang, group, **kwargs) + ")"
+                # assuming strict isn't relevant on ranges  # if not kwargs.get("strict", False):
+                reg += ")?"
             reg += r")?"  # end range clause
         return reg
 
@@ -2072,15 +2070,6 @@ def hebrew_number_regex():
                 [\u05d0-\u05d8]?					    # One or zero alef-tet (1-9)
         )"""
 
-    def stop_parsing(self, lang):
-        """
-        If this is true, the regular expression will stop parsing at this address level for this language.
-        It is currently checked for only in the first address position, and is used for Hebrew Talmud addresses.
-        :param lang: "en" or "he"
-        :return bool:
-        """
-        return False
-
     def toNumber(self, lang, s):
         """
         Return the numerical form of s in this address scheme
@@ -2351,11 +2340,6 @@ def _core_regex(self, lang, group_id=None, **kwargs):
 
         return reg
 
-    def stop_parsing(self, lang):
-        if lang == "he":
-            return True
-        return False
-
     def toNumber(self, lang, s, **kwargs):
         amud_b_list = ['b', 'B', 'ᵇ']
         if lang == "en":
@@ -2490,11 +2474,6 @@ def _core_regex(self, lang, group_id=None, **kwargs):
 
         return reg
 
-    def stop_parsing(self, lang):
-        if lang == "he":
-            return True
-        return False
-
     def toNumber(self, lang, s, **kwargs):
         if lang == "en":
             try:

diff --git a/sefaria/model/tests/he_ref_test.py b/sefaria/model/tests/he_ref_test.py
@@ -100,6 +100,10 @@ def test_talmud(self):
         assert r.sections[0] == 58
         assert len(r.sections) == 1
 
+        r = m.Ref("סוטה לה א:יא")
+        assert r.book == 'Sotah'
+        assert r.sections == [69, 11]
+
     def test_length_catching(self):
         with pytest.raises(InputError):
             r = m.Ref('דברים שם')

diff --git a/sefaria/model/tests/text_test.py b/sefaria/model/tests/text_test.py
@@ -169,9 +169,9 @@ def test_invalid_index_save_no_category():
 
 def test_best_time_period():
+    # period_string('en') is expected to separate the two years with an en dash ('–'), not a hyphen.
     i = model.library.get_index("Rashi on Genesis")
-    assert i.best_time_period().period_string('en') == ' (c.1075  - c.1105 CE)'
+    assert i.best_time_period().period_string('en') == ' (c.1075  – c.1105 CE)'
     i.compDate = None
-    assert i.best_time_period().period_string('en') == ' (1040  - 1105 CE)'  # now that compDate is None, period_string should return Rashi's birth to death years
+    assert i.best_time_period().period_string('en') == ' (1040  – 1105 CE)'  # now that compDate is None, period_string should return Rashi's birth to death years
 
 def test_invalid_index_save_no_hebrew_collective_title():
     title = 'Bartenura (The Next Generation)'

diff --git a/sefaria/model/text.py b/sefaria/model/text.py
@@ -1302,7 +1302,7 @@ def _validate(self):
         """
         languageCodeRe = re.search(r"\[([a-z]{2})\]$", getattr(self, "versionTitle", None))
         if languageCodeRe and languageCodeRe.group(1) != getattr(self,"actualLanguage",None):
-            raise InputError("Version actualLanguage does not match bracketed language")
+            self.actualLanguage = languageCodeRe.group(1)
         if getattr(self,"language", None) not in ["en", "he"]:
             raise InputError("Version language must be either 'en' or 'he'")
         index = self.get_index()

diff --git a/sefaria/model/timeperiod.py b/sefaria/model/timeperiod.py
@@ -56,6 +56,8 @@
 +---------------+------------+-----------------+-------------------------------+-----------------------+
 """
 
+DASH = '–'
+
 class TimePeriod(abst.AbstractMongoRecord):
     """
     TimePeriod is used both for the saved time periods - Eras and Generations
@@ -144,30 +146,33 @@ def period_string(self, lang):
 
             if lang == "en":
                 if getattr(self, "symbol", "") == "CO" or getattr(self, "end", None) is None:
-                    name += " ({}{} {} - )".format(
+                    name += " ({}{} {} {} )".format(
                         approxMarker[0],
                         abs(int(self.start)),
-                        labels[1])
+                        labels[1],
+                        DASH)
                     return name
                 elif int(self.start) == int(self.end):
                     name += " ({}{} {})".format(
                         approxMarker[0],
                         abs(int(self.start)),
                         labels[1])
                 else:
-                    name += " ({}{} {} - {}{} {})".format(
+                    name += " ({}{} {} {} {}{} {})".format(
                         approxMarker[0],
                         abs(int(self.start)),
                         labels[0],
+                        DASH,
                         approxMarker[1],
                         abs(int(self.end)),
                         labels[1])
             if lang == "he":
                 if getattr(self, "symbol", "") == "CO" or getattr(self, "end", None) is None:
-                    name += " ({} {} {} - )".format(
+                    name += " ({} {} {} {} )".format(
                         abs(int(self.start)),
                         labels[1],
-                        approxMarker[0])
+                        approxMarker[0],
+                        DASH)
                     return name
                 elif int(self.start) == int(self.end):
                     name += " ({}{}{})".format(
@@ -177,18 +182,20 @@ def period_string(self, lang):
                 else:
                     both_approx = approxMarker[0] and approxMarker[1]
                     if both_approx:
-                        name += " ({}{} - {}{} {})".format(
+                        name += " ({}{} {} {}{} {})".format(
                             abs(int(self.start)),
                             " " + labels[0] if labels[0] else "",
+                            DASH,
                             abs(int(self.end)),
                             " " + labels[1] if labels[1] else "",
                             approxMarker[1]
                         )
                     else:
-                        name += " ({}{}{} - {}{}{})".format(
+                        name += " ({}{}{} {} {}{}{})".format(
                             abs(int(self.start)),
                             " " + labels[0] if labels[0] else "",
                             " " + approxMarker[0] if approxMarker[0] else "",
+                            DASH,
                             abs(int(self.end)),
                             " " + labels[1] if labels[1] else "",
                             " " + approxMarker[1] if approxMarker[1] else ""
@@ -218,6 +225,16 @@ def get_people_in_generation(self, include_doubles = True):
             else:
                 return topic.Topic({"properties.generation.value": self.symbol})
 
+    def determine_year_estimate(self):
+        """
+        Return a single representative year for this period.
+        :return int or None: the rounded midpoint of `start` and `end` when both are set, whichever one
+            is set when only one is, and None when neither is set.
+        """
+        start = getattr(self, 'start', None)
+        end = getattr(self, 'end', None)
+        if start is not None and end is not None:
+            # round() rounds an exact .5 midpoint to the nearest even year
+            return round((int(start) + int(end)) / 2)
+        if start is not None:
+            return int(start)
+        if end is not None:
+            return int(end)
+        return None
+
 class TimePeriodSet(abst.AbstractMongoSet):
     recordClass = TimePeriod
 
@@ -234,3 +251,41 @@ def get_generations(include_doubles = False):
         arg = {"$in": ["Generation", "Two Generations"]} if include_doubles else "Generation"
         return TimePeriodSet._get_typed_set(arg)
 
+class LifePeriod(TimePeriod):
+    """
+    A TimePeriod whose `start` and `end` are presented as a birth year and a death year.
+    """
+
+    def period_string(self, lang):
+        """
+        Build the parenthesized, space-prefixed description of this life span.
+
+        Only a birth year gives a 'b.' (en) / 'נו׳' (he) form, only a death year gives a 'd.' / 'נפ׳' form,
+        equal years give a single year, and otherwise the two years are joined by DASH.
+        Runs of whitespace in the result are collapsed to single spaces.
+        :param lang: "en" or "he" (anything other than "en" is formatted as Hebrew)
+        :return str or None: the formatted string, or None when neither `start` nor `end` is set
+        """
+        start = getattr(self, "start", None)
+        end = getattr(self, "end", None)
+        if start is None and end is None:
+            return None
+
+        labels = self.getYearLabels(lang)
+        approxMarker = self.getApproximateMarkers(lang)
+        # A year that is not set falls back to 0; an explicit None must never reach int()
+        abs_birth = abs(int(start)) if start is not None else 0
+        abs_death = abs(int(end)) if end is not None else 0
+
+        if lang == "en":
+            birth, death = 'b.', 'd.'
+
+            def order_vars_by_lang(year, label, approx):
+                # English puts the approximation marker directly before the year
+                return approx, '', year, label
+        else:
+            birth, death = 'נו׳', 'נפ׳'
+
+            def order_vars_by_lang(year, label, approx):
+                # Hebrew puts the approximation marker after the year and its label
+                return year, ' ', label, approx
+
+        if getattr(self, "symbol", "") == "CO" or end is None:
+            name = '{} {}{}{} {}'.format(birth, *order_vars_by_lang(abs_birth, labels[1], approxMarker[0]))
+        elif start is None:
+            # the death year is the period's end, so it takes the end marker (index 1)
+            name = '{} {}{}{} {}'.format(death, *order_vars_by_lang(abs_death, labels[1], approxMarker[1]))
+        elif int(start) == int(end):
+            name = '{}{}{} {}'.format(*order_vars_by_lang(abs_birth, labels[1], approxMarker[0]))
+        else:
+            both_approx = approxMarker[0] and approxMarker[1]
+            if lang == 'he' and both_approx:
+                # drop the marker from the birth year so it is printed only once, after the death year
+                birth_string = '{}{}{}'.format(*order_vars_by_lang(abs_birth, labels[0], approxMarker[0])[:-1])
+            else:
+                birth_string = '{}{}{} {}'.format(*order_vars_by_lang(abs_birth, labels[0], approxMarker[0]))
+            death_string = '{}{}{} {}'.format(*order_vars_by_lang(abs_death, labels[1], approxMarker[1]))
+            name = f'{birth_string} {DASH} {death_string}'
+
+        return f' ({" ".join(name.split())})'
+
+