Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into failing-tests
Browse files Browse the repository at this point in the history
  • Loading branch information
edamboritz committed Nov 9, 2023
2 parents e5f8a3d + bbeb462 commit b034fee
Show file tree
Hide file tree
Showing 13 changed files with 172 additions and 218 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ data:
sleep 2
done
until mongodump --uri="$URI" -v -d $DATABASE --excludeCollection=history --excludeCollection=texts --excludeCollection=sheets --excludeCollection=links
--excludeCollection=django_cache --excludeCollection=user_history -o "${DATADIR}/dump"
until mongodump --uri="$URI" -v -d $DATABASE --excludeCollection=history --excludeCollection=texts --excludeCollection=sheets --excludeCollection=links --excludeCollection=django_cache --excludeCollection=user_history -o "${DATADIR}/dump"
do
echo "trying to dump other stuff again"
sleep 2
Expand Down
93 changes: 0 additions & 93 deletions sefaria/model/count.py

This file was deleted.

6 changes: 5 additions & 1 deletion sefaria/model/place.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ def create_new_place(cls, en, he=None):
def city_to_coordinates(self, city):
geolocator = Nominatim(user_agent='hello@sefaria.org')
location = geolocator.geocode(city)
self.point_location(lon=location.longitude, lat=location.latitude)
if location and location.raw['type'] in ['administrative', 'city', 'town', 'municipality']:
self.point_location(lon=location.longitude, lat=location.latitude)
else:
raise InputError(f"{city} is not a real city.")


def point_location(self, lon=None, lat=None):
if lat is None and lon is None:
Expand Down
45 changes: 12 additions & 33 deletions sefaria/model/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1073,12 +1073,11 @@ def full_regex(self, title, lang, anchored=True, compiled=True, capture_title=Fa
def address_regex(self, lang, **kwargs):
group = "a0"
reg = self._addressTypes[0].regex(lang, group, **kwargs)
if not self._addressTypes[0].stop_parsing(lang):
for i in range(1, self.depth):
group = "a{}".format(i)
reg += "(" + self.after_address_delimiter_ref + self._addressTypes[i].regex(lang, group, **kwargs) + ")"
if not kwargs.get("strict", False):
reg += "?"
for i in range(1, self.depth):
group = "a{}".format(i)
reg += "(" + self.after_address_delimiter_ref + self._addressTypes[i].regex(lang, group, **kwargs) + ")"
if not kwargs.get("strict", False):
reg += "?"

if kwargs.get("match_range"):
# TODO there is a potential error with this regex. it fills in toSections starting from highest depth and going to lowest.
Expand All @@ -1089,14 +1088,13 @@ def address_regex(self, lang, **kwargs):
reg += r"(?=\S)" # must be followed by something (Lookahead)
group = "ar0"
reg += self._addressTypes[0].regex(lang, group, **kwargs)
if not self._addressTypes[0].stop_parsing(lang):
reg += "?"
for i in range(1, self.depth):
reg += r"(?:(?:" + self.after_address_delimiter_ref + r")?"
group = "ar{}".format(i)
reg += "(" + self._addressTypes[i].regex(lang, group, **kwargs) + ")"
# assuming strict isn't relevant on ranges # if not kwargs.get("strict", False):
reg += ")?"
reg += "?"
for i in range(1, self.depth):
reg += r"(?:(?:" + self.after_address_delimiter_ref + r")?"
group = "ar{}".format(i)
reg += "(" + self._addressTypes[i].regex(lang, group, **kwargs) + ")"
# assuming strict isn't relevant on ranges # if not kwargs.get("strict", False):
reg += ")?"
reg += r")?" # end range clause
return reg

Expand Down Expand Up @@ -2072,15 +2070,6 @@ def hebrew_number_regex():
[\u05d0-\u05d8]? # One or zero alef-tet (1-9)
)"""

def stop_parsing(self, lang):
"""
If this is true, the regular expression will stop parsing at this address level for this language.
It is currently checked for only in the first address position, and is used for Hebrew Talmud addresses.
:param lang: "en" or "he"
:return bool:
"""
return False

def toNumber(self, lang, s):
"""
Return the numerical form of s in this address scheme
Expand Down Expand Up @@ -2351,11 +2340,6 @@ def _core_regex(self, lang, group_id=None, **kwargs):

return reg

def stop_parsing(self, lang):
if lang == "he":
return True
return False

def toNumber(self, lang, s, **kwargs):
amud_b_list = ['b', 'B', 'ᵇ']
if lang == "en":
Expand Down Expand Up @@ -2490,11 +2474,6 @@ def _core_regex(self, lang, group_id=None, **kwargs):

return reg

def stop_parsing(self, lang):
if lang == "he":
return True
return False

def toNumber(self, lang, s, **kwargs):
if lang == "en":
try:
Expand Down
4 changes: 4 additions & 0 deletions sefaria/model/tests/he_ref_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ def test_talmud(self):
assert r.sections[0] == 58
assert len(r.sections) == 1

r = m.Ref("סוטה לה א:יא")
assert r.book == 'Sotah'
assert r.sections == [69, 11]

def test_length_catching(self):
with pytest.raises(InputError):
r = m.Ref('דברים שם')
Expand Down
4 changes: 2 additions & 2 deletions sefaria/model/tests/text_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,9 @@ def test_invalid_index_save_no_category():

def test_best_time_period():
i = model.library.get_index("Rashi on Genesis")
assert i.best_time_period().period_string('en') == ' (c.1075 - c.1105 CE)'
assert i.best_time_period().period_string('en') == ' (c.1075 c.1105 CE)'
i.compDate = None
assert i.best_time_period().period_string('en') == ' (1040 - 1105 CE)' # now that compDate is None, period_string should return Rashi's birth to death years
assert i.best_time_period().period_string('en') == ' (1040 1105 CE)' # now that compDate is None, period_string should return Rashi's birth to death years

def test_invalid_index_save_no_hebrew_collective_title():
title = 'Bartenura (The Next Generation)'
Expand Down
2 changes: 1 addition & 1 deletion sefaria/model/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -1302,7 +1302,7 @@ def _validate(self):
"""
languageCodeRe = re.search(r"\[([a-z]{2})\]$", getattr(self, "versionTitle", None))
if languageCodeRe and languageCodeRe.group(1) != getattr(self,"actualLanguage",None):
raise InputError("Version actualLanguage does not match bracketed language")
self.actualLanguage = languageCodeRe.group(1)
if getattr(self,"language", None) not in ["en", "he"]:
raise InputError("Version language must be either 'en' or 'he'")
index = self.get_index()
Expand Down
69 changes: 62 additions & 7 deletions sefaria/model/timeperiod.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@
+---------------+------------+-----------------+-------------------------------+-----------------------+
"""

DASH = '–'

class TimePeriod(abst.AbstractMongoRecord):
"""
TimePeriod is used both for the saved time periods - Eras and Generations
Expand Down Expand Up @@ -144,30 +146,33 @@ def period_string(self, lang):

if lang == "en":
if getattr(self, "symbol", "") == "CO" or getattr(self, "end", None) is None:
name += " ({}{} {} - )".format(
name += " ({}{} {} {} )".format(
approxMarker[0],
abs(int(self.start)),
labels[1])
labels[1],
DASH)
return name
elif int(self.start) == int(self.end):
name += " ({}{} {})".format(
approxMarker[0],
abs(int(self.start)),
labels[1])
else:
name += " ({}{} {} - {}{} {})".format(
name += " ({}{} {} {} {}{} {})".format(
approxMarker[0],
abs(int(self.start)),
labels[0],
DASH,
approxMarker[1],
abs(int(self.end)),
labels[1])
if lang == "he":
if getattr(self, "symbol", "") == "CO" or getattr(self, "end", None) is None:
name += " ({} {} {} - )".format(
name += " ({} {} {} {} )".format(
abs(int(self.start)),
labels[1],
approxMarker[0])
approxMarker[0],
DASH)
return name
elif int(self.start) == int(self.end):
name += " ({}{}{})".format(
Expand All @@ -177,18 +182,20 @@ def period_string(self, lang):
else:
both_approx = approxMarker[0] and approxMarker[1]
if both_approx:
name += " ({}{} - {}{} {})".format(
name += " ({}{} {} {}{} {})".format(
abs(int(self.start)),
" " + labels[0] if labels[0] else "",
DASH,
abs(int(self.end)),
" " + labels[1] if labels[1] else "",
approxMarker[1]
)
else:
name += " ({}{}{} - {}{}{})".format(
name += " ({}{}{} {} {}{}{})".format(
abs(int(self.start)),
" " + labels[0] if labels[0] else "",
" " + approxMarker[0] if approxMarker[0] else "",
DASH,
abs(int(self.end)),
" " + labels[1] if labels[1] else "",
" " + approxMarker[1] if approxMarker[1] else ""
Expand Down Expand Up @@ -218,6 +225,16 @@ def get_people_in_generation(self, include_doubles = True):
else:
return topic.Topic({"properties.generation.value": self.symbol})

def determine_year_estimate(self):
    """
    Return a single representative year for this period.

    When both `start` and `end` are set, the result is their midpoint as rounded by the
    built-in `round`. When only one of them is set, that year is returned on its own.
    :return int or None: None when neither `start` nor `end` is set
    """
    start = getattr(self, 'start', None)
    end = getattr(self, 'end', None)
    # Identity checks, not truthiness: year 0 is a legitimate value and must not be treated as "missing".
    if start is not None and end is not None:
        return round((int(start) + int(end)) / 2)
    if start is not None:
        return int(start)
    if end is not None:
        return int(end)
    return None

class TimePeriodSet(abst.AbstractMongoSet):
recordClass = TimePeriod

Expand All @@ -234,3 +251,41 @@ def get_generations(include_doubles = False):
arg = {"$in": ["Generation", "Two Generations"]} if include_doubles else "Generation"
return TimePeriodSet._get_typed_set(arg)

class LifePeriod(TimePeriod):
    """
    A TimePeriod whose `start` and `end` are rendered as a birth year and a death year.
    Only the string formatting is overridden here.
    """

    def period_string(self, lang):
        """
        Format this life span as a parenthesized string with a leading space.

        Depending on which years are set, the result holds a birth year only, a death year only,
        a single year (when both are equal), or "birth DASH death".
        :param lang: "en" for English ordering and abbreviations; any other value uses the Hebrew ones
        :return str or None: None when neither `start` nor `end` is set
        """
        start = getattr(self, "start", None)
        end = getattr(self, "end", None)
        if start is None and end is None:
            return None

        labels = self.getYearLabels(lang)
        approxMarker = self.getApproximateMarkers(lang)
        # An attribute may exist but hold None; only convert the years that are actually set,
        # so that a birth-only or death-only record does not fail on int(None).
        abs_birth = abs(int(start)) if start is not None else 0
        abs_death = abs(int(end)) if end is not None else 0

        if lang == "en":
            birth = 'b.'
            death = 'd.'

            def order_vars_by_lang(year, label, approx):
                # English: approximation marker directly before the year, label after it.
                return approx, '', year, label
        else:
            birth = 'נו׳'
            death = 'נפ׳'

            def order_vars_by_lang(year, label, approx):
                # Hebrew: year first, then the label, then the approximation marker.
                return year, ' ', label, approx

        if getattr(self, "symbol", "") == "CO" or end is None:
            # Birth year only.
            name = '{} {}{}{} {}'.format(birth, *order_vars_by_lang(abs_birth, labels[1], approxMarker[0]))
        elif start is None:
            # Death year only. It takes the end marker (index 1), matching how
            # TimePeriod.period_string pairs approxMarker[1] with self.end.
            name = '{} {}{}{} {}'.format(death, *order_vars_by_lang(abs_death, labels[1], approxMarker[1]))
        elif int(start) == int(end):
            name = '{}{}{} {}'.format(*order_vars_by_lang(abs_birth, labels[1], approxMarker[0]))
        else:
            both_approx = approxMarker[0] and approxMarker[1]
            if lang == 'he' and both_approx:
                # When both years are approximate, Hebrew shows the marker once, after the death year.
                birth_string = '{}{}{}'.format(*order_vars_by_lang(abs_birth, labels[0], approxMarker[0])[:-1])
            else:
                birth_string = '{}{}{} {}'.format(*order_vars_by_lang(abs_birth, labels[0], approxMarker[0]))
            # The death year carries the end marker (index 1), not the start marker.
            death_string = '{}{}{} {}'.format(*order_vars_by_lang(abs_death, labels[1], approxMarker[1]))
            name = f'{birth_string} {DASH} {death_string}'

        # Collapse the whitespace runs left behind by empty labels/markers before wrapping in parentheses.
        name = f' ({" ".join(name.split())})'
        return name

Loading

0 comments on commit b034fee

Please sign in to comment.