Skip to content

Commit

Permalink
minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
ontanj committed Mar 11, 2020
1 parent b1272d5 commit 7bf711a
Showing 1 changed file with 31 additions and 40 deletions.
71 changes: 31 additions & 40 deletions propositions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,25 @@
class SAOLWordFinder:

def __init__(self, pattern, verbose=False):
self.first_finder = re.compile(r'^(\w*)([$|@|£])')
self.find_def = re.compile(r'class="def".*?>(.*?)(?:<span.*?>(.*?)</span>(.*?))?</span>', re.S)
self.find_links = re.compile(r"onclick=\"return loadDiv\('#saol-1','(/tri/f_saol\.php\?id=.*?)'\)\"><span class=\"dig\">(?: &nbsp|1)")
self.pattern = pattern
self.words = []
self.consonants = "bcdfghjklmnpqrstvwxz"
self.vocals = "aeiouyåäö"
self.letters = "abcdefghijklmnopqrstuvwxyzåäö"
self.first_finder = re.compile(r'^(\w*?)([$|@|£])')
self.find_def = re.compile(r'class="def"[^<>]*>([^<>]*)(?:<span [^<>]*>([^<>]*)</span>([^<>]*))?(?:<span [^<>]*>([^<>]*)</span>([^<>]*))?(?:<span [^<>]*>([^<>]*)</span>([^<>]*))?(?:<span [^<>]*>([^<>]*)</span>([^<>]*))?', re.S)
self.find_links = re.compile(r"onclick=\"return loadDiv\('#saol-1','(/tri/f_saol\.php\?id=.*?)'\)\"><span class=\"dig\">(?: &nbsp|1)")
self.find_grundform = re.compile(r'<span class="grundform">(.*?)</span>')
self.compile_regex(pattern)
self.no_of_props = self.find_no_of_props(pattern)
self.find_no_of_props(pattern)
self.verbose = verbose
self.get_wild_numbers()
self.find_grundform = re.compile(r'<span class="grundform">(.*?)</span>')

def compile_regex(self, pattern):
    """Compile the word and HTML matchers for a wildcard *pattern*.

    Each wildcard sign in *pattern* is replaced by a capturing character
    class: '@' by the characters of ``self.vocals``, '£' by
    ``self.letters`` and '$' by ``self.consonants``.  All other characters
    are handed to ``re`` unchanged (they are not escaped).

    Sets:
        self.word_pattern: compiled regex for the expanded pattern alone.
        self.class_pattern: compiled regex for the expanded pattern captured
            inside a ``class="bform"`` tag that is closed by ``</span>``.
    """
    # Substitution order is '@', '£', '$', applied one after the other.
    wildcard_alphabets = (
        ('@', self.vocals),
        ('£', self.letters),
        ('$', self.consonants),
    )
    for sign, alphabet in wildcard_alphabets:
        pattern = pattern.replace(sign, f'([{alphabet}])')

    self.word_pattern = re.compile(pattern)
    self.class_pattern = re.compile(
        'class="bform"[^<>]*>(' + pattern + ')</span>'
    )

def find_no_of_props(self, pattern):
self.wild_sequence = re.findall(r'[@£$]', pattern)
Expand All @@ -33,7 +39,18 @@ def find_no_of_props(self, pattern):
no *= 29
else:
no *= 20
return no
self.no_of_props = no

def get_wild_numbers(self):
    """Record how many candidate letters each wildcard sign stands for.

    Reads ``self.wild_sequence`` (the wildcard signs, in order) and stores a
    list of the same length in ``self.wild_numbers``: 9 for '@', 29 for '£'
    and 20 for any other sign.
    """
    # NOTE(review): 9 / 29 / 20 look like the sizes of the vowel, full and
    # consonant alphabets used elsewhere in the class -- confirm they stay
    # in sync if those alphabets ever change.
    choices_per_sign = {"@": 9, "£": 29}
    self.wild_numbers = [
        choices_per_sign.get(sign, 20) for sign in self.wild_sequence
    ]

def goto(self, word):
word = urllib.parse.quote(word)
Expand All @@ -53,21 +70,17 @@ def fit(self, lemma):
return match.group(1)

def search(self):
self.look_for(self.pattern)
last = self.check(self.pattern)
while last != True:
new_props = self.new_search_array(self.pattern, last)
for prop in new_props:
last = self.check(prop)
if self.verbose:
print("\r ")

def look_for(self, pattern):
def check(self, pattern):
search_word = self.from_pattern(pattern)
last = self.check(search_word)
if last == True:
return
new_props = self.new_search_array(pattern, last)
for prop in new_props:
self.look_for(prop)

def check(self, word):
html = self.goto(word)
html = self.goto(search_word)
if "inga svar" in html:
return True
lemmas = html.split('class="lemma"')
Expand All @@ -89,7 +102,6 @@ def check(self, word):
return True

def _saol_lemmas(self, lemmas):

defs = []
for lemma in lemmas:
defs_text = []
Expand All @@ -109,12 +121,6 @@ def _saol_lemmas(self, lemmas):
return match.group(1)
return defs[-1][0]

def compile_regex(self, pattern):
    """Compile the word and HTML matchers for a wildcard *pattern*.

    Each wildcard sign in *pattern* is replaced by a capturing character
    class: '@' by the characters of ``self.vocals``, '£' by
    ``self.letters`` and '$' by ``self.consonants``.  All other characters
    are handed to ``re`` unchanged (they are not escaped).

    Sets:
        self.word_pattern: compiled regex for the expanded pattern alone.
        self.class_pattern: compiled regex for the expanded pattern captured
            inside a ``class="bform"`` tag that is closed by ``</span>``.
    """
    # Substitution order is '@', '£', '$', applied one after the other.
    wildcard_alphabets = (
        ('@', self.vocals),
        ('£', self.letters),
        ('$', self.consonants),
    )
    for sign, alphabet in wildcard_alphabets:
        pattern = pattern.replace(sign, f'([{alphabet}])')

    self.word_pattern = re.compile(pattern)
    self.class_pattern = re.compile(
        'class="bform"[^<>]*>(' + pattern + ')</span>'
    )

def from_pattern(self, pattern):
    """Return *pattern* with every wildcard sign ('@', '£', '$') turned into '?'.

    All other characters are left untouched.
    """
    wildcard_to_question_mark = str.maketrans({'@': '?', '£': '?', '$': '?'})
    return pattern.translate(wildcard_to_question_mark)

Expand All @@ -129,13 +135,10 @@ def new_search_array(self, pattern, last):
letters = self.letters
letters_after = letters[letters.index(letter):]
new_patterns = [pattern[0:pos] + letter + pattern[pos+1:] for letter in letters_after]
if letter == "a":
extra_patterns = self.new_search_array(new_patterns[0], last)
new_patterns = extra_patterns + new_patterns[1:]
return new_patterns

def find_first(self, pattern, last):
match = self.first_finder.search(pattern)
match = self.first_finder.match(pattern)
pos = len(match.group(1))
sign = match.group(2)
return pos, sign
Expand All @@ -146,17 +149,6 @@ def past_words(self, numbers):
no *= n
return no

def get_wild_numbers(self):
    """Record how many candidate letters each wildcard sign stands for.

    Reads ``self.wild_sequence`` (the wildcard signs, in order) and stores a
    list of the same length in ``self.wild_numbers``: 9 for '@', 29 for '£'
    and 20 for any other sign.
    """
    # NOTE(review): 9 / 29 / 20 look like the sizes of the vowel, full and
    # consonant alphabets used elsewhere in the class -- confirm they stay
    # in sync if those alphabets ever change.
    choices_per_sign = {"@": 9, "£": 29}
    self.wild_numbers = [
        choices_per_sign.get(sign, 20) for sign in self.wild_sequence
    ]

def calculate_progress(self, current):
if not self.verbose:
return
Expand Down Expand Up @@ -195,7 +187,6 @@ def prop(word):
return propositions

if __name__ == "__main__":
headless = True
word = None
saol = False
print_props = True
Expand Down

0 comments on commit 7bf711a

Please sign in to comment.