From 7bf711af75dfdb96ea697b4a205437004d508970 Mon Sep 17 00:00:00 2001 From: ontanj Date: Wed, 11 Mar 2020 14:17:31 +0100 Subject: [PATCH] minor fixes --- propositions.py | 71 +++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/propositions.py b/propositions.py index 6ee1b77..223b819 100644 --- a/propositions.py +++ b/propositions.py @@ -9,19 +9,25 @@ class SAOLWordFinder: def __init__(self, pattern, verbose=False): - self.first_finder = re.compile(r'^(\w*)([$|@|£])') - self.find_def = re.compile(r'class="def".*?>(.*?)(?:(.*?)(.*?))?', re.S) - self.find_links = re.compile(r"onclick=\"return loadDiv\('#saol-1','(/tri/f_saol\.php\?id=.*?)'\)\">(?:  |1)") self.pattern = pattern self.words = [] self.consonants = "bcdfghjklmnpqrstvwxz" self.vocals = "aeiouyåäö" self.letters = "abcdefghijklmnopqrstuvwxyzåäö" + self.first_finder = re.compile(r'^(\w*?)([$|@|£])') + self.find_def = re.compile(r'class="def"[^<>]*>([^<>]*)(?:]*>([^<>]*)([^<>]*))?(?:]*>([^<>]*)([^<>]*))?(?:]*>([^<>]*)([^<>]*))?(?:]*>([^<>]*)([^<>]*))?', re.S) + self.find_links = re.compile(r"onclick=\"return loadDiv\('#saol-1','(/tri/f_saol\.php\?id=.*?)'\)\">(?:  |1)") + self.find_grundform = re.compile(r'(.*?)') self.compile_regex(pattern) - self.no_of_props = self.find_no_of_props(pattern) + self.find_no_of_props(pattern) self.verbose = verbose self.get_wild_numbers() - self.find_grundform = re.compile(r'(.*?)') + + def compile_regex(self, pattern): + pattern = pattern.replace('@',f'([{self.vocals}])').replace('£',f'([{self.letters}])').replace('$',f'([{self.consonants}])') + class_pattern = 'class="bform"[^<>]*>(' + pattern + ')' + self.word_pattern = re.compile(pattern) + self.class_pattern = re.compile(class_pattern) def find_no_of_props(self, pattern): self.wild_sequence = re.findall(r'[@£$]', pattern) @@ -33,7 +39,18 @@ def find_no_of_props(self, pattern): no *= 29 else: no *= 20 - return no + self.no_of_props = no + + def get_wild_numbers(self): + wild_numbers = [] + for sign in self.wild_sequence: + if sign == "@": + wild_numbers.append(9) + elif sign == "£": + wild_numbers.append(29) + else: + wild_numbers.append(20) + self.wild_numbers = wild_numbers def goto(self, word): word = urllib.parse.quote(word) @@ -53,21 +70,17 @@ def fit(self, lemma): return match.group(1) def search(self): - self.look_for(self.pattern) + last = self.check(self.pattern) + while last != True: + new_props = self.new_search_array(self.pattern, last) + for prop in new_props: + last = self.check(prop) if self.verbose: print("\r ") - def look_for(self, pattern): + def check(self, pattern): search_word = self.from_pattern(pattern) - last = self.check(search_word) - if last == True: - return - new_props = self.new_search_array(pattern, last) - for prop in new_props: - self.look_for(prop) - - def check(self, word): - html = self.goto(word) + html = self.goto(search_word) if "inga svar" in html: return True lemmas = html.split('class="lemma"') @@ -89,7 +102,6 @@ def check(self, word): return True def _saol_lemmas(self, lemmas): - defs = [] for lemma in lemmas: defs_text = [] @@ -109,12 +121,6 @@ def _saol_lemmas(self, lemmas): return match.group(1) return defs[-1][0] - def compile_regex(self, pattern): - pattern = pattern.replace('@',f'([{self.vocals}])').replace('£',f'([{self.letters}])').replace('$',f'([{self.consonants}])') - class_pattern = 'class="bform"[^<>]*>(' + pattern + ')' - self.word_pattern = re.compile(pattern) - self.class_pattern = re.compile(class_pattern) - def from_pattern(self, pattern): return pattern.replace('@','?').replace('£','?').replace('$','?') @@ -129,13 +135,10 @@ def new_search_array(self, pattern, last): letters = self.letters letters_after = letters[letters.index(letter):] new_patterns = [pattern[0:pos] + letter + pattern[pos+1:] for letter in letters_after] - if letter == "a": - extra_patterns = self.new_search_array(new_patterns[0], last) - new_patterns = extra_patterns + new_patterns[1:] return new_patterns def find_first(self, pattern, last): - match = self.first_finder.search(pattern) + match = self.first_finder.match(pattern) pos = len(match.group(1)) sign = match.group(2) return pos, sign @@ -146,17 +149,6 @@ def past_words(self, numbers): no *= n return no - def get_wild_numbers(self): - wild_numbers = [] - for sign in self.wild_sequence: - if sign == "@": - wild_numbers.append(9) - elif sign == "£": - wild_numbers.append(29) - else: - wild_numbers.append(20) - self.wild_numbers = wild_numbers - def calculate_progress(self, current): if not self.verbose: return @@ -195,7 +187,6 @@ def prop(word): return propositions if __name__ == "__main__": - headless = True word = None saol = False print_props = True