a08c3c738edb3f82db13443a9cdb44ad01ebc820
Raymund Zacharias separated the descendants a...

Raymund Zacharias authored 6 years ago

1) __author__ = 'Ray'
2) 
3) import io
4) import re
Raymund Zacharias refactored lookup code to a...

Raymund Zacharias authored 6 years ago

5) from mediawiki import get_wikipage_text
Raymund Zacharias separated the descendants a...

Raymund Zacharias authored 6 years ago

6) 
def get_descendants(title):
    """Get and parse the descendant section from en.wiktionary.org for a specific title (usually word).

    The page text is obtained from ``get_wikipage_text(title)``.  Everything
    after the first ``Descendants`` heading (``=`` signs on both sides) up to
    the end of the page text is scanned for ``{{l|<language>|<lemma>|...}}``
    templates.

    Args:
        title: Page title to look up (usually a word).

    Returns:
        A list of ``{"language": ..., "lemma": ...}`` dicts, one per ``l``
        template found, in order of appearance.  The list is empty when the
        page text contains no ``Descendants`` heading.
    """
    wikipage_text = get_wikipage_text(title)

    heading = re.search(r"\=+Descendants\=+", wikipage_text)
    if heading is None:
        # No descendants section on this page: nothing to parse.
        return []

    # Slice to the very end of the text.  Stopping at -1 would drop the final
    # character and break a template that closes the page ("...}}").
    # NOTE(review): the scan is not bounded by the next heading, so ``l``
    # templates in later sections are collected too -- confirm this is wanted.
    descendants_text = wikipage_text[heading.end():]

    # Capture the text between every "{{" and the nearest following "}}".
    # "." never matches "\n", so a single pass over the whole text yields the
    # same matches as scanning it line by line.
    templates = re.findall(r"(?<={{).*?(?=}})", descendants_text)

    descendant_words = []
    for template in templates:
        # e.g. "l|ang|hungor" -> ["l", "ang", "hungor"]
        fields = template.split("|")
        # Only link templates carrying both a language code and a lemma are
        # usable; shorter ones (e.g. "{{l|ang}}") are skipped, not crashed on.
        if fields[0] == "l" and len(fields) >= 3:
            descendant_words.append({"language": fields[1], "lemma": fields[2]})
    return descendant_words