import re
import urllib.request
import urllib.parse
import xml.etree.ElementTree as ET


def get_ancestor(title, language_code, ancestor_language_code):
    """Fetch the wikitext of an en.wiktionary.org page for a specific title (usually a word),
    parse its Etymology section, and return the ancestor words found for the given
    ancestor language code."""
    # Request the page as a raw XML export via the MediaWiki API.
    request_parameters = {
        "action": "query",
        "format": "xml",
        "export": 1,
        "exportnowrap": 1,
        "titles": title,
        "redirects": 1
    }
    request_data = urllib.parse.urlencode(request_parameters).encode("utf8")
    wikipage_request = urllib.request.Request("https://en.wiktionary.org/w/api.php", request_data)
    with urllib.request.urlopen(wikipage_request) as wikipage_response:
        # For offline testing, a saved export can be used instead:
        # with open("Wiktionary-20170813141826.xml", "r", encoding="utf-8") as wikipage_response:
        wikipage_string = wikipage_response.read()

    # Extract the page's wikitext from the XML export.
    root = ET.fromstring(wikipage_string)
    ns = {"mwns": "http://www.mediawiki.org/xml/export-0.10/"}
    wikipage_text_string = root.find("./mwns:page/mwns:revision/mwns:text", ns).text

    # Locate the Etymology heading (e.g. "==Etymology==" or "===Etymology 1===")
    # and keep everything after it.
    match = re.search(r"=+Etymology.*?=+", wikipage_text_string)
    if match is None:
        return []
    etymology_string = wikipage_text_string[match.end():]

    ancestor_words = []
    # Find every template body between {{ and }} in a non-greedy manner,
    # e.g. "l|ang|hungor" from {{l|ang|hungor}}.
    matches = re.findall(r"(?<={{).*?(?=}})", etymology_string)
    for i, template in enumerate(matches):
        parameters = template.split("|")

        # Older markup: {{etyl|gem-pro|en}} {{m|gem-pro|*sagô}}
        # {{etyl}} names the ancestor and current languages; the following {{m}} names the lemma.
        if (len(parameters) >= 3 and parameters[0] == "etyl"
                and parameters[1] == ancestor_language_code
                and parameters[2] == language_code):
            if i + 1 < len(matches):
                nm_parameters = matches[i + 1].split("|")
                if (len(nm_parameters) >= 3 and nm_parameters[0] == "m"
                        and nm_parameters[1] == ancestor_language_code):
                    ancestor_words.append({"language": nm_parameters[1], "lemma": nm_parameters[2]})

        # Newer markup: {{inh|en|gem-pro|*hwītaz}}
        if (len(parameters) >= 4 and parameters[0] == "inh"
                and parameters[1] == language_code
                and parameters[2] == ancestor_language_code):
            ancestor_words.append({"language": parameters[2], "lemma": parameters[3]})

    return ancestor_words
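

# A minimal usage sketch (the word "white" and the language codes "en" / "gem-pro"
# below are illustrative assumptions, not part of the original script).
# Requires network access to en.wiktionary.org.
if __name__ == "__main__":
    for ancestor in get_ancestor("white", "en", "gem-pro"):
        print(ancestor["language"], ancestor["lemma"])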