source/tools/correlation/test.py (a08c3c7) - fs-workbench.git

a08c3c738edb3f82db13443a9cdb44ad01ebc820

separated the descendants a... Raymund Zacharias authored 6 years ago	`1) # encoding: utf-8 2) 3) import sys 4) from difflib import SequenceMatcher 5) import io 6) import re 7) import argparse 8) import urllib.request 9) import urllib.parse 10) import xml.etree.ElementTree as ET 11) from descendants import get_PG_decendants, get_descendants`
implemented ancestor lookup Raymund Zacharias authored 6 years ago	`12) from ancestor import get_ancestor`
# Shared script state: the "escape" output flag plus a small built-in sample
# data set of lemmas.
globalvars = {
    # When truthy, myformat() renders non-ASCII characters as escape sequences.
    "escape": False,
    "data": {
        # Sample lemmas, each tagged with a language code and a numeric id.
        "words": [
            {"id": 0, "language": "gem", "lemma": "bītaną"},
            {"id": 1, "language": "gem", "lemma": "hrītaną"},
            {"id": 2, "language": "deu", "lemma": "beißen"},
            {"id": 3, "language": "deu", "lemma": "reißen"},
            {"id": 4, "language": "nld", "lemma": "bijten"},
            {"id": 5, "language": "nld", "lemma": "rijten"},
            {"id": 6, "language": "deu", "lemma": "scheißen"},
            {"id": 7, "language": "gem", "lemma": "skītaną"},
            {"id": 8, "language": "nld", "lemma": "schijten"},
        ],
        # Derivation links; the "from"/"to" values appear to reference the
        # word ids above (not consumed by any code visible in this file).
        "origins": [
            {"from": 0, "to": 2},
            {"from": 1, "to": 3},
            {"from": 0, "to": 4},
            {"from": 1, "to": 5},
            {"from": 7, "to": 6},
            {"from": 7, "to": 8},
        ],
    },
}


def myformat(string):
    """Return *string* ready for printing.

    When ``globalvars["escape"]`` is truthy, non-ASCII characters are replaced
    by their ``unicode_escape`` sequences (e.g. ``ī`` becomes ``\\u012b``);
    otherwise the string is returned unchanged.
    """
    if not globalvars["escape"]:
        return string
    # encode() yields bytes on Python 3; decode back to str so that "%s"
    # formatting prints the escaped text itself rather than "b'...'".
    return string.encode("unicode_escape").decode("ascii")


def compare(wordpair):
    """Compare two words of the same language and print their matching blocks.

    e.g. PG bītaną and hrītaną share "ītaną"

    *wordpair* is a pair of word dicts, each providing a "lemma" string. Every
    block reported by ``SequenceMatcher.get_matching_blocks`` is printed; for
    blocks of non-zero size the matching slice of each lemma is printed too.
    """
    first, second = wordpair[0], wordpair[1]
    matcher = SequenceMatcher(None, first["lemma"], second["lemma"])
    print("\ncomparing '%s' and '%s' ..."
          % (myformat(first["lemma"]), myformat(second["lemma"])))
    for block in matcher.get_matching_blocks():
        print("\tcurrent block: %s" % (str(block)))
        if block.size <= 0:
            # The final block returned by get_matching_blocks() is a
            # zero-length terminator; there is no slice to show for it.
            continue
        # block.a / block.b are the start offsets in the first / second lemma.
        for word, begin in zip((first, second), (block.a, block.b)):
            end = begin + block.size
            print("\t\t'%s'[%u:%u] = %s"
                  % (myformat(word["lemma"]), begin, end,
                     myformat(word["lemma"][begin:end])))


def main(args):
    """Run the correlation experiments.

    *args* is the list of command-line arguments (without the program name).
    The only recognised option is ``--escape``, which switches myformat() to
    escaped output.
    """
    parser = argparse.ArgumentParser()
    # store_true yields a real boolean; the flag previously stored the builtin
    # ``sum`` as its "enabled" value, which only worked because it is truthy.
    parser.add_argument("--escape", dest="escape", action="store_true",
                        help="enable escape")
    parameters = parser.parse_args(args)
    globalvars["escape"] = parameters.escape

    # Pair up every two sample words that belong to the same language.
    words = globalvars["data"]["words"]
    for i, word1 in enumerate(words):
        for word2 in words[i + 1:]:
            if word1["language"] == word2["language"]:  # if languages match
                wordpair = (word1, word2)
                # compare(wordpair)  # compare the pair (currently disabled)

    # Earlier experiment, kept for reference:
    # result = list(map(lambda word: {"language": word["language"], "lemma": myformat(word["lemma"])}, get_PG_decendants("hrītaną")))
    # print(str(result))

    # NOTE(review): the original indentation was not recoverable from the
    # blame view; this lookup is assumed to run once, after the pairing loop
    # -- confirm against the repository.
    ancestor_word = get_ancestor("froh", "de", "gem-pro")[0]["lemma"]
    # Drop any leading "*" (presumably the reconstructed-form marker) before
    # passing the lemma on to the descendants lookup.
    ancestor_word = ancestor_word.lstrip("*")
    print(get_PG_decendants(ancestor_word))
    # print(get_ancestor("allègre", "fr", "la"))
    # print(get_ancestor("allegro", "it", "la"))