source/tools/correlation/test.py (b3c4063) - fs-workbench.git

b3c4063a7ccae2b875f8b408d5574479f29bc0ad

separated the descendants a... Raymund Zacharias authored 6 years ago	`1) # encoding: utf-8 2) 3) import sys 4) from difflib import SequenceMatcher 5) import io 6) import re 7) import argparse 8) import urllib.request 9) import urllib.parse 10) import xml.etree.ElementTree as ET 11) from descendants import get_PG_decendants, get_descendants`
implemented ancestor lookup Raymund Zacharias authored 6 years ago	`12) from ancestor import get_ancestor`
# Shared script state.
#   "escape": when truthy, myformat() renders non-ASCII text as backslash escapes.
#   "data":   small hand-written sample set.
#       "words":   lemmas tagged with a language code and a numeric id.
#       "origins": id -> id links between words (e.g. 0 "bītaną" -> 2 "beißen");
#                  not read by the functions defined here.
globalvars = {
    "escape": False,
    "data": {
        "words": [
            {"id": 0, "language": "gem", "lemma": "bītaną"},
            {"id": 1, "language": "gem", "lemma": "hrītaną"},
            {"id": 2, "language": "deu", "lemma": "beißen"},
            {"id": 3, "language": "deu", "lemma": "reißen"},
            {"id": 4, "language": "nld", "lemma": "bijten"},
            {"id": 5, "language": "nld", "lemma": "rijten"},
            {"id": 6, "language": "deu", "lemma": "scheißen"},
            {"id": 7, "language": "gem", "lemma": "skītaną"},
            {"id": 8, "language": "nld", "lemma": "schijten"},
        ],
        "origins": [
            {"from": 0, "to": 2},
            {"from": 1, "to": 3},
            {"from": 0, "to": 4},
            {"from": 1, "to": 5},
            {"from": 7, "to": 6},
            {"from": 7, "to": 8},
        ],
    },
}


def myformat(string):
    """Return *string* ready for printing.

    When ``globalvars["escape"]`` is truthy, non-ASCII characters are replaced
    by their Python backslash escapes (e.g. ``"ī"`` -> ``"\\u012b"``);
    otherwise the string is returned unchanged.

    The result is always ``str``: the escaped bytes are decoded back to text so
    that ``"%s" % myformat(x)`` does not print a ``b'...'`` repr.
    """
    if globalvars["escape"]:
        # unicode_escape only ever produces ASCII bytes, so this cannot fail.
        return string.encode("unicode_escape").decode("ascii")
    return string


def compare(wordpair):
    """Print every matching block shared by the lemmas of two words.

    *wordpair* is a pair of word dicts, each with a ``"lemma"`` key. The two
    lemmas are aligned with ``difflib.SequenceMatcher`` and, for each
    non-empty matching block, the matching slice of both lemmas is printed,
    e.g. PG bītaną and hrītaną share "ītaną".

    Returns ``None``; all output goes to stdout.
    """
    first, second = wordpair[0], wordpair[1]
    matcher = SequenceMatcher(None, first["lemma"], second["lemma"])
    print("\ncomparing '%s' and '%s' ..."
          % (myformat(first["lemma"]), myformat(second["lemma"])))
    for block in matcher.get_matching_blocks():
        print("\tcurrent block: %s" % (str(block)))
        size = block.size
        if size <= 0:
            # The matcher always ends with a zero-length sentinel block.
            continue
        # block.a / block.b are the start offsets in the first / second lemma.
        for word, begin in ((first, block.a), (second, block.b)):
            end = begin + size
            print("\t\t'%s'[%u:%u] = %s"
                  % (myformat(word["lemma"]), begin, end,
                     myformat(word["lemma"][begin:end])))


def main(args):
    """Parse *args* and compare every pair of sample words of the same language.

    *args* is the list of command-line arguments without the program name.
    The only option is ``--escape``, which is stored in
    ``globalvars["escape"]`` and switches myformat() to escaped output.
    """
    parser = argparse.ArgumentParser()
    # A plain on/off switch: store True/False rather than an arbitrary constant.
    parser.add_argument("--escape", dest="escape", action="store_true",
                        default=False, help="enable escape")
    parameters = parser.parse_args(args)
    globalvars["escape"] = parameters.escape

    words = globalvars["data"]["words"]
    for position, word1 in enumerate(words):
        # Only look at later words so each unordered pair is visited once.
        for word2 in words[position + 1:]:
            if word1["language"] == word2["language"]:
                compare((word1, word2))
# Earlier experiments, kept disabled for reference:
# result = list(map(lambda word: {"language": word["language"], "lemma": myformat(word["lemma"])}, get_PG_decendants("hrītaną")))
# print(str(result))
# ancestor_word = get_ancestor("allègre", "fr", "la")[0]["lemma"]
# ancestor_word = ancestor_word.lstrip("*")
# print(get_PG_decendants(ancestor_word))

# Print the ancestor lookup result for two sample words.
# NOTE(review): the arguments are presumably (lemma, word language, ancestor
# language) - confirm against ancestor.get_ancestor.
for lemma, language in (("allègre", "fr"), ("allegro", "it")):
    print(get_ancestor(lemma, language, "la"))