ettorerizza
6/10/2017 - 10:41 AM

A method for calculating the Levenshtein edit distance between a term and its candidates reconciled with Wikidata in OpenRefine (https://gi

A method for calculating the Levenshtein edit distance between a term and its candidates reconciled with Wikidata in OpenRefine (https://github.com/wetneb/openrefine-wikidata/issues/14)

def call_counter(func):
    """Decorate ``func`` so that every invocation is tallied.

    The returned wrapper forwards all positional and keyword arguments to
    ``func`` and returns its result unchanged.  The running total is exposed
    as the ``calls`` attribute of the wrapper and starts at 0.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same ``__name__``/``__doc__``/``__module__`` as
        ``func`` plus an integer ``calls`` attribute.
    """
    # Function-scope import keeps this block self-contained when the snippet
    # is pasted into a host that has no separate import section.
    import functools

    @functools.wraps(func)
    def helper(*args, **kwargs):
        helper.calls += 1
        return func(*args, **kwargs)

    # Must come after wraps(): wraps() merges func.__dict__ into the wrapper,
    # so a ``calls`` value inherited from an already-counted func is reset.
    helper.calls = 0
    return helper
# Cache of already-computed results: (s, t) -> edit distance.
# It is never pruned, so it grows with the number of distinct pairs seen.
memo = {}


@call_counter
def levenshtein(s, t):
    """Return the Levenshtein edit distance between strings ``s`` and ``t``.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``s`` into ``t``.

    The computation is iterative (row-by-row dynamic programming), so it does
    not consume interpreter stack proportional to the input length, and the
    call counter on this function reflects external invocations only.
    Results are stored in the module-level ``memo`` dict keyed by ``(s, t)``,
    so repeated pairs are answered without recomputation.

    Args:
        s: Source string.
        t: Target string.

    Returns:
        The edit distance as a non-negative int; ``len(t)`` when ``s`` is
        empty and ``len(s)`` when ``t`` is empty.
    """
    key = (s, t)
    cached = memo.get(key)
    if cached is not None:  # 0 is a valid distance, so compare against None
        return cached

    # previous[j] holds the distance between the processed prefix of s and
    # t[:j].  For the empty prefix of s that is simply j insertions.
    previous = list(range(len(t) + 1))
    for i, s_char in enumerate(s, 1):
        # Distance from s[:i] to the empty string is i deletions.
        current = [i]
        for j, t_char in enumerate(t, 1):
            cost = 0 if s_char == t_char else 1
            current.append(min(
                previous[j] + 1,         # drop s_char
                current[j - 1] + 1,      # insert t_char
                previous[j - 1] + cost,  # keep or substitute
            ))
        previous = current

    result = previous[-1]
    memo[key] = result
    return result

candidates_edit = []
for i in cell['recon']['candidates']:
    candidates_edit.append(str(levenshtein(value, i.name)))

return "|".join(candidates_edit)