# A method for calculating the Levenshtein edit distance between a term and its candidates reconciled with Wikidata in OpenRefine (https://github.com/wetneb/openrefine-wikidata/issues/14)
def call_counter(func):
    """Decorate ``func`` so that every invocation is tallied.

    The returned wrapper forwards all positional and keyword arguments to
    ``func`` unchanged and returns whatever ``func`` returns.  The running
    number of invocations is exposed as the ``calls`` attribute of the
    wrapper and starts at 0.
    """
    # Imported locally so the decorator does not rely on file-level imports.
    import functools

    @functools.wraps(func)  # carries over __name__, __doc__, __module__, ...
    def helper(*args, **kwargs):
        # Count before delegating so the call is recorded even if func raises.
        helper.calls += 1
        return func(*args, **kwargs)

    # Assigned after ``wraps`` (which merges func.__dict__ into the wrapper)
    # so the counter always starts from zero.
    helper.calls = 0
    return helper
# Cache of already computed distances, keyed by the (s, t) argument pair.
memo = {}


@call_counter
def levenshtein(s, t):
    """Return the Levenshtein edit distance between ``s`` and ``t``.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``s`` into ``t``.  If either
    string is empty the result is the length of the other one.

    The table is filled iteratively, one row at a time, so the function
    never recurses and long inputs cannot exhaust the interpreter's
    recursion limit.  Only two rows are kept in memory at any moment.

    Results are stored in the module-level ``memo`` dict so that a repeated
    (s, t) pair is answered without recomputation.  Because of the
    ``call_counter`` decorator, each external invocation is tallied in
    ``levenshtein.calls``.
    """
    key = (s, t)
    if key in memo:
        return memo[key]

    # The distance is symmetric; keep the shorter string in the inner
    # dimension so the row buffers stay as small as possible.
    if len(s) < len(t):
        s, t = t, s

    # previous[j] is the distance between the processed prefix of s and
    # t[:j]; for the empty prefix of s that is simply j insertions.
    previous = list(range(len(t) + 1))
    for i, s_char in enumerate(s, 1):
        # Turning s[:i] into the empty string costs i deletions.
        current = [i]
        for j, t_char in enumerate(t, 1):
            cost = 0 if s_char == t_char else 1
            current.append(min(
                previous[j] + 1,         # delete s_char
                current[j - 1] + 1,      # insert t_char
                previous[j - 1] + cost,  # substitute (free when equal)
            ))
        previous = current

    res = previous[-1]
    memo[key] = res
    return res
candidates_edit = []
for i in cell['recon']['candidates']:
candidates_edit.append(str(levenshtein(value, i.name)))
return "|".join(candidates_edit)