def subdist(needle, haystack): """ Calculates the fuzzy match of needle in haystack, using a modified version of the Levenshtein distance algorithm. The function is modified from the levenshtein function in the bktree module by Adam Hupp """ m, n = len(needle), len(haystack) # base cases if m == 1: return not needle in haystack if not n: return m row1 = [0] * (n+1) for i in range(0,m): row2 = [i+1] for j in range(0,n): cost = ( needle[i] != haystack[j] ) row2.append( min(row1[j+1]+1, # deletion row2[j]+1, #insertion row1[j]+cost) #substitution ) row1 = row2 return min(row1) def time_subdist(n): 'subdist(i)' result = '' text = u"the quick brown fox jumped over the lazy dogs" for i in range(n): d = subdist(u"quick", text) assert d == 0, d d = subdist(u"quickly", text) assert d == 2, d d = subdist(u"brown", text) assert d == 0, d d = subdist(u"Brown", text) assert d == 1, d d = subdist(u"fox", text) assert d == 0, d d = subdist(u"foxy", text) assert d == 1, d d = subdist(u"jumped", text) assert d == 0, d d = subdist(u"jumps", text) assert d == 1, d d = subdist(u"over", text) assert d == 0, d d = subdist(u"oven", text) assert d == 1, d d = subdist(u"lazy", text) assert d == 0, d d = subdist(u"dog", text) assert d == 0, d d = subdist(u"dogs", text) assert d == 0, d d = subdist(u"SPAM", text) assert d == 4, d d = subdist(u"", text) assert d == 0, d