Skip to content

Commit

Permalink
Try a faster levenshtein
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonThordal committed Aug 23, 2024
1 parent ff196d7 commit b28f696
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions nomenklatura/matching/regression_v2/names.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from normality import WS
from followthemoney.proxy import E
from followthemoney.types import registry
from rigour.text.distance import levenshtein
from fxkk import levenshtein

from nomenklatura.matching.util import max_in_sets, props_pair, type_pair
from nomenklatura.matching.compare.util import is_disjoint, has_overlap
Expand All @@ -29,7 +29,7 @@ def _name_norms(names: Iterable[str]) -> List[str]:


# Turn the edit distance between `left` and `right` into a float score.
#
# The distance reported by `levenshtein` is divided by the length of the longer
# input and subtracted from 1.0, so a distance of 0 yields 1.0 and a distance
# equal to the longer length yields 0.0.
#
# NOTE(review): this listing is a commit diff shown without +/- markers or
# indentation; the two consecutive `distance` assignments below look like the
# removed and the added version of one line -- confirm against the repository
# before treating both as live code.
def _compare_levenshtein(left: str, right: str) -> float:
distance = levenshtein(left, right)
# Same call with the result coerced to int; as written, this assignment
# overwrites the value computed on the previous line.
distance = int(levenshtein(left, right))
# Length of the longer input, floored at 1 so the division below is defined
# even when both strings are empty.
base = max((1, len(left), len(right)))
# Normalise the distance by that length and invert it into a score.
return 1.0 - (distance / float(base))
# return math.sqrt(distance)
Expand Down

0 comments on commit b28f696

Please sign in to comment.