Skip to content

Commit

Permalink
Bug Fix: When ascii_strategy was set to ignore, it still removed those characters
Browse files Browse the repository at this point in the history
  • Loading branch information
yamatt committed Nov 7, 2024
1 parent 58000cb commit 5e10813
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 4 deletions.
9 changes: 6 additions & 3 deletions homoglyphs_fork/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ class Homoglyphs:
def __init__(self, categories=None, languages=None, alphabet=None,
strategy=STRATEGY_IGNORE, ascii_strategy=STRATEGY_IGNORE,
ascii_range=ASCII_RANGE):
"""
:param ascii_strategy: action to take on unmatched char when converting to ascii
:type ascii_strategy: int
"""
# strategies
if strategy not in (STRATEGY_LOAD, STRATEGY_IGNORE, STRATEGY_REMOVE):
raise ValueError('Invalid strategy')
Expand Down Expand Up @@ -220,7 +224,7 @@ def _get_combinations(self, text, ascii=False):
if ascii:
alt_chars = [char for char in alt_chars if ord(char) in self.ascii_range]
if not alt_chars and self.ascii_strategy == STRATEGY_IGNORE:
return
alt_chars.append(char)

if alt_chars:
variations.append(alt_chars)
Expand All @@ -233,8 +237,7 @@ def get_combinations(self, text):

def _to_ascii(self, text):
for variant in self._get_combinations(text, ascii=True):
if max(map(ord, variant)) in self.ascii_range:
yield variant
yield variant

def to_ascii(self, text):
return self.uniq_and_sort(self._to_ascii(text))
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dev-dependencies = ["flake8>=7.0.0"]
[tool.rye.scripts]
lint = "flake8 . --count --show-source --statistics"
generate = "python generate.py homoglyphs_fork"
test = "python tests.py"

[tool.dephell.main]
from = {format = "flit", path = "pyproject.toml"}
Expand Down
7 changes: 6 additions & 1 deletion tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def test_ascii_strategy(self):
categories=('LATIN', 'COMMON', 'CYRILLIC'),
ascii_strategy=STRATEGY_IGNORE,
).to_ascii(u'xхч2')
self.assertEqual(ss, [])
self.assertEqual(ss, [u'xxч2'])

ss = Homoglyphs(
categories=('LATIN', 'COMMON', 'CYRILLIC'),
Expand All @@ -127,6 +127,11 @@ def test_ascii_strategy(self):
).to_ascii(u'ч')
self.assertEqual(ss, [])

# this test is based on https://github.com/yamatt/homoglyphs/issues/4
ss = Homoglyphs(ascii_strategy=STRATEGY_IGNORE).to_ascii('ß')
self.assertEqual(ss, ['ß'])


# # This char hasn't been added yet :/
# ss = Homoglyphs(
# strategy=STRATEGY_LOAD,
Expand Down

0 comments on commit 5e10813

Please sign in to comment.