Skip to content

Commit

Permalink
slob.py: dedup alias targets instead of alias keys, #556
Browse files (browse the repository at this point in the history)
  • Loading branch information
ilius committed Apr 17, 2024
1 parent e6ab69e commit a493075
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 11 deletions.
23 changes: 13 additions & 10 deletions pyglossary/slob.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,18 +1305,21 @@ def read_key_frag(item: "Blob", default_fragment: str) -> "tuple[str, str]":
alias_writer.finalize()

with Slob(path) as resolved_aliases_reader:
previous_key = None
previous = None
targets = set()

for item in resolved_aliases_reader:
ref = pickle.loads(item.content)
if ref.key == previous_key:
continue
self._write_ref(
ref.key,
ref.bin_index,
ref.item_index,
ref.fragment,
)
previous_key = ref.key
if previous is not None and ref.key != previous.key:
for bin_index, item_index, fragment in targets:
self._write_ref(previous.key, bin_index, item_index, fragment)
targets.clear()
targets.add((ref.bin_index, ref.item_index, ref.fragment))
previous = ref

for bin_index, item_index, fragment in targets:
self._write_ref(previous.key, bin_index, item_index, fragment)

self._sort()
self._fire_event("end_resolve_aliases")

Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,9 @@ lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
"PLR0904", # Too many public methods
"C90", # mccabe: C901: {name} is too complex ({complexity})
]
"slob.py" = []
"slob.py" = [
"C901", # `...` is too complex (14 > 13)
]
"html_dir.py" = [
"C901", # `...` is too complex
]
Expand Down
8 changes: 8 additions & 0 deletions tests/slob_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ def observer(event):
w.add(v.encode("ascii"), k)

w.add_alias("w", "u")
w.add_alias("small u", "u")
w.add_alias("y1", "y2")
w.add_alias("y2", "y3")
w.add_alias("y3", "z")
Expand All @@ -482,6 +483,10 @@ def observer(event):

w.add_alias("g1", "g")
w.add_alias("g2", ("g1", "g-frag1"))

w.add_alias("n or p", "n")
w.add_alias("n or p", "p")

w.finalize()

self.assertEqual(too_many_redirects, ["l1", "l2", "l3"])
Expand All @@ -494,6 +499,7 @@ def get(key):
return [item.content.decode("ascii") for item in d[key]]

self.assertEqual(get("w"), ["LATIN SMALL LETTER U"])
self.assertEqual(get("small u"), ["LATIN SMALL LETTER U"])
self.assertEqual(get("y1"), ["LATIN SMALL LETTER Z"])
self.assertEqual(get("y2"), ["LATIN SMALL LETTER Z"])
self.assertEqual(get("y3"), ["LATIN SMALL LETTER Z"])
Expand All @@ -502,6 +508,8 @@ def get(key):
self.assertEqual(get("l2"), [])
self.assertEqual(get("l3"), [])

self.assertEqual(len(list(d["n or p"])), 2)

item_a1 = cast(slob.Blob, next(d["a1"]))
self.assertEqual(item_a1.content, b"LATIN SMALL LETTER A")
self.assertEqual(item_a1.fragment, "a-frag1")
Expand Down

0 comments on commit a493075

Please sign in to comment.