From 5c8edcda7106da9648eccd5b7de7ff74a7ae626f Mon Sep 17 00:00:00 2001 From: Daofeng Wu Date: Tue, 17 Dec 2024 19:25:22 +0900 Subject: [PATCH] test(scraper/incremental): print matched and unmatched hashes --- npiai/tools/web/scraper/__test__/incremental.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/npiai/tools/web/scraper/__test__/incremental.py b/npiai/tools/web/scraper/__test__/incremental.py index 453d27e..062f8c1 100644 --- a/npiai/tools/web/scraper/__test__/incremental.py +++ b/npiai/tools/web/scraper/__test__/incremental.py @@ -43,16 +43,20 @@ async def summarize(skip_item_hashes: Set[str] | None = None): start = time.monotonic() count = 0 hashes = set() + matched_hashes = set() async for chunk in stream: count += len(chunk["items"]) print("Chunk:", json.dumps(chunk, indent=2)) + matched_hashes.update(chunk["matched_hashes"]) for item in chunk["items"]: hashes.add(item["hash"]) end = time.monotonic() print(f"Summarized {count} items in {end - start:.2f} seconds") + print("Matched hashes:", matched_hashes) + print("Unmatched hashes:", hashes - matched_hashes) return hashes