Skip to content

Commit

Permalink
test(scraper/incremental): print matched and unmatched hashes
Browse files: browse the repository at this point in the history
  • Loading branch information
idiotWu committed Dec 17, 2024
1 parent 369934c commit 5c8edcd
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions npiai/tools/web/scraper/__test__/incremental.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -43,16 +43,20 @@ async def summarize(skip_item_hashes: Set[str] | None = None):
start = time.monotonic()
count = 0
hashes = set()
matched_hashes = set()

async for chunk in stream:
count += len(chunk["items"])
print("Chunk:", json.dumps(chunk, indent=2))
matched_hashes.update(chunk["matched_hashes"])

for item in chunk["items"]:
hashes.add(item["hash"])

end = time.monotonic()
print(f"Summarized {count} items in {end - start:.2f} seconds")
print("Matched hashes:", matched_hashes)
print("Unmatched hashes:", hashes - matched_hashes)

return hashes

Expand Down

0 comments on commit 5c8edcd

Please sign in to comment.