-
Notifications
You must be signed in to change notification settings - Fork 609
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Cleaning up and testing get_directory_index
#483
Changes from all commits
96e18c9
519aab8
a5399aa
f08d2f3
808f569
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,10 +3,13 @@ | |
import itertools | ||
import json | ||
import re | ||
import shutil | ||
import tempfile | ||
import time | ||
from pathlib import Path | ||
from typing import Any, cast | ||
from unittest.mock import patch | ||
from uuid import uuid4 | ||
|
||
import ldp.agent | ||
import pytest | ||
|
@@ -38,22 +41,56 @@ | |
|
||
@pytest.mark.asyncio | ||
async def test_get_directory_index(agent_test_settings: Settings) -> None: | ||
index = await get_directory_index(settings=agent_test_settings) | ||
assert index.fields == [ | ||
"file_location", | ||
"body", | ||
"title", | ||
"year", | ||
], "Incorrect fields in index" | ||
# paper.pdf + empty.txt + flag_day.html + bates.txt + obama.txt, | ||
# but empty.txt fails to be added | ||
path_to_id = await index.index_files | ||
assert ( | ||
sum(id_ != FAILED_DOCUMENT_ADD_ID for id_ in path_to_id.values()) == 4 | ||
), "Incorrect number of parsed index files" | ||
results = await index.query(query="who is Frederick Bates?") | ||
paper_dir = cast(Path, agent_test_settings.paper_directory) | ||
assert results[0].docs.keys() == {md5sum((paper_dir / "bates.txt").absolute())} | ||
# Since agent_test_settings is used by other tests, and thus uses the same | ||
# paper_directory as other tests, we use a tempdir so we can delete files | ||
# without affecting concurrent tests | ||
with tempfile.TemporaryDirectory() as tempdir: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I would have thought this fixture would handle that -- did you see deleting files affecting other tests?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Yeah, good question. I just expanded the explanatory comment to address this:
# Since agent_test_settings is used by other tests, and thus uses the same
# paper_directory as other tests, we use a tempdir so we can delete files
# without affecting concurrent tests |
||
shutil.copytree( | ||
agent_test_settings.paper_directory, tempdir, dirs_exist_ok=True | ||
) | ||
paper_dir = agent_test_settings.paper_directory = Path(tempdir) | ||
|
||
index_name = f"stub{uuid4()}" # Unique across test invocations | ||
index = await get_directory_index( | ||
index_name=index_name, settings=agent_test_settings | ||
) | ||
assert ( | ||
index.index_name == index_name | ||
), "Index name should match its specification" | ||
assert index.fields == [ | ||
"file_location", | ||
"body", | ||
"title", | ||
"year", | ||
], "Incorrect fields in index" | ||
# paper.pdf + empty.txt + flag_day.html + bates.txt + obama.txt, | ||
# but empty.txt fails to be added | ||
path_to_id = await index.index_files | ||
assert ( | ||
sum(id_ != FAILED_DOCUMENT_ADD_ID for id_ in path_to_id.values()) == 4 | ||
), "Incorrect number of parsed index files" | ||
results = await index.query(query="who is Frederick Bates?") | ||
assert results[0].docs.keys() == {md5sum((paper_dir / "bates.txt").absolute())} | ||
|
||
# Check getting the same index name will not reprocess files | ||
with patch.object(Docs, "aadd") as mock_aadd: | ||
index = await get_directory_index( | ||
index_name=index_name, settings=agent_test_settings | ||
) | ||
assert len(await index.index_files) == len(path_to_id) | ||
mock_aadd.assert_not_awaited(), "Expected we didn't re-add files" | ||
|
||
# Now we actually remove (but not add!) a file from the paper directory, | ||
# and we still don't reprocess files | ||
(paper_dir / "obama.txt").unlink() | ||
with patch.object( | ||
Docs, "aadd", autospec=True, side_effect=Docs.aadd | ||
) as mock_aadd: | ||
index = await get_directory_index( | ||
index_name=index_name, settings=agent_test_settings | ||
) | ||
assert len(await index.index_files) == len(path_to_id) - 1 | ||
mock_aadd.assert_not_awaited(), "Expected we didn't re-add files" | ||
|
||
|
||
@pytest.mark.asyncio | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was a bit noisy, so downgraded its level