diff --git a/arxiv/integration/fastly/purge.py b/arxiv/integration/fastly/purge.py index 13affd6f..92be9d7d 100644 --- a/arxiv/integration/fastly/purge.py +++ b/arxiv/integration/fastly/purge.py @@ -11,7 +11,7 @@ from arxiv.config import settings from arxiv.db import Session from arxiv.db.models import Metadata, Updates -from arxiv.identifier import Identifier +from arxiv.identifier import Identifier, IdentifierException from arxiv.taxonomy.definitions import CATEGORIES from arxiv.taxonomy.category import get_all_cats_from_string @@ -25,6 +25,7 @@ def purge_cache_for_paper(paper_id:str, old_cats:Optional[str]=None): """purges all keys needed for an unspecified change to a paper clears everything related to the paper, as well as any list and year pages it is on old_cats: include this string if the paper undergoes a category change to also purge pages the paper may have been removed from (or new year pages it is added to) + raises an IdentifierException if the paper_id is invalid, and KeyError if the category string contains invalid categories """ arxiv_id = Identifier(paper_id) keys=_purge_category_change(arxiv_id, old_cats) @@ -59,6 +60,9 @@ def _get_category_and_date(arxiv_id:Identifier)-> Tuple[str, date]: .filter(up.action != "absonly") .first() ) + if not result: + raise IdentifierException(f'paper id does not exist: {arxiv_id.id}') + new_cats: str=result[0] recent_date: date=result[1] return new_cats, recent_date diff --git a/arxiv/integration/tests/test_fastly.py b/arxiv/integration/tests/test_fastly.py index c3486cc5..84baa3bc 100644 --- a/arxiv/integration/tests/test_fastly.py +++ b/arxiv/integration/tests/test_fastly.py @@ -1,10 +1,11 @@ import unittest +import pytest from unittest.mock import patch, MagicMock from datetime import date from fastly.api.purge_api import PurgeApi -from arxiv.identifier import Identifier -from arxiv.integration.fastly.purge import purge_fastly_keys, _purge_category_change, purge_cache_for_paper +from arxiv.identifier import 
Identifier, IdentifierException +from arxiv.integration.fastly.purge import purge_fastly_keys, _purge_category_change, purge_cache_for_paper, _get_category_and_date from arxiv.integration.fastly.headers import add_surrogate_key #tests for the purge keys utility function @@ -212,4 +213,12 @@ def test_purge_cache_for_paper(mockToday,mockPurge, mockDBQuery): expected_keys=["list-2010-01-cs.LG", "list-2010-cs.LG", "list-2010-01-cs", "list-2010-cs", "list-2010-01-cs.DC", "list-2010-cs.DC", "paper-id-1001.5678"] purge_cache_for_paper('1001.5678',"cs.LG") actual_keys = mockPurge.call_args[0][0] - assert sorted(actual_keys) == sorted (expected_keys) \ No newline at end of file + assert sorted(actual_keys) == sorted (expected_keys) + +def test_get_category_and_date_nonexstant_ids(): + # there is no paper with this id + # also, base has no test db, so any paper lookup would return None, but this will still work even if a db with data is present + bad_id=Identifier("0807.9999") + with pytest.raises(IdentifierException): + _get_category_and_date(bad_id) + \ No newline at end of file diff --git a/arxiv/taxonomy/category.py b/arxiv/taxonomy/category.py index 76ad4c90..a77f41f8 100644 --- a/arxiv/taxonomy/category.py +++ b/arxiv/taxonomy/category.py @@ -144,6 +144,7 @@ def get_all_cats_from_string(cat_string:str, only_cannonical:Optional[bool]=Fals This is needed because alternate names are not always recorded in the strings setting only_cannonical to true will not return non canonical categories, but will stil return the archives from non-cannonical alias pairs This is meant to be used on catgory strings from the database, not user input + raises KeyError if the category string contains a category that is not a key of CATEGORIES """ from .definitions import CATEGORIES, CATEGORY_ALIASES cats = {CATEGORIES[cat].get_canonical() for cat in cat_string.split()}