Merge pull request #303 from arXiv/ARXIVCE-2413-handle-nonexisting-pa…

…pers trying to purge a paper that doesn't exist results in an IdentifierEx…
arXiv · Aug 19, 2024 · 069a311 · 069a311
2 parents be4d478 + e6d559f
commit 069a311
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 4 deletions.
diff --git a/arxiv/integration/fastly/purge.py b/arxiv/integration/fastly/purge.py
@@ -11,7 +11,7 @@
 from arxiv.config import settings
 from arxiv.db import Session
 from arxiv.db.models import Metadata, Updates
-from arxiv.identifier import Identifier
+from arxiv.identifier import Identifier, IdentifierException
 from arxiv.taxonomy.definitions import CATEGORIES
 from arxiv.taxonomy.category import get_all_cats_from_string 
 
@@ -25,6 +25,7 @@ def purge_cache_for_paper(paper_id:str, old_cats:Optional[str]=None):
     """purges all keys needed for an unspecified change to a paper
     clears everything related to the paper, as well as any list and year pages it is on
     old_cats: include this string if the paper undergoes a category change to also purge pages the paper may have been removed from (or new year pages it is added to)
+    raises an IdentifierException if the paper_id is invalid or no such paper exists, and KeyError if the category string contains invalid categories
     """
     arxiv_id = Identifier(paper_id)
     keys=_purge_category_change(arxiv_id, old_cats)
@@ -59,6 +60,9 @@ def _get_category_and_date(arxiv_id:Identifier)-> Tuple[str, date]:
         .filter(up.action != "absonly")
         .first()
     )
+    if not result:
+        raise IdentifierException(f'paper id does not exist: {arxiv_id.id}')
+
     new_cats: str=result[0]
     recent_date: date=result[1]
     return new_cats, recent_date

diff --git a/arxiv/integration/tests/test_fastly.py b/arxiv/integration/tests/test_fastly.py
@@ -1,10 +1,11 @@
 import unittest
+import pytest
 from unittest.mock import patch, MagicMock
 from datetime import date
 from fastly.api.purge_api import PurgeApi
 
-from arxiv.identifier import Identifier
-from arxiv.integration.fastly.purge import purge_fastly_keys, _purge_category_change, purge_cache_for_paper
+from arxiv.identifier import Identifier, IdentifierException
+from arxiv.integration.fastly.purge import purge_fastly_keys, _purge_category_change, purge_cache_for_paper, _get_category_and_date
 from arxiv.integration.fastly.headers import add_surrogate_key
 
 #tests for the purge keys utility function
@@ -212,4 +213,12 @@ def test_purge_cache_for_paper(mockToday,mockPurge, mockDBQuery):
     expected_keys=["list-2010-01-cs.LG", "list-2010-cs.LG", "list-2010-01-cs", "list-2010-cs", "list-2010-01-cs.DC", "list-2010-cs.DC", "paper-id-1001.5678"]
     purge_cache_for_paper('1001.5678',"cs.LG")
     actual_keys = mockPurge.call_args[0][0]
-    assert sorted(actual_keys) == sorted (expected_keys)
+    assert sorted(actual_keys) == sorted (expected_keys)
+
+def test_get_category_and_date_nonexstant_ids():
+    # there is no paper with this id
+    # base also has no test db, so any paper id would return None here; the test still holds even if a db with data is present
+    bad_id=Identifier("0807.9999")
+    with pytest.raises(IdentifierException):
+        _get_category_and_date(bad_id)
+
diff --git a/arxiv/taxonomy/category.py b/arxiv/taxonomy/category.py
@@ -144,6 +144,7 @@ def get_all_cats_from_string(cat_string:str, only_cannonical:Optional[bool]=Fals
     This is needed because alternate names are not always recorded in the strings
     setting only_cannonical to true will not return non canonical categories, but will stil return the archives from non-cannonical alias pairs
     This is meant to be used on catgory strings from the database, not user input
+    raises KeyError if category string is invalid
     """
     from .definitions import CATEGORIES, CATEGORY_ALIASES
     cats = {CATEGORIES[cat].get_canonical() for cat in cat_string.split()}