Skip to content

Commit

Permalink
Merge pull request #303 from arXiv/ARXIVCE-2413-handle-nonexisting-pa…
Browse files Browse the repository at this point in the history
…pers

trying to purge a paper that doesn't exist results in an IdentifierEx…
  • Loading branch information
kyokukou committed Aug 19, 2024
2 parents be4d478 + e6d559f commit 069a311
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 4 deletions.
6 changes: 5 additions & 1 deletion arxiv/integration/fastly/purge.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from arxiv.config import settings
from arxiv.db import Session
from arxiv.db.models import Metadata, Updates
from arxiv.identifier import Identifier
from arxiv.identifier import Identifier, IdentifierException
from arxiv.taxonomy.definitions import CATEGORIES
from arxiv.taxonomy.category import get_all_cats_from_string

Expand All @@ -25,6 +25,7 @@ def purge_cache_for_paper(paper_id:str, old_cats:Optional[str]=None):
"""purges all keys needed for an unspecified change to a paper
clears everything related to the paper, as well as any list and year pages it is on
old_cats: include this string if the paper undergoes a category change to also purge pages the paper may have been removed from (or new year pages it is added to)
raises an IdentifierException if the paper_id is invalid, and KeyError if the category string contains invalid categories
"""
arxiv_id = Identifier(paper_id)
keys=_purge_category_change(arxiv_id, old_cats)
Expand Down Expand Up @@ -59,6 +60,9 @@ def _get_category_and_date(arxiv_id:Identifier)-> Tuple[str, date]:
.filter(up.action != "absonly")
.first()
)
if not result:
raise IdentifierException(f'paper id does not exist: {arxiv_id.id}')

new_cats: str=result[0]
recent_date: date=result[1]
return new_cats, recent_date
Expand Down
15 changes: 12 additions & 3 deletions arxiv/integration/tests/test_fastly.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import unittest
import pytest
from unittest.mock import patch, MagicMock
from datetime import date
from fastly.api.purge_api import PurgeApi

from arxiv.identifier import Identifier
from arxiv.integration.fastly.purge import purge_fastly_keys, _purge_category_change, purge_cache_for_paper
from arxiv.identifier import Identifier, IdentifierException
from arxiv.integration.fastly.purge import purge_fastly_keys, _purge_category_change, purge_cache_for_paper, _get_category_and_date
from arxiv.integration.fastly.headers import add_surrogate_key

#tests for the purge keys utility function
Expand Down Expand Up @@ -212,4 +213,12 @@ def test_purge_cache_for_paper(mockToday,mockPurge, mockDBQuery):
expected_keys=["list-2010-01-cs.LG", "list-2010-cs.LG", "list-2010-01-cs", "list-2010-cs", "list-2010-01-cs.DC", "list-2010-cs.DC", "paper-id-1001.5678"]
purge_cache_for_paper('1001.5678',"cs.LG")
actual_keys = mockPurge.call_args[0][0]
assert sorted(actual_keys) == sorted (expected_keys)
assert sorted(actual_keys) == sorted (expected_keys)

def test_get_category_and_date_nonexstant_ids():
    """Looking up a paper id that has no record must raise IdentifierException."""
    # No paper exists with this id. Per the original author's note, the base
    # package has no test db, so any lookup would come back empty anyway; this
    # id is chosen so the test still holds even if data is available.
    missing_paper = Identifier("0807.9999")
    with pytest.raises(IdentifierException):
        _get_category_and_date(missing_paper)

1 change: 1 addition & 0 deletions arxiv/taxonomy/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ def get_all_cats_from_string(cat_string:str, only_cannonical:Optional[bool]=Fals
This is needed because alternate names are not always recorded in the strings
setting only_cannonical to true will not return non-canonical categories, but will still return the archives from non-canonical alias pairs
This is meant to be used on category strings from the database, not user input
raises KeyError if category string is invalid
"""
from .definitions import CATEGORIES, CATEGORY_ALIASES
cats = {CATEGORIES[cat].get_canonical() for cat in cat_string.split()}
Expand Down

0 comments on commit 069a311

Please sign in to comment.