Skip to content

Commit

Permalink
Merge pull request #272 from arXiv/ARXIVCE-1634-fs-abs
Browse files Browse the repository at this point in the history
Arxivce 1634 fs abs
  • Loading branch information
mnazzaro committed May 14, 2024
2 parents 55a0304 + 6e687b5 commit 6b4e413
Showing 1 changed file with 7 additions and 9 deletions.
16 changes: 7 additions & 9 deletions arxiv/document/parse_abs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from dateutil import parser

from ..taxonomy.definitions import ARCHIVES, CATEGORIES
from ..files import FileObj, FileDoesNotExist
from .metadata import AuthorList, DocMetadata, Submitter
from ..config import settings
from .version import VersionEntry, SourceFlag
Expand Down Expand Up @@ -63,26 +64,23 @@
"""FS timezone if in a flask app."""


def parse_abs_file(filename: str) -> DocMetadata:
def parse_abs_file(file: FileObj) -> DocMetadata:
"""Parse an arXiv .abs file from the local FS.
The modified time on the abs file will be used as the modified time for the
abstract. The timezone will be pulled from `flask.config` if in an
app_context. It can be specified with tz arg.
"""

absfile = Path(filename)
if isinstance(file, FileDoesNotExist):
raise AbsNotFoundException
try:
with absfile.open(mode='r', encoding='latin-1') as absf:
raw = absf.read()
modified = datetime.fromtimestamp(absfile.stat().st_mtime, tz=_get_tz())
modified = modified.astimezone(ZoneInfo("UTC"))
return parse_abs(raw, modified)
with file.open(mode='r', encoding='latin-1') as absf:
return parse_abs(absf.read(), file.updated)

except FileNotFoundError:
raise AbsNotFoundException
except UnicodeDecodeError as e:
raise AbsParsingException(f'Failed to decode .abs file "{filename}": {e}')
raise AbsParsingException(f'Failed to decode .abs file "{file}": {e}')



Expand Down

0 comments on commit 6b4e413

Please sign in to comment.