Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arxivce 1634 fs abs #272

Merged
merged 8 commits into from
May 14, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions arxiv/document/parse_abs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from dateutil import parser

from ..taxonomy.definitions import ARCHIVES, CATEGORIES
from ..files import FileObj, FileDoesNotExist
from .metadata import AuthorList, DocMetadata, Submitter
from ..config import settings
from .version import VersionEntry, SourceFlag
Expand Down Expand Up @@ -63,26 +64,23 @@
"""FS timezone if in a flask app."""


def parse_abs_file(filename: str) -> DocMetadata:
def parse_abs_file(file: FileObj) -> DocMetadata:
"""Parse an arXiv .abs file from the local FS.

The modified time on the abs file will be used as the modified time for the
abstract. It will be pulled from `flask.config` if in an app_context. It
can be specified with tz arg.
"""

absfile = Path(filename)
if isinstance(file, FileDoesNotExist):
raise AbsNotFoundException
try:
with absfile.open(mode='r', encoding='latin-1') as absf:
raw = absf.read()
modified = datetime.fromtimestamp(absfile.stat().st_mtime, tz=_get_tz())
modified = modified.astimezone(ZoneInfo("UTC"))
return parse_abs(raw, modified)
with file.open(mode='r', encoding='latin-1') as absf:
return parse_abs(absf.read(), file.updated)

except FileNotFoundError:
raise AbsNotFoundException
except UnicodeDecodeError as e:
raise AbsParsingException(f'Failed to decode .abs file "{filename}": {e}')
raise AbsParsingException(f'Failed to decode .abs file "{file}": {e}')



Expand Down
Loading