From 0d5f061c0b6f07027d8b03f9d0ab0eb05e3fcb19 Mon Sep 17 00:00:00 2001 From: Mark Nazzaro Date: Fri, 10 May 2024 11:30:53 -0400 Subject: [PATCH 1/2] Use file obj instead of path --- arxiv/document/parse_abs.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arxiv/document/parse_abs.py b/arxiv/document/parse_abs.py index a16512eb..f6a2e677 100644 --- a/arxiv/document/parse_abs.py +++ b/arxiv/document/parse_abs.py @@ -10,6 +10,7 @@ from dateutil import parser from ..taxonomy.definitions import ARCHIVES, CATEGORIES +from ..files import FileObj, FileDoesNotExist from .metadata import AuthorList, DocMetadata, Submitter from ..config import settings from .version import VersionEntry, SourceFlag @@ -63,26 +64,23 @@ """FS timezone if in a flask app.""" -def parse_abs_file(filename: str) -> DocMetadata: +def parse_abs_file(file: FileObj) -> DocMetadata: """Parse an arXiv .abs file from the local FS. The modified time on the abs file will be used as the modified time for the abstract. It will be pulled from `flask.config` if in a app_context. It can be specified with tz arg. """ - - absfile = Path(filename) + if isinstance(file, FileDoesNotExist): + raise AbsNotFoundException try: - with absfile.open(mode='r', encoding='latin-1') as absf: - raw = absf.read() - modified = datetime.fromtimestamp(absfile.stat().st_mtime, tz=_get_tz()) - modified = modified.astimezone(ZoneInfo("UTC")) - return parse_abs(raw, modified) + with file.open(mode='r', encoding='latin-1') as absf: + return parse_abs(absf.read(), file.updated()) except FileNotFoundError: raise AbsNotFoundException except UnicodeDecodeError as e: - raise AbsParsingException(f'Failed to decode .abs file "{filename}": {e}') + raise AbsParsingException(f'Failed to decode .abs file "{file}": {e}') From 6e687b5dd9a3efc2946ce1bf45039932ddbceb97 Mon Sep 17 00:00:00 2001 From: Mark Nazzaro Date: Fri, 10 May 2024 11:48:55 -0400 Subject: [PATCH 2/2] updated is a property --- arxiv/document/parse_abs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arxiv/document/parse_abs.py b/arxiv/document/parse_abs.py index f6a2e677..1d3ce9d7 100644 --- a/arxiv/document/parse_abs.py +++ b/arxiv/document/parse_abs.py @@ -75,7 +75,7 @@ def parse_abs_file(file: FileObj) -> DocMetadata: raise AbsNotFoundException try: with file.open(mode='r', encoding='latin-1') as absf: - return parse_abs(absf.read(), file.updated()) + return parse_abs(absf.read(), file.updated) except FileNotFoundError: raise AbsNotFoundException