Skip to content

Commit

Permalink
Merge pull request #267 from arXiv/develop
Browse files: browse the repository at this point in the history
Docx format option changes + db isolation level environment variable
  • Loading branch information
mnazzaro authored May 10, 2024
2 parents 3e2d405 + f707487 commit 663af66
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 11 deletions.
3 changes: 1 addition & 2 deletions arxiv/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,5 @@ def register_blueprint(self: Flask, blueprint: Blueprint,
# It is the same as the flask_sqlalchemy implementation
# See: https://github.com/pallets-eco/flask-sqlalchemy/blob/42a36a3cb604fd39d81d00b54ab3988bbd0ad184/src/flask_sqlalchemy/session.py#L109
@app.teardown_appcontext
def remove_scoped_session (response_or_exc):
def remove_scoped_session (response_or_exc: BaseException | None) -> None:
session.remove()
return response_or_exc
7 changes: 7 additions & 0 deletions arxiv/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import importlib.metadata
from typing import Optional, List, Tuple
import os
from sqlalchemy.engine.interfaces import IsolationLevel
from secrets import token_hex
from urllib.parse import urlparse
from pydantic import BaseSettings, SecretStr
Expand Down Expand Up @@ -171,5 +172,11 @@ class Settings(BaseSettings):
CLASSIC_DB_URI: str = DEFAULT_DB
LATEXML_DB_URI: str = DEFAULT_LATEXML_DB
ECHO_SQL: bool = False
CLASSIC_DB_TRANSACTION_ISOLATION_LEVEL: Optional[IsolationLevel] = None
LATEXML_DB_TRANSACTION_ISOLATION_LEVEL: Optional[IsolationLevel] = None

REQUEST_CONCURRENCY: int = 32
""" How many requests do we handle at once -> How many db connections should we be able to open at once """
POOL_PRE_PING: bool = True
""" Liveness check of sqlalchemy connections before checking out of pool """
settings = Settings()
18 changes: 13 additions & 5 deletions arxiv/db/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
with transaction() as session:
session.add(...)
"""
from typing import Generator
from typing import Optional
import logging
from contextlib import contextmanager

Expand All @@ -41,7 +41,7 @@

from sqlalchemy import create_engine, MetaData, String
from sqlalchemy.orm import sessionmaker, scoped_session, DeclarativeBase

from sqlalchemy.engine.interfaces import IsolationLevel

from ..config import settings

Expand All @@ -56,9 +56,17 @@ class LaTeXMLBase(DeclarativeBase):
logger = logging.getLogger(__name__)

engine = create_engine(settings.CLASSIC_DB_URI,
echo=settings.ECHO_SQL)
echo=settings.ECHO_SQL,
isolation_level=settings.CLASSIC_DB_TRANSACTION_ISOLATION_LEVEL,
pool_recycle=600,
max_overflow=(settings.REQUEST_CONCURRENCY - 5), # max_overflow is how many connections may be opened beyond the base pool size (5 by default), so pool size + max_overflow = REQUEST_CONCURRENCY
pool_pre_ping=settings.POOL_PRE_PING)
latexml_engine = create_engine(settings.LATEXML_DB_URI,
echo=settings.ECHO_SQL)
echo=settings.ECHO_SQL,
isolation_level=settings.LATEXML_DB_TRANSACTION_ISOLATION_LEVEL,
pool_recycle=600,
max_overflow=(settings.REQUEST_CONCURRENCY - 5),
pool_pre_ping=settings.POOL_PRE_PING)
SessionLocal = sessionmaker(autocommit=False, autoflush=False)

def _app_ctx_id () -> int:
Expand All @@ -77,7 +85,7 @@ def get_db ():
@contextmanager
def transaction ():
in_flask = True if has_app_context() else False
db = session if in_flask else SessionLocal()
db = session if in_flask else SessionLocal()
try:
yield db

Expand Down
4 changes: 2 additions & 2 deletions arxiv/document/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,10 +291,10 @@ def raw(self) -> str:
# skipping proxy to avoid harvesting of email addresses
if self.report_num:
rv += f"Report-no: {self.report_num}\n"
if self.msc_class:
rv += f"MSC-class: {self.msc_class}\n"
if self.acm_class:
rv += f"ACM-class: {self.acm_class}\n"
if self.msc_class:
rv += f"MSC-class: {self.msc_class}\n"
if self.journal_ref:
rv += f"Journal-ref: {self.journal_ref}\n"
if self.doi:
Expand Down
2 changes: 1 addition & 1 deletion arxiv/document/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def formats(self) -> List[str]:
elif self.source_flag.html or self.source_format == "html":
formats.extend(['html'])
elif self.source_flag.docx or self.source_format == "docx":
formats.extend(['pdf'])
formats.extend(['pdf', 'docx'])
else:
formats.extend(['pdf', 'ps', 'src'])

Expand Down
10 changes: 9 additions & 1 deletion arxiv/files/object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,14 @@

from google.cloud.storage.blob import Blob
from google.cloud.storage.bucket import Bucket
from google.cloud.storage.retry import DEFAULT_RETRY

from . import FileObj

GCS_RETRY = DEFAULT_RETRY \
.with_deadline(12) \
.with_delay(0.25, 2.5)

class ObjectStore(ABC):
"""ABC for an object store."""

Expand Down Expand Up @@ -106,7 +111,10 @@ def to_obj(self, key: str) -> FileObj:
Returns `FileDoesNotExist` if there is no object at the key.
"""
blob = self.bucket.get_blob(key)
try:
blob = self.bucket.get_blob(key, retry=GCS_RETRY)
except:
blob = None
if not blob:
return FileDoesNotExist("gs://" + self.bucket.name + '/' + key)
else:
Expand Down

0 comments on commit 663af66

Please sign in to comment.