Skip to content

Commit

Permalink
Merge pull request #97 from arXiv/develop
Browse files Browse the repository at this point in the history
Pre-release merge for v0.2.2
  • Loading branch information
mhl10 authored May 16, 2019
2 parents 392a5f2 + bea9823 commit 1c87648
Show file tree
Hide file tree
Showing 39 changed files with 1,868 additions and 132 deletions.
4 changes: 2 additions & 2 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ omit =
tests/legacy_comparison/*
wsgi.py
populate_test_database.py

upload_static_assets.py

[report]

# Exit nose2 with failed if coverage under this percent
fail_under = 80
fail_under = 80
3 changes: 2 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@ mysqlclient = "==1.4.1"
"mmh3" = "*"
aiohttp = "*"
flask = "==1.0.2"
arxiv-base = "==0.15.6"
arxiv-base = "==0.15.7"
validators = "*"
mypy-extensions = "*"
flask-wtf = "*"
arxiv-auth = "==0.3.1"
mypy = "*"
jinja2 = "==2.10.1"
flask-s3 = "*"

[dev-packages]
pylama = "*"
Expand Down
45 changes: 20 additions & 25 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ Path to cache directory:
arXiv Labs options:
* `LABS_BIBEXPLORER_ENABLED`

### Serving static files on S3

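We use [Flask-S3](https://flask-s3.readthedocs.io/en/latest/) to serve static
files via S3. Configure it as described in the Flask-S3 documentation; the
`FLASKS3_*` settings in `browse/config.py` can each be overridden with an
environment variable of the same name.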

### Test suite

Before running the test suite, install the dev packages:
Expand Down
22 changes: 17 additions & 5 deletions browse/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import dateutil.parser
from datetime import datetime, timedelta

VERSION = '0.2.1'
APP_VERSION = '0.2.2'
"""The application version """

ON = 'yes'
Expand Down Expand Up @@ -204,6 +204,16 @@
to be loaded.
"""

"""
Flask-S3 plugin settings.
See `<https://flask-s3.readthedocs.io/en/latest/>`_.
"""
FLASKS3_BUCKET_NAME = os.environ.get('FLASKS3_BUCKET_NAME', 'some_bucket')
FLASKS3_CDN_DOMAIN = os.environ.get('FLASKS3_CDN_DOMAIN', 'static.arxiv.org')
# Values read from the environment are always strings, and any non-empty
# string (including '0') is truthy. Parse the on/off flags as integers first,
# the same way BROWSE_USER_BANNER_ENABLED is handled, so that setting a
# variable to '0' really switches the option off.
FLASKS3_USE_HTTPS = bool(int(os.environ.get('FLASKS3_USE_HTTPS', '1')))
FLASKS3_FORCE_MIMETYPE = bool(int(
    os.environ.get('FLASKS3_FORCE_MIMETYPE', '1')))
FLASKS3_ACTIVE = bool(int(os.environ.get('FLASKS3_ACTIVE', '0')))

# SQLAlchemy configuration
# For mysql: 'mysql://user:pass@localhost/dbname'
SQLALCHEMY_DATABASE_URI = os.environ.get('BROWSE_SQLALCHEMY_DATABASE_URI',
Expand Down Expand Up @@ -268,23 +278,25 @@
BROWSE_ANALYTICS_SITE_ID = os.environ.get('BROWSE_ANALYTICS_SITE_ID', '1')
"""Tracker site ID."""

BROWSE_USER_BANNER_ENABLED = os.environ.get(
'BROWSE_USER_BANNER_ENABLED', False)
BROWSE_USER_BANNER_ENABLED = bool(int(os.environ.get(
'BROWSE_USER_BANNER_ENABLED', '0')))
"""Enable/disable user banner."""
try:
BROWSE_USER_BANNER_START_DATE = dateutil.parser.parse(
os.environ.get('BROWSE_USER_BANNER_START_DATE')
).replace(hour=0, minute=0, second=0)
except Exception:
warnings.warn("Bad value for BROWSE_USER_BANNER_START_DATE")
if BROWSE_USER_BANNER_ENABLED:
warnings.warn("Bad value for BROWSE_USER_BANNER_START_DATE")
BROWSE_USER_BANNER_START_DATE = datetime.now() - timedelta(days=1)

try:
BROWSE_USER_BANNER_END_DATE = dateutil.parser.parse(
os.environ.get('BROWSE_USER_BANNER_END_DATE')
).replace(hour=23, minute=59, second=59)
except Exception:
warnings.warn("Bad value for BROWSE_USER_BANNER_END_DATE")
if BROWSE_USER_BANNER_ENABLED:
warnings.warn("Bad value for BROWSE_USER_BANNER_END_DATE")
BROWSE_USER_BANNER_END_DATE = datetime.now() + timedelta(days=1)

DOCUMENT_LATEST_VERSIONS_PATH = os.environ.get(
Expand Down
12 changes: 9 additions & 3 deletions browse/controllers/stats_page/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Handle requests to display and return stats about the arXiv service."""

import dateutil.parser
from datetime import datetime, timedelta
from datetime import date, datetime, timedelta
from typing import Any, Dict, Optional, Tuple
from werkzeug.exceptions import InternalServerError, BadRequest

Expand All @@ -10,7 +10,7 @@
from browse.services.database import get_hourly_stats, get_hourly_stats_count, \
get_monthly_download_stats, get_monthly_submission_stats, \
get_monthly_submission_count, get_monthly_download_count, \
get_max_download_stats_dt
get_max_download_stats_dt, get_document_count_by_yymm
from browse.services.document.config.deleted_papers import DELETED_PAPERS


Expand Down Expand Up @@ -140,11 +140,17 @@ def get_monthly_submissions_page() -> Response:
def get_submission_stats_csv() -> Response:
"""Get submission stats in CSV format."""
csv_head = "month,submissions,historical_delta\n"
current_date = date.today()
try:
rows = get_monthly_submission_stats()
csv_data = "".join([
f"{r.ym.strftime('%Y-%m')},{r.num_submissions},{r.historical_delta}\n"
for r in get_monthly_submission_stats()
for r in rows
])
if rows and rows[-1].ym < current_date:
this_month_count = get_document_count_by_yymm(current_date)
if this_month_count > 0:
csv_data = csv_data + f"{current_date.strftime('%Y-%m')},{this_month_count},0\n"
return {'csv': csv_head + csv_data}, status.HTTP_200_OK, {'Content-Type': 'text/csv'}
except Exception as ex:
logger.warning(f'Error getting monthly submission stats csv: {ex}')
Expand Down
4 changes: 2 additions & 2 deletions browse/controllers/stats_page/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,9 @@ def test_get_submission_stats_csv(self, mock_get_monthly_submission_stats) -> No

# test response with mock data
mock_get_monthly_submission_stats.return_value = [
mock.Mock(ym=datetime(2019, 2, 1),
mock.Mock(ym=date(2019, 2, 1),
num_submissions=9999, historical_delta=-42),
mock.Mock(ym=datetime(2019, 3, 1),
mock.Mock(ym=date(2019, 3, 1),
num_submissions=10101, historical_delta=0)
]
expected_response = "month,submissions,historical_delta\n"\
Expand Down
13 changes: 8 additions & 5 deletions browse/factory.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
"""Application factory for browse service components."""
from functools import partial
from flask import Flask, g
from flask import Flask
from flask_s3 import FlaskS3

from arxiv.base.urls import canonical_url, clickthrough_url, urlizer
from browse.config import APP_VERSION
from browse.routes import ui
from browse.services.database import models
from browse.services.util.email import generate_show_email_hash
from browse.filters import entity_to_utf
from browse.services.listing.fake_listings import FakeListingFilesService

from arxiv.base.config import BASE_SERVER
from arxiv.base import Base
from arxiv.users.auth import Auth

s3 = FlaskS3()


def create_web_app() -> Flask:
"""Initialize an instance of the browse web application."""
app = Flask('browse', static_folder='static', template_folder='templates')
app = Flask('browse', static_url_path=f'/static/browse/{APP_VERSION}')
app.config.from_pyfile('config.py') # type: ignore

# TODO Only needed until this route is added to arxiv-base
Expand All @@ -26,11 +29,11 @@ def create_web_app() -> Flask:
('search_archive', '/search/<archive>', BASE_SERVER))

models.init_app(app) # type: ignore

Base(app)
Auth(app)
app.register_blueprint(ui.blueprint)

s3.init_app(app)

if not app.jinja_env.globals:
app.jinja_env.globals = {}

Expand Down
13 changes: 12 additions & 1 deletion browse/routes/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def form(arxiv_id: str) -> Response:


@blueprint.route('archive/', defaults={'archive': None})
@blueprint.route('archive/<archive>')
@blueprint.route('archive/<archive>', strict_slashes=False)
def archive(archive: str): # type: ignore
"""Landing page for an archive."""
response, code, headers = archive_page.get_archive(archive) # type: ignore
Expand All @@ -323,6 +323,17 @@ def archive(archive: str): # type: ignore
return response, code, headers


@blueprint.route('archive/<archive>/<junk>', strict_slashes=False)
def archive_with_extra(archive: str, junk: str):  # type: ignore
    """
    Permanently redirect an archive URL with one trailing segment.

    The extra ``junk`` segment is discarded and the client is sent, with a
    301, to the plain archive page for ``archive``. This handles some odd
    URLs that have ended up in search engines. See also ARXIVOPS-2119.
    """
    archive_url = url_for('browse.archive', archive=archive)
    return redirect(archive_url, code=301)


@blueprint.route('year/<archive>', defaults={'year': None})
@blueprint.route('year/<archive>/', defaults={'year': None}, strict_slashes=False)
@blueprint.route('year/<archive>/<int:year>/')
Expand Down
18 changes: 17 additions & 1 deletion browse/services/database/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,22 @@ def get_document_count() -> Optional[int]:
return row.num_documents


@db_handle_error(logger=logger, default_return_val=0)
def get_document_count_by_yymm(paper_date: Optional[date] = None) -> int:
    """
    Get number of papers for a given year and month.

    ``paper_date`` selects the year and month; anything that is not a
    :class:`date` (including ``None``) falls back to today. ``datetime``
    instances are accepted as well. Documents whose paper ID starts with
    ``test`` are excluded from the count.
    """
    if not isinstance(paper_date, date):
        paper_date = date.today()
    yymm = paper_date.strftime('%y%m')
    # Compare on (year, month) instead of against a ``date`` object: a
    # ``datetime`` passes the isinstance check above, but ordering it against
    # a plain ``date`` raises TypeError.
    if (paper_date.year, paper_date.month) < (2007, 4):
        # Before April 2007 the yymm is matched after a '/' anywhere in the
        # paper ID (presumably old-style 'archive/yymmnnn' IDs -- confirm).
        yymm_like = f'%/{yymm}%'
    else:
        # From April 2007 onward the paper ID must start with the yymm.
        yymm_like = f'{yymm}%'
    row = db.session.query(
        func.count(Document.document_id).label('num_documents')
    ).filter(Document.paper_id.like(yymm_like))\
        .filter(not_(Document.paper_id.like('test%'))).first()
    return row.num_documents


@db_handle_error(logger=logger, default_return_val=None)
def get_sequential_id(paper_id: Identifier,
context: str = 'all',
Expand Down Expand Up @@ -298,7 +314,7 @@ def get_hourly_stats(stats_date: Optional[date] = None) -> List:

@db_handle_error(logger=logger, default_return_val=[])
def get_monthly_submission_stats() -> List:
    """
    Get monthly submission stats from :class:`.StatsMonthlySubmission`.

    All rows are returned as a list, ordered by ascending ``ym``.
    """
    return list(db.session.query(StatsMonthlySubmission).
                order_by(asc(StatsMonthlySubmission.ym)).all())

Expand Down
10 changes: 9 additions & 1 deletion browse/services/search/search_authors.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,22 @@ def split_long_author_list(
count = 0
back_count = 0
for item in authors:
if count > size:
if count >= size:
back.append(item)
if isinstance(item, tuple):
back_count = back_count + 1
else:
front.append(item)
if isinstance(item, tuple):
count = count + 1

# handle case where back doesn't have much ARXIVNG-2083
authors_in_back = len(list(filter(lambda x: isinstance(x, tuple), back)))
if authors_in_back < 2:
front = front + back
back = []
back_count = 0

return front, back, back_count


Expand Down
Loading

0 comments on commit 1c87648

Please sign in to comment.