Skip to content

Commit

Permalink
Merge pull request #106 from arXiv/develop
Browse files Browse the repository at this point in the history
Pre-release merge for v0.2.5
  • Loading branch information
mhl10 committed Aug 8, 2019
2 parents 0db65c0 + 7e8ae6b commit 1490f9c
Show file tree
Hide file tree
Showing 21 changed files with 364 additions and 274 deletions.
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ mysqlclient = "==1.4.1"
"mmh3" = "*"
aiohttp = "*"
flask = "==1.0.2"
arxiv-base = "==0.15.9"
arxiv-base = "==0.16.1"
validators = "*"
mypy-extensions = "*"
flask-wtf = "*"
Expand Down
255 changes: 119 additions & 136 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion browse/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import dateutil.parser
from datetime import datetime, timedelta

APP_VERSION = '0.2.3'
APP_VERSION = '0.2.5'
"""The application version """

ON = 'yes'
Expand Down
17 changes: 7 additions & 10 deletions browse/controllers/cookies.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
"""Handle requests to set cookies"""
"""Handle requests to set cookies."""

import re
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List
import copy

import flask
from flask import url_for, request, make_response
from werkzeug.exceptions import InternalServerError
from flask import url_for, request

from arxiv import status

Expand Down Expand Up @@ -56,10 +54,8 @@
# TODO implement debug parameter

def get_cookies_page(is_debug: bool) -> Any:
"""Render the cookies page.
Parameters
----------
"""
Render the cookies page.
Returns
-------
Expand All @@ -74,6 +70,7 @@ def get_cookies_page(is_debug: bool) -> Any:
------
:class:`.InternalServerError`
Raised when there was an unexpected problem executing the query.
"""
debug = {'debug': '1'} if is_debug else {
} # want to propogate debug to form URL
Expand All @@ -90,7 +87,7 @@ def get_cookies_page(is_debug: bool) -> Any:


def selected_options_from_request(configs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Sets the selected value on the options for the request cookies."""
"""Set the selected value on the options for the request cookies."""
cookies = request.cookies
for cc in configs:
request_value = cookies.get(cc['name'], None)
Expand Down
12 changes: 10 additions & 2 deletions browse/controllers/stats_page/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,14 @@ def test_get_download_stats_csv(self, mock_get_monthly_download_stats) -> None:
self.assertEqual(code, status.HTTP_200_OK, 'Response should be OK.')
self.assertEqual(response_data['csv'], expected_response)

@mock.patch('browse.controllers.stats_page.get_document_count_by_yymm')
@mock.patch('browse.controllers.stats_page.get_monthly_submission_count')
def test_get_monthly_submissions_page(self, mock_get_monthly_submission_count) -> None: # type: ignore
def test_get_monthly_submissions_page(self,
mock_get_monthly_submission_count,
mock_get_document_count_by_yymm) -> None: # type: ignore
"""Tests for :func:`.get_monthly_submissions_page`."""
# test basic response
mock_get_document_count_by_yymm.return_value = 0
mock_get_monthly_submission_count.return_value = (0, 0)
response_data, code, headers = \
stats_page.get_monthly_submissions_page()
Expand Down Expand Up @@ -177,10 +181,14 @@ def test_get_monthly_submissions_page(self, mock_get_monthly_submission_count) -
self.assertIsInstance(response_data['current_dt'], datetime)
self.assertIsInstance(response_data['arxiv_start_dt'], datetime)

@mock.patch('browse.controllers.stats_page.get_document_count_by_yymm')
@mock.patch('browse.controllers.stats_page.get_monthly_submission_stats')
def test_get_submission_stats_csv(self, mock_get_monthly_submission_stats) -> None: # type: ignore
def test_get_submission_stats_csv(self,
mock_get_monthly_submission_stats,
mock_get_document_count_by_yymm) -> None: # type: ignore
"""Tests for :func:`.get_submission_stats_csv`."""
# test basic response
mock_get_document_count_by_yymm.return_value = 0
mock_get_monthly_submission_stats.return_value = list()
response_data, code, headers = stats_page.get_submission_stats_csv()
self.assertEqual(code, status.HTTP_200_OK, 'Response should be OK.')
Expand Down
20 changes: 11 additions & 9 deletions browse/controllers/year.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
"""Handle requests for info about one year of archive activity"""
"""Handle requests for info about one year of archive activity."""

from datetime import date
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple

from werkzeug.exceptions import BadRequest
from flask import current_app, url_for
from flask import url_for

from arxiv import status, taxonomy
from browse.domain.listing import MonthCount, ListingCountResponse
from browse.domain.listing import MonthCount
from browse.controllers.list_page import get_listing_service
from browse.controllers.years_operating import years_operating, stats_by_year


def year_page(archive_id: str, year: int) -> Any:
"""Get year page for archive.
"""
Get year page for archive.
Parameters
----------
Expand All @@ -31,12 +32,13 @@ def year_page(archive_id: str, year: int) -> Any:
HTTP status code.
dict
Headers to add to the response.
"""
thisYear = date.today().year

if year is None:
year = thisYear

if year > thisYear:
# 307 because year might be valid in the future
return {}, status.HTTP_307_TEMPORARY_REDIRECT, {'Location': '/'}
Expand All @@ -57,8 +59,8 @@ def year_page(archive_id: str, year: int) -> Any:

for month in month_listing['month_counts']:
month['art'] = ascii_art_month(archive_id, month) # type: ignore
month['yymm'] =f"{month['year']}-{month['month']:02}" #type: ignore
month['url'] = url_for('browse.list_articles', #type: ignore
month['yymm'] = f"{month['year']}-{month['month']:02}" # type: ignore
month['url'] = url_for('browse.list_articles', # type: ignore
context=archive_id,
subcontext=f"{month['year']}{month['month']:02}")

Expand Down Expand Up @@ -87,7 +89,7 @@ def ascii_art_month(archive_id: str, month: MonthCount) -> List[Tuple[str, Optio
tot = month['new'] + month['cross']
yyyymm = f"{month['year']}{month['month']:02}"

def _makestep(idx:int) -> Tuple[str, Optional[str]]:
def _makestep(idx: int) -> Tuple[str, Optional[str]]:
if idx % ASCII_ART_URL_STEP == 0:
return (ASCII_ART_CHR,
url_for('browse.list_articles',
Expand Down
1 change: 1 addition & 0 deletions browse/routes/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

@blueprint.context_processor
def inject_now() -> dict:
    """
    Inject the current datetime into the context.

    Registered through ``blueprint.context_processor``.

    Returns
    -------
    dict
        A single entry, ``request_datetime``, holding the result of
        ``datetime.now()`` evaluated at call time.

    """
    # The annotation previously read ``-> None`` even though a dict is
    # returned; plain ``dict`` is used so no extra typing import is needed.
    return dict(request_datetime=datetime.now())


Expand Down
12 changes: 5 additions & 7 deletions browse/templates/home/home.html
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,13 @@
{{- group_section(('grp_physics','grp_math','grp_cs','grp_q-bio','grp_q-fin','grp_stat','grp_eess','grp_econ')) }}

<hr />
{#- TODO: remove because of new footer? -#}
<h2>About arXiv</h2>
<ul>
<li><a href="/help/general">General information</a> and <a href="/help/scientific_ad_board">Scientific Advisory Board</a></li>
<li><a href="/help/support">Support and Governance Model</a> and <a href="https://confluence.cornell.edu/x/NqlRF">Member Advisory Board</a></li>
<li><a href="/help/find">Find</a>, <a href="/help/view">view</a>, <a href="{{ url_for('subscribe') }}">email alerts</a> and <a href="/help/rss">RSS feeds</a></li>
<li><a href="/help/submit">Submission</a> and <a href="/help/moderation">moderation</a> details</li>
<li><a href="/help/stats">Usage statistics</a> and <a href="/new/">news</a></li>
<li>See also searchable <a href="{{ url_for('help') }}">help pages</a></li>
<li><a href="/about">General information</a></li>
<li><a href="/help/submit">How to Submit to arXiv</a></li>
<li><a href="/new">News</a></li>
<li><a href="/about/membership">Membership & Giving</a></li>
<li><a href="/about/people">Who We Are</a></li>
</ul>

{% endblock content -%}
Expand Down
2 changes: 0 additions & 2 deletions browse/templates/home/news.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
{#- News blurbs appear at the top of the home page. Generally there should be no more than four items. -#}
02 Jul 2019: <a href="http://bit.ly/arXivUXSpecialist3">We are hiring: arXiv User Experience Specialist.</a><br/>
12 Jun 2019: <a href="http://bit.ly/arXivExecutiveDirector3">We are hiring: Executive Director of arXiv.</a><br/>
11 Jun 2019: <a href="https://arxiv.org/new/#june11_2019">Announcing a new category and category mergers.</a><br/>
20 May 2019: <a href="http://bit.ly/arXivEngineer4">We are hiring: arXiv Service Reliability Engineer.</a><br/>
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Configuration file for the Sphinx documentation builder."""
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
Expand Down
29 changes: 29 additions & 0 deletions tests/data/abs_files/ftp/arxiv/papers/1702/1702.00249.abs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
------------------------------------------------------------------------------
\\
arXiv:1702.00249
From: Martin Eker{\aa} <ARXIVOPS-805@example.org>
Date: Wed, 1 Feb 2017 13:41:33 GMT (13kb,D)

Title: Quantum algorithms for computing short discrete logarithms and factoring
RSA integers
Authors: Martin Eker{\aa} and Johan H{\aa}stad
Categories: cs.CR quant-ph
License: http://arxiv.org/licenses/nonexclusive-distrib/1.0/
\\
In this paper we generalize the quantum algorithm for computing short
discrete logarithms previously introduced by Eker{\aa} so as to allow for
various tradeoffs between the number of times that the algorithm need be
executed on the one hand, and the complexity of the algorithm and the
requirements it imposes on the quantum computer on the other hand.
Furthermore, we describe applications of algorithms for computing short
discrete logarithms. In particular, we show how other important problems such
as those of factoring RSA integers and of finding the order of groups under
side information may be recast as short discrete logarithm problems. This
immediately gives rise to an algorithm for factoring RSA integers that is less
complex than Shor's general factoring algorithm in the sense that it imposes
smaller requirements on the quantum computer.
In both our algorithm and Shor's algorithm, the main hurdle is to compute a
modular exponentiation in superposition. When factoring an n bit integer, the
exponent is of length 2n bits in Shor's algorithm, compared to slightly more
than n/2 bits in our algorithm.
\\
1 change: 1 addition & 0 deletions tests/legacy_comparison/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Contains legacy comparison tests."""
34 changes: 24 additions & 10 deletions tests/legacy_comparison/abs_page_comparison.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Abs page comparison tests."""
import argparse
import itertools
import sys
Expand Down Expand Up @@ -54,7 +55,7 @@
To skip ancillary file comparisons: '--skip-ancillary'
Improvements:
Better reporting format, right now the comparisons produce just strings.
Better reporting format, right now the comparisons produce just strings.
"""

logging.basicConfig(filename="abs_page_comparison.log", level=logging.DEBUG)
Expand Down Expand Up @@ -110,6 +111,7 @@ def _paperid_generator_from_file(path: str, excluded: List[str])->Iterator[str]:


def paperid_generator(path: str, excluded: List[str]) -> Iterator[str]:
"""Generate an arXiv paper ID."""
for ( dir_name, subdir_list, file_list) in os.walk(path):
for fname in file_list:
fname_path = os.path.join(dir_name, fname)
Expand Down Expand Up @@ -146,6 +148,7 @@ def paperid_iterator(path: str, excluded: List[str]) -> List[str]:


def fetch_abs(compare_res_fn: Callable[[res_arg_dict], List[BadResult]], paper_id: str) -> Tuple[Dict, List[BadResult]]:
"""Fetch an abs page."""
ng_url = ng_abs_base_url + paper_id
legacy_url = legacy_abs_base_url + paper_id

Expand All @@ -163,8 +166,12 @@ def fetch_abs(compare_res_fn: Callable[[res_arg_dict], List[BadResult]], paper_i


def run_compare_response(skips: Set[str], res_args: res_arg_dict) -> Iterator[BadResult]:
""" This is also where we do most of the cleaning on text, for things
we know that we do not want to compare."""
"""
Compare responses.
This is also where we do most of the cleaning on text, for things
we know that we do not want to compare.
"""
legacy_text = res_args['legacy_res'].text
ng_text = res_args['ng_res'].text

Expand All @@ -188,12 +195,13 @@ def call_it(fn: Callable[[text_arg_dict], BadResult]) -> BadResult:
return BadResult(res_args['paper_id'], 'run_compare_response', traceback.format_exc())

logging.debug(f"about to do compares for {res_args['paper_id']}")

return filter(None, itertools.chain(
map(call_it, res_comparisons), run_compare_text(text_dict)))


def run_compare_text(text_args: text_arg_dict) -> Iterator[BadResult]:
"""Run the text comparison."""
html_dict = process_text(text_args)

def call_it(fn: Callable[[html_arg_dict], BadResult]) -> BadResult:
Expand All @@ -208,8 +216,9 @@ def call_it(fn: Callable[[html_arg_dict], BadResult]) -> BadResult:


def run_compare_html(html_args: html_arg_dict) -> Iterator[BadResult]:
"""Run comparison against HTML."""
logging.debug(f'about to run HTML compares for {html_args["paper_id"]}')

def call_it(fn: Callable[[html_arg_dict], BadResult]) -> BadResult:
# noinspection PyBroadException
try:
Expand All @@ -223,10 +232,12 @@ def call_it(fn: Callable[[html_arg_dict], BadResult]) -> BadResult:


def rm_email_hash(text: str) -> str:
    """
    Strip the hash segment from ``show-email`` links in the given text.

    Every occurrence of ``show-email/<word characters>/`` is collapsed to
    ``show-email/``; everything else in ``text`` is returned unchanged.
    """
    hashed_link = r'show-email/\w+/'
    bare_link = 'show-email/'
    return re.sub(hashed_link, bare_link, text)


def process_text(text_args: text_arg_dict) -> html_arg_dict:
"""Process text for comparison."""
text_args['ng_text'] = ' '.join(text_args['ng_text'].split())
text_args['legacy_text'] = ' '.join(text_args['legacy_text'].split())

Expand All @@ -242,6 +253,7 @@ def process_text(text_args: text_arg_dict) -> html_arg_dict:


def main() -> None:
"""Run the abs page comparison with provided arguments."""
parser = argparse.ArgumentParser(
description='Compare ng browse to legacy browse')
parser.add_argument('--ids', default=False, )
Expand Down Expand Up @@ -302,22 +314,23 @@ def done()->bool:

def done_job( job ):
(config, bad_results) = job
logging.debug(f"completed {config['paper_id']}")
visited_fh.write(f"{config['paper_id']}\n")
logging.debug(f"completed {config['paper_id']}")
visited_fh.write(f"{config['paper_id']}\n")
write_comparison(report_fh, (config,bad_results))
if done():
logging.info("done and existing")
exit(0)

[done_job(job) for job in completed_jobs]


def _serialize(obj):
    """
    Serialize objects that the default json code cannot handle.

    The object's attribute dictionary (``obj.__dict__``) is returned as-is.
    """
    attributes = obj.__dict__
    return attributes


def write_comparison(report_fh, result: Tuple[Dict, List[BadResult]])-> None:
"""Write comparison output."""
(config, bad_results) = result
logging.debug("writing report for %s", config['paper_id'])
if bad_results:
Expand All @@ -332,6 +345,7 @@ def write_comparison(report_fh, result: Tuple[Dict, List[BadResult]])-> None:


def format_bad_result(bad: BadResult)->str:
"""Format the BadResult object to a readable string."""
rpt = f"** {bad.comparison}\n" \
f"{bad.message} "
if bad.similarity:
Expand All @@ -346,7 +360,7 @@ def format_bad_result(bad: BadResult)->str:


def strip_by_delim(text: str, start: str, end: str) -> str:

"""Strip text by delimiter."""
if (start in text) and (end in text):
def find_start() -> int:
return text.index(start)
Expand Down
4 changes: 3 additions & 1 deletion tests/legacy_comparison/abstract_comparisons.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
"""Includes function to compare abstracts."""
from weighted_levenshtein import lev


def lev_similarity(aa: str, bb: str) -> float:
"""
Get a Levenshtein similarity score.
:param aa: first string
:param bb: second string
:return: The similarity of the two strings (0=bad, 1=match):
1- lev(aa,bb)/max(len(aa), len(bb))
"""

# Since weighted levenshtein can't handle unicode,
# convert to ASCII first:

Expand Down
Loading

0 comments on commit 1490f9c

Please sign in to comment.