Merge pull request #106 from arXiv/develop

Pre-release merge for v0.2.5
arXiv · Aug 8, 2019 · 1490f9c · 1490f9c
2 parents 0db65c0 + 7e8ae6b
commit 1490f9c
Show file tree

Hide file tree

Showing 21 changed files with 364 additions and 274 deletions.
diff --git a/Pipfile b/Pipfile
@@ -15,7 +15,7 @@ mysqlclient = "==1.4.1"
 "mmh3" = "*"
 aiohttp = "*"
 flask = "==1.0.2"
-arxiv-base = "==0.15.9"
+arxiv-base = "==0.16.1"
 validators = "*"
 mypy-extensions = "*"
 flask-wtf = "*"

diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/browse/config.py b/browse/config.py
@@ -8,7 +8,7 @@
 import dateutil.parser
 from datetime import datetime, timedelta
 
-APP_VERSION = '0.2.3'
+APP_VERSION = '0.2.5'
 """The application version """
 
 ON = 'yes'

diff --git a/browse/controllers/cookies.py b/browse/controllers/cookies.py
@@ -1,12 +1,10 @@
-"""Handle requests to set cookies"""
+"""Handle requests to set cookies."""
 
-import re
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List
 import copy
 
 import flask
-from flask import url_for, request, make_response
-from werkzeug.exceptions import InternalServerError
+from flask import url_for, request
 
 from arxiv import status
 
@@ -56,10 +54,8 @@
 # TODO implement debug parameter
 
 def get_cookies_page(is_debug: bool) -> Any:
-    """Render the cookies page.
-
-    Parameters
-    ----------
+    """
+    Render the cookies page.
 
     Returns
     -------
@@ -74,6 +70,7 @@ def get_cookies_page(is_debug: bool) -> Any:
     ------
     :class:`.InternalServerError`
         Raised when there was an unexpected problem executing the query.
+
     """
     debug = {'debug': '1'} if is_debug else {
     }  # want to propagate debug to form URL
@@ -90,7 +87,7 @@ def get_cookies_page(is_debug: bool) -> Any:
 
 
 def selected_options_from_request(configs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Sets the selected value on the options for the request cookies."""
+    """Set the selected value on the options for the request cookies."""
     cookies = request.cookies
     for cc in configs:
         request_value = cookies.get(cc['name'], None)

diff --git a/browse/controllers/stats_page/tests.py b/browse/controllers/stats_page/tests.py
@@ -141,10 +141,14 @@ def test_get_download_stats_csv(self, mock_get_monthly_download_stats) -> None:
         self.assertEqual(code, status.HTTP_200_OK, 'Response should be OK.')
         self.assertEqual(response_data['csv'], expected_response)
 
+    @mock.patch('browse.controllers.stats_page.get_document_count_by_yymm')
     @mock.patch('browse.controllers.stats_page.get_monthly_submission_count')
-    def test_get_monthly_submissions_page(self, mock_get_monthly_submission_count) -> None:  # type: ignore
+    def test_get_monthly_submissions_page(self,
+                                          mock_get_monthly_submission_count,
+                                          mock_get_document_count_by_yymm) -> None:  # type: ignore
         """Tests for :func:`.get_monthly_submissions_page`."""
         # test basic response
+        mock_get_document_count_by_yymm.return_value = 0
         mock_get_monthly_submission_count.return_value = (0, 0)
         response_data, code, headers = \
             stats_page.get_monthly_submissions_page()
@@ -177,10 +181,14 @@ def test_get_monthly_submissions_page(self, mock_get_monthly_submission_count) -
         self.assertIsInstance(response_data['current_dt'], datetime)
         self.assertIsInstance(response_data['arxiv_start_dt'], datetime)
 
+    @mock.patch('browse.controllers.stats_page.get_document_count_by_yymm')
     @mock.patch('browse.controllers.stats_page.get_monthly_submission_stats')
-    def test_get_submission_stats_csv(self, mock_get_monthly_submission_stats) -> None:  # type: ignore
+    def test_get_submission_stats_csv(self,
+                                      mock_get_monthly_submission_stats,
+                                      mock_get_document_count_by_yymm) -> None:  # type: ignore
         """Tests for :func:`.get_submission_stats_csv`."""
         # test basic response
+        mock_get_document_count_by_yymm.return_value = 0
         mock_get_monthly_submission_stats.return_value = list()
         response_data, code, headers = stats_page.get_submission_stats_csv()
         self.assertEqual(code, status.HTTP_200_OK, 'Response should be OK.')

diff --git a/browse/controllers/year.py b/browse/controllers/year.py
@@ -1,19 +1,20 @@
-"""Handle requests for info about one year of archive activity"""
+"""Handle requests for info about one year of archive activity."""
 
 from datetime import date
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple
 
 from werkzeug.exceptions import BadRequest
-from flask import current_app, url_for
+from flask import url_for
 
 from arxiv import status, taxonomy
-from browse.domain.listing import MonthCount, ListingCountResponse
+from browse.domain.listing import MonthCount
 from browse.controllers.list_page import get_listing_service
 from browse.controllers.years_operating import years_operating, stats_by_year
 
 
 def year_page(archive_id: str, year: int) -> Any:
-    """Get year page for archive.
+    """
+    Get year page for archive.
 
     Parameters
     ----------
@@ -31,12 +32,13 @@ def year_page(archive_id: str, year: int) -> Any:
         HTTP status code.
     dict
         Headers to add to the response.
+
     """
     thisYear = date.today().year
 
     if year is None:
         year = thisYear
-        
+
     if year > thisYear:
         # 307 because year might be valid in the future
         return {}, status.HTTP_307_TEMPORARY_REDIRECT, {'Location': '/'}
@@ -57,8 +59,8 @@ def year_page(archive_id: str, year: int) -> Any:
 
     for month in month_listing['month_counts']:
         month['art'] = ascii_art_month(archive_id, month)  # type: ignore
-        month['yymm'] =f"{month['year']}-{month['month']:02}"  #type: ignore
-        month['url'] = url_for('browse.list_articles', #type: ignore
+        month['yymm'] = f"{month['year']}-{month['month']:02}"  # type: ignore
+        month['url'] = url_for('browse.list_articles',  # type: ignore
                                context=archive_id,
                                subcontext=f"{month['year']}{month['month']:02}")
 
@@ -87,7 +89,7 @@ def ascii_art_month(archive_id: str, month: MonthCount) -> List[Tuple[str, Optio
     tot = month['new'] + month['cross']
     yyyymm = f"{month['year']}{month['month']:02}"
 
-    def _makestep(idx:int) -> Tuple[str, Optional[str]]:
+    def _makestep(idx: int) -> Tuple[str, Optional[str]]:
         if idx % ASCII_ART_URL_STEP == 0:
             return (ASCII_ART_CHR,
                     url_for('browse.list_articles',

diff --git a/browse/routes/ui.py b/browse/routes/ui.py
@@ -24,6 +24,7 @@
 
 @blueprint.context_processor
 def inject_now() -> None:
+    """Inject current datetime into request context."""
     return dict(request_datetime=datetime.now())
 
 

diff --git a/browse/templates/home/home.html b/browse/templates/home/home.html
@@ -55,15 +55,13 @@
 {{- group_section(('grp_physics','grp_math','grp_cs','grp_q-bio','grp_q-fin','grp_stat','grp_eess','grp_econ')) }}
 
 <hr />
-{#- TODO: remove because of new footer? -#}
 <h2>About arXiv</h2>
 <ul>
-  <li><a href="/help/general">General information</a> and <a href="/help/scientific_ad_board">Scientific Advisory Board</a></li>
-  <li><a href="/help/support">Support and Governance Model</a> and <a href="https://confluence.cornell.edu/x/NqlRF">Member Advisory Board</a></li>
-  <li><a href="/help/find">Find</a>, <a href="/help/view">view</a>, <a href="{{ url_for('subscribe') }}">email alerts</a> and <a href="/help/rss">RSS feeds</a></li>
-  <li><a href="/help/submit">Submission</a> and <a href="/help/moderation">moderation</a> details</li>
-  <li><a href="/help/stats">Usage statistics</a> and <a href="/new/">news</a></li>
-  <li>See also searchable <a href="{{ url_for('help') }}">help pages</a></li>
+  <li><a href="/about">General information</a></li>
+  <li><a href="/help/submit">How to Submit to arXiv</a></li>
+  <li><a href="/new">News</a></li>
+  <li><a href="/about/membership">Membership & Giving</a></li>
+  <li><a href="/about/people">Who We Are</a></li>
 </ul>
 
 {% endblock content -%}

diff --git a/browse/templates/home/news.html b/browse/templates/home/news.html
@@ -1,5 +1,3 @@
 {#- News blurbs appear at the top of the home page. Generally there should be no more than four items. -#}
-02 Jul 2019: <a href="http://bit.ly/arXivUXSpecialist3">We are hiring: arXiv User Experience Specialist.</a><br/>
 12 Jun 2019: <a href="http://bit.ly/arXivExecutiveDirector3">We are hiring: Executive Director of arXiv.</a><br/>
 11 Jun 2019: <a href="https://arxiv.org/new/#june11_2019">Announcing a new category and category mergers.</a><br/>
-20 May 2019: <a href="http://bit.ly/arXivEngineer4">We are hiring: arXiv Service Reliability Engineer.</a><br/>
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -1,3 +1,4 @@
+"""Configuration file for the Sphinx documentation builder."""
 # -*- coding: utf-8 -*-
 #
 # Configuration file for the Sphinx documentation builder.

diff --git a/tests/data/abs_files/ftp/arxiv/papers/1702/1702.00249.abs b/tests/data/abs_files/ftp/arxiv/papers/1702/1702.00249.abs
@@ -0,0 +1,29 @@
+------------------------------------------------------------------------------
+\\
+arXiv:1702.00249
+From: Martin Eker{\aa} <ARXIVOPS-805@example.org>
+Date: Wed, 1 Feb 2017 13:41:33 GMT   (13kb,D)
+
+Title: Quantum algorithms for computing short discrete logarithms and factoring
+  RSA integers
+Authors: Martin Eker{\aa} and Johan H{\aa}stad
+Categories: cs.CR quant-ph
+License: http://arxiv.org/licenses/nonexclusive-distrib/1.0/
+\\
+  In this paper we generalize the quantum algorithm for computing short
+discrete logarithms previously introduced by Eker{\aa} so as to allow for
+various tradeoffs between the number of times that the algorithm need be
+executed on the one hand, and the complexity of the algorithm and the
+requirements it imposes on the quantum computer on the other hand.
+  Furthermore, we describe applications of algorithms for computing short
+discrete logarithms. In particular, we show how other important problems such
+as those of factoring RSA integers and of finding the order of groups under
+side information may be recast as short discrete logarithm problems. This
+immediately gives rise to an algorithm for factoring RSA integers that is less
+complex than Shor's general factoring algorithm in the sense that it imposes
+smaller requirements on the quantum computer.
+  In both our algorithm and Shor's algorithm, the main hurdle is to compute a
+modular exponentiation in superposition. When factoring an n bit integer, the
+exponent is of length 2n bits in Shor's algorithm, compared to slightly more
+than n/2 bits in our algorithm.
+\\
diff --git a/tests/legacy_comparison/__init__.py b/tests/legacy_comparison/__init__.py
@@ -0,0 +1 @@
+"""Contains legacy comparison tests."""
diff --git a/tests/legacy_comparison/abs_page_comparison.py b/tests/legacy_comparison/abs_page_comparison.py
@@ -1,3 +1,4 @@
+"""Abs page comparison tests."""
 import argparse
 import itertools
 import sys
@@ -54,7 +55,7 @@
 To skip ancillary file comparisons: '--skip-ancillary'
 
 Improvements:
- Better reporting format, right now the comparisons produce just strings. 
+ Better reporting format, right now the comparisons produce just strings.
 """
 
 logging.basicConfig(filename="abs_page_comparison.log", level=logging.DEBUG)
@@ -110,6 +111,7 @@ def _paperid_generator_from_file(path: str, excluded: List[str])->Iterator[str]:
 
 
 def paperid_generator(path: str, excluded: List[str]) -> Iterator[str]:
+    """Generate an arXiv paper ID."""
     for ( dir_name, subdir_list, file_list) in os.walk(path):
         for fname in file_list:
             fname_path = os.path.join(dir_name, fname)
@@ -146,6 +148,7 @@ def paperid_iterator(path: str, excluded: List[str]) -> List[str]:
 
 
 def fetch_abs(compare_res_fn: Callable[[res_arg_dict], List[BadResult]], paper_id: str) -> Tuple[Dict, List[BadResult]]:
+    """Fetch an abs page."""
     ng_url = ng_abs_base_url + paper_id
     legacy_url = legacy_abs_base_url + paper_id
 
@@ -163,8 +166,12 @@ def fetch_abs(compare_res_fn: Callable[[res_arg_dict], List[BadResult]], paper_i
 
 
 def run_compare_response(skips: Set[str], res_args: res_arg_dict) -> Iterator[BadResult]:
-    """ This is also where we do most of the cleaning on text, for things
-    we know that we do not want to compare."""
+    """
+    Compare responses.
+
+    This is also where we do most of the cleaning on text, for things
+    we know that we do not want to compare.
+    """
     legacy_text = res_args['legacy_res'].text
     ng_text = res_args['ng_res'].text
 
@@ -188,12 +195,13 @@ def call_it(fn: Callable[[text_arg_dict], BadResult]) -> BadResult:
              return BadResult(res_args['paper_id'], 'run_compare_response', traceback.format_exc())
 
     logging.debug(f"about to do compares for {res_args['paper_id']}")
-    
+
     return filter(None, itertools.chain(
         map(call_it, res_comparisons), run_compare_text(text_dict)))
 
 
 def run_compare_text(text_args: text_arg_dict) -> Iterator[BadResult]:
+    """Run the text comparison."""
     html_dict = process_text(text_args)
 
     def call_it(fn: Callable[[html_arg_dict], BadResult]) -> BadResult:
@@ -208,8 +216,9 @@ def call_it(fn: Callable[[html_arg_dict], BadResult]) -> BadResult:
 
 
 def run_compare_html(html_args: html_arg_dict) -> Iterator[BadResult]:
+    """Run comparison against HTML."""
     logging.debug(f'about to run HTML compares for {html_args["paper_id"]}')
-    
+
     def call_it(fn: Callable[[html_arg_dict], BadResult]) -> BadResult:
         # noinspection PyBroadException
         try:
@@ -223,10 +232,12 @@ def call_it(fn: Callable[[html_arg_dict], BadResult]) -> BadResult:
 
 
 def rm_email_hash(text: str) -> str:
+    """Remove the hash from the email link."""
     return re.sub(r'show-email/\w+/', 'show-email/', text)
 
 
 def process_text(text_args: text_arg_dict) -> html_arg_dict:
+    """Process text for comparison."""
     text_args['ng_text'] = ' '.join(text_args['ng_text'].split())
     text_args['legacy_text'] = ' '.join(text_args['legacy_text'].split())
 
@@ -242,6 +253,7 @@ def process_text(text_args: text_arg_dict) -> html_arg_dict:
 
 
 def main() -> None:
+    """Run the abs page comparison with provided arguments."""
     parser = argparse.ArgumentParser(
         description='Compare ng browse to legacy browse')
     parser.add_argument('--ids', default=False, )
@@ -302,22 +314,23 @@ def done()->bool:
 
                 def done_job( job ):
                     (config, bad_results) = job
-                    logging.debug(f"completed {config['paper_id']}")                                        
-                    visited_fh.write(f"{config['paper_id']}\n")                    
+                    logging.debug(f"completed {config['paper_id']}")
+                    visited_fh.write(f"{config['paper_id']}\n")
                     write_comparison(report_fh, (config,bad_results))
                     if done():
                         logging.info("done and existing")
                         exit(0)
-                        
+
                 [done_job(job) for job in completed_jobs]
 
 
 def _serialize(obj):
-    """JSON serializer for objects not serializable by default json code"""
+    """JSON serializer for objects not serializable by default json code."""
     return obj.__dict__
 
 
 def write_comparison(report_fh, result: Tuple[Dict, List[BadResult]])-> None:
+    """Write comparison output."""
     (config, bad_results) = result
     logging.debug("writing report for %s", config['paper_id'])
     if bad_results:
@@ -332,6 +345,7 @@ def write_comparison(report_fh, result: Tuple[Dict, List[BadResult]])-> None:
 
 
 def format_bad_result(bad: BadResult)->str:
+    """Format the BadResult object to a readable string."""
     rpt = f"** {bad.comparison}\n" \
           f"{bad.message} "
     if bad.similarity:
@@ -346,7 +360,7 @@ def format_bad_result(bad: BadResult)->str:
 
 
 def strip_by_delim(text: str, start: str, end: str) -> str:
-
+    """Strip text by delimiter."""
     if (start in text) and (end in text):
         def find_start() -> int:
             return text.index(start)

diff --git a/tests/legacy_comparison/abstract_comparisons.py b/tests/legacy_comparison/abstract_comparisons.py
@@ -1,14 +1,16 @@
+"""Includes function to compare abstracts."""
 from weighted_levenshtein import lev
 
 
 def lev_similarity(aa: str, bb: str) -> float:
     """
+    Get a Levenshtein similarity score.
+
     :param aa: first string
     :param bb: second string
     :return: The similarity of the two strings (0=bad, 1=match):
              1 - lev(aa, bb) / max(len(aa), len(bb))
     """
-
     # Since weighted levenshtein can't handle unicode,
     # convert to ASCII first: