Merge pull request #50 from flyingcircusio/parse_requirements_string

Add parse_requirement_string function to parse requirement strings
flyingcircusio · May 15, 2024 · 550932d · 550932d
2 parents 796a855 + 1149608
commit 550932d
Show file tree

Hide file tree

Showing 2 changed files with 87 additions and 17 deletions.
diff --git a/src/appenv.py b/src/appenv.py
@@ -25,6 +25,7 @@
 import sys
 import tempfile
 import venv
+import re
 
 
 def cmd(c, merge_stderr=True, quiet=False):
@@ -225,6 +226,67 @@ def ensure_best_python(base):
         sys.exit(65)
 
 
+class ParsedRequirement:
+    """A parsed requirement from a requirement string.
+
+    Has a similiar interface to the real Requirement class from
+    packaging.requirements, but is reduced to the parts we need.
+    """
+
+    def __init__(self, name, url, requirement_string):
+        self.name = name
+        self.url = url
+        self.requirement_string = requirement_string
+
+    def __str__(self):
+        return self.requirement_string
+
+
+def parse_requirement_string(requirement_string):
+    """Parse a requirement from a requirement string.
+
+    This function is a simplified version of the Requirement class from
+    packaging.requirements.
+    Previously, this was done using pkg_resources.parse_requirements,
+    but pkg_resources is deprecated and errors out on import.
+    And the replacement packaging is apparently not packaged in python
+    virtualenvs where we need it.
+
+    See packaging / _parser.py for the requirements grammar.
+    As well as packaging / _tokenizer.py for the tokenization rules/regexes.
+    """
+    # packaging / _tokenizer.py
+    identifier_regex = r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b"
+    url_regex = r"[^ \t]+"
+    whitespace_regex = r"[ \t]+"
+    # comments copied from packaging / _parser.py
+    # requirement = WS? IDENTIFIER WS? extras WS? requirement_details
+    # extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
+    # requirement_details = AT URL (WS requirement_marker?)?
+    #                     | specifier WS? (requirement_marker)?
+    # requirement_marker = SEMICOLON marker WS?
+    # consider these comments for illustrative purporses only, since according
+    # to the source code, the actual grammar is subtly different from this :)
+
+    # We will make some simplifications here:
+    # - We only care about the name, and URL if present.
+    # - We assume that the requirement string is well-formed. If not,
+    #   pip operations will fail later on.
+    # - We will not parse extras, specifiers, or markers.
+
+    # check for name
+    name_match = re.search(f"^(?:{whitespace_regex})?{identifier_regex}",
+                           requirement_string)
+    name = name_match.group() if name_match else None
+    # check for URL
+    url_match = re.search(
+        f"@(?:{whitespace_regex})?(?P<url>{url_regex})"
+        f"(?:{whitespace_regex})?;?", requirement_string)
+    url = url_match.group('url') if url_match else None
+
+    return ParsedRequirement(name, url, requirement_string)
+
+
 class AppEnv(object):
 
     base = None  # The directory where we add the environments. Co-located
@@ -444,19 +506,6 @@ def update_lockfile(self, args=None, remaining=None):
         cmd("{tmpdir}/bin/python -m pip install -r requirements.txt".format(
             tmpdir=tmpdir))
 
-        # Hack because we might not have packaging, but the venv should
-        tmp_paths = cmd(
-            "{tmpdir}/bin/python -c"
-            " 'import sys; print(\"\\n\".join(sys.path))'".format(
-                tmpdir=tmpdir),
-            merge_stderr=False).decode(sys.getfilesystemencoding())
-        for line in tmp_paths.splitlines():
-            line = line.strip()
-            if not line:
-                continue
-            sys.path.append(line)
-        from packaging.requirements import Requirement
-
         extra_specs = []
         result = cmd(
             "{tmpdir}/bin/python -m pip freeze".format(tmpdir=tmpdir),
@@ -466,8 +515,8 @@ def update_lockfile(self, args=None, remaining=None):
             if line.strip().startswith('-e '):
                 # We'd like to pick up the original -e statement here.
                 continue
-            spec = Requirement(line)
-            pinned_versions[spec.name] = spec
+            parsed_requirement = parse_requirement_string(line)
+            pinned_versions[parsed_requirement.name] = parsed_requirement
         requested_versions = {}
         with open('requirements.txt') as f:
             for line in f.readlines():
@@ -481,8 +530,9 @@ def update_lockfile(self, args=None, remaining=None):
                 # filter comments, in particular # appenv-python-preferences
                 if line.strip().startswith('#'):
                     continue
-                spec = Requirement(line)
-                requested_versions[spec.name] = spec
+                parsed_requirement = parse_requirement_string(line)
+                requested_versions[
+                    parsed_requirement.name] = parsed_requirement
 
         final_versions = {}
         for spec in requested_versions.values():

diff --git a/tests/test_update_lockfile.py b/tests/test_update_lockfile.py
@@ -90,3 +90,23 @@ def new_which(string):
         with pytest.raises(SystemExit) as e:
             env.update_lockfile()
     assert e.value.code == 66
+
+
+def test_parse_requirement_name():
+    req_strings_without_url = [
+        "foo",
+        "foo[bar]",
+        "foo[bar,baz]~=1.0",
+        "foo==1.0",
+        "foo[bar,baz]!=1.0",
+        "foo<1.0",]
+    req_strings_with_url = [
+        "foo[bar,baz] @ https://example.com",
+        "foo[bar,baz] @ https://example.com ; python_version < '3.6'",]
+    for req_string in req_strings_without_url:
+        req = appenv.parse_requirement_string(req_string)
+        assert req.name == "foo"
+    for req_string in req_strings_with_url:
+        req = appenv.parse_requirement_string(req_string)
+        assert req.name == "foo"
+        assert req.url == "https://example.com"