From f2f03ba8b58c9e2bdbbff7b5e8c905e3a8c08021 Mon Sep 17 00:00:00 2001 From: Sridhar <59284206+sridhar562345@users.noreply.github.com> Date: Sun, 20 Aug 2023 12:41:14 +0530 Subject: [PATCH] update manylinux wheels download logic (#1250) * update manylinux wheels download logic * add tests for updated manylinux logic * Remove incorrect return statement from get_manylinux_wheel_url * convert the manylinux file names to lowercase in get_manylinux_wheel_url * add test to verify manylinux filenames are lowered * change print to logger output * mock requests.get in test_verify_manylinux_filename_is_lowered * apply black * fix mock.patch reference * :wrench: add `ignore_cache` option for ease of testing. :white_check_mark: update testcase to use `ignore_cache` * :art: fix flake8 --------- Co-authored-by: monkut Co-authored-by: shane --- tests/tests.py | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ zappa/core.py | 58 ++++++++++++++++++++++++++++++----------------- 2 files changed, 98 insertions(+), 21 deletions(-) diff --git a/tests/tests.py b/tests/tests.py index a08d30697..0b415d18a 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -197,6 +197,67 @@ def test_get_manylinux_python310(self): self.assertTrue(os.path.isfile(path)) os.remove(path) + def test_verify_python37_does_not_download_2_24_manylinux_wheel(self): + z = Zappa(runtime="python3.7") + cached_wheels_dir = os.path.join(tempfile.gettempdir(), "cached_wheels") + expected_wheel_path = os.path.join( + cached_wheels_dir, "cryptography-35.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.whl" + ) + + # Check with known manylinux wheel package + actual_wheel_path = z.get_cached_manylinux_wheel("cryptography", "35.0.0") + self.assertEqual(actual_wheel_path, expected_wheel_path) + os.remove(actual_wheel_path) + + def test_verify_downloaded_manylinux_wheel(self): + z = Zappa(runtime="python3.10") + cached_wheels_dir = os.path.join(tempfile.gettempdir(), "cached_wheels") + expected_wheel_path = os.path.join( + cached_wheels_dir, + "pycryptodome-3.16.0-cp35-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", + ) + + # check with a known manylinux wheel package + actual_wheel_path = z.get_cached_manylinux_wheel("pycryptodome", "3.16.0") + self.assertEqual(actual_wheel_path, expected_wheel_path) + os.remove(actual_wheel_path) + + def test_verify_manylinux_filename_is_lowered(self): + z = Zappa(runtime="python3.10") + expected_filename = "markupsafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" + + mock_package_data = { + "releases": { + "2.1.3": [ + { + "url": "https://files.pythonhosted.org/packages/a6/56/f1d4ee39e898a9e63470cbb7fae1c58cce6874f25f54220b89213a47f273/MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", + "filename": "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", + }, + { + "url": "https://files.pythonhosted.org/packages/12/b3/d9ed2c0971e1435b8a62354b18d3060b66c8cb1d368399ec0b9baa7c0ee5/MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + "filename": "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + }, + { + "url": "https://files.pythonhosted.org/packages/bf/b7/c5ba9b7ad9ad21fc4a60df226615cf43ead185d328b77b0327d603d00cc5/MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", + "filename": "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", + }, + ] + } + } + + with mock.patch("zappa.core.requests.get") as mock_get: + mock_get.return_value.json.return_value = mock_package_data + wheel_url, file_name = z.get_manylinux_wheel_url("markupsafe", "2.1.3", ignore_cache=True) + + self.assertEqual(file_name, expected_filename) + mock_get.assert_called_once_with( + "https://pypi.python.org/pypi/markupsafe/json", timeout=float(os.environ.get("PIP_TIMEOUT", 1.5)) + ) + + # Clean the generated files + cached_pypi_info_dir = os.path.join(tempfile.gettempdir(), "cached_pypi_info") + os.remove(os.path.join(cached_pypi_info_dir, "markupsafe-2.1.3.json")) + def test_get_manylinux_python311(self): z = Zappa(runtime="python3.11") self.assertIsNotNone(z.get_cached_manylinux_wheel("psycopg2-binary", "2.9.7")) diff --git a/zappa/core.py b/zappa/core.py index 19a95821d..ae80b77e1 100644 --- a/zappa/core.py +++ b/zappa/core.py @@ -24,6 +24,7 @@ from builtins import bytes, int from distutils.dir_util import copy_tree from io import open +from pathlib import Path from typing import Optional import boto3 @@ -320,13 +321,17 @@ def __init__( self.manylinux_suffix_start = "cp311" # AWS Lambda supports manylinux1/2010, manylinux2014, and manylinux_2_24 - manylinux_suffixes = ("_2_24", "2014", "2010", "1") + # Currently python3.7 lambda runtime does not support manylinux_2_24 + # See https://github.com/zappa/Zappa/issues/1249 for more details + if self.runtime == "python3.7": + self.manylinux_suffixes = ("2014", "2010", "1") + else: + self.manylinux_suffixes = ("_2_24", "2014", "2010", "1") + self.manylinux_wheel_file_match = re.compile( - rf'^.*{self.manylinux_suffix_start}-(manylinux_\d+_\d+_x86_64[.])?manylinux({"|".join(manylinux_suffixes)})_x86_64[.]whl$' # noqa: E501 - ) - self.manylinux_wheel_abi3_file_match = re.compile( - rf'^.*cp3.-abi3-manylinux({"|".join(manylinux_suffixes)})_x86_64.whl$' + rf'^.*{self.manylinux_suffix_start}-(manylinux_\d+_\d+_x86_64[.])?manylinux({"|".join(self.manylinux_suffixes)})_x86_64[.]whl$' # noqa: E501 ) + self.manylinux_wheel_abi3_file_match = re.compile(rf"^.*cp3.-abi3-manylinux.*_x86_64[.]whl$") self.endpoint_urls = endpoint_urls self.xray_tracing = xray_tracing @@ -922,11 +927,14 @@ def get_cached_manylinux_wheel(self, package_name, package_version, disable_prog wheel_path = os.path.join(cached_wheels_dir, wheel_file) for pathname in glob.iglob(wheel_path): - if re.match(self.manylinux_wheel_file_match, pathname) or re.match( - self.manylinux_wheel_abi3_file_match, pathname - ): - print(f" - {package_name}=={package_version}: Using locally cached manylinux wheel") + if re.match(self.manylinux_wheel_file_match, pathname): + logger.info(f" - {package_name}=={package_version}: Using locally cached manylinux wheel") return pathname + elif re.match(self.manylinux_wheel_abi3_file_match, pathname): + for manylinux_suffix in self.manylinux_suffixes: + if f"manylinux{manylinux_suffix}_x86_64" in pathname: + logger.info(f" - {package_name}=={package_version}: Using locally cached manylinux wheel") + return pathname # The file is not cached, download it. wheel_url, filename = self.get_manylinux_wheel_url(package_name, package_version) @@ -934,7 +942,7 @@ def get_cached_manylinux_wheel(self, package_name, package_version, disable_prog return None wheel_path = os.path.join(cached_wheels_dir, filename) - print(f" - {package_name}=={package_version}: Downloading") + logger.info(f" - {package_name}=={package_version}: Downloading") with open(wheel_path, "wb") as f: self.download_url_with_progress(wheel_url, f, disable_progress) @@ -943,7 +951,7 @@ def get_cached_manylinux_wheel(self, package_name, package_version, disable_prog return wheel_path - def get_manylinux_wheel_url(self, package_name, package_version): + def get_manylinux_wheel_url(self, package_name, package_version, ignore_cache: bool = False): """ For a given package name, returns a link to the download URL, else returns None. @@ -954,27 +962,31 @@ def get_manylinux_wheel_url(self, package_name, package_version): also caches the JSON file so that we don't have to poll Pypi every time. """ - cached_pypi_info_dir = os.path.join(tempfile.gettempdir(), "cached_pypi_info") - if not os.path.isdir(cached_pypi_info_dir): + cached_pypi_info_dir = Path(tempfile.gettempdir()) / "cached_pypi_info" + if not cached_pypi_info_dir.is_dir(): os.makedirs(cached_pypi_info_dir) + # Even though the metadata is for the package, we save it in a # filename that includes the package's version. This helps in # invalidating the cached file if the user moves to a different # version of the package. # Related: https://github.com/Miserlou/Zappa/issues/899 - json_file = "{0!s}-{1!s}.json".format(package_name, package_version) - json_file_path = os.path.join(cached_pypi_info_dir, json_file) - if os.path.exists(json_file_path): - with open(json_file_path, "rb") as metafile: + data = None + json_file_name = "{0!s}-{1!s}.json".format(package_name, package_version) + json_file_path = cached_pypi_info_dir / json_file_name + if json_file_path.exists(): + with json_file_path.open("rb") as metafile: data = json.load(metafile) - else: + + if not data or ignore_cache: url = "https://pypi.python.org/pypi/{}/json".format(package_name) try: res = requests.get(url, timeout=float(os.environ.get("PIP_TIMEOUT", 1.5))) data = res.json() except Exception: # pragma: no cover return None, None - with open(json_file_path, "wb") as metafile: + + with json_file_path.open("wb") as metafile: jsondata = json.dumps(data) metafile.write(bytes(jsondata, "utf-8")) @@ -984,9 +996,13 @@ def get_manylinux_wheel_url(self, package_name, package_version): for f in data["releases"][package_version]: if re.match(self.manylinux_wheel_file_match, f["filename"]): - return f["url"], f["filename"] + # Since we have already lowered package names in get_installed_packages + # manylinux caching is not working for packages with capital case in names like MarkupSafe + return f["url"], f["filename"].lower() elif re.match(self.manylinux_wheel_abi3_file_match, f["filename"]): - return f["url"], f["filename"] + for manylinux_suffix in self.manylinux_suffixes: + if f"manylinux{manylinux_suffix}_x86_64" in f["filename"]: + return f["url"], f["filename"].lower() return None, None ##