From 9c816f20e8d4ba8df0148b11dfdefd6bf0cb8e8d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 12:05:46 -0400 Subject: [PATCH 01/20] Added numpy ^1.26.4 in pyproject.toml --- CHANGELOG.rst | 6 +++ poetry.lock | 101 +++++++++++++++++++++++-------------------------- pyproject.toml | 6 +-- 3 files changed, 57 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5d382a007..68278f59b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,12 @@ dcicutils Change Log ---------- +8.13.1 +====== +* Fallout from Python 3.12 support. + - Since numpy not in pyproject.toml and we need numpy < 2 (^1.26.4) failed to import dcicutils.ff_utils. + + 8.13.0 ====== * Updates related to Python 3.12. diff --git a/poetry.lock b/poetry.lock index b7b40dcc7..e9c60d4de 100644 --- a/poetry.lock +++ b/poetry.lock @@ -545,7 +545,6 @@ files = [ [package.dependencies] types-awscrt = "*" -typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.9\""} [package.extras] botocore = ["botocore"] @@ -1038,25 +1037,6 @@ files = [ {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] -[[package]] -name = "importlib-resources" -version = "6.4.0" -description = "Read resources from Python packages" -category = "main" -optional = false -python-versions = ">=3.8" -files = [ - {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"}, - {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"}, -] - -[package.dependencies] -zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"] - [[package]] name = "iniconfig" version = "2.0.0" @@ -1107,9 +1087,7 @@ files = [ [package.dependencies] attrs = ">=22.2.0" -importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} jsonschema-specifications = ">=2023.03.6" -pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""} referencing = ">=0.28.4" rpds-py = ">=0.7.1" @@ -1130,7 +1108,6 @@ files = [ ] [package.dependencies] -importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} referencing = ">=0.31.0" [[package]] @@ -1160,6 +1137,52 @@ files = [ [package.dependencies] psutil = {version = ">=4.0.0", markers = "sys_platform != \"cygwin\""} +[[package]] +name = "numpy" +version = "1.26.4" +description = "Fundamental package for array computing in Python" +category = "main" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, +] + [[package]] name = "openpyxl" version = "3.1.5" @@ -1251,18 +1274,6 @@ prettytable = ">=2.3.0" [package.extras] test = ["docutils", "mypy", "pytest-cov", "pytest-pycodestyle", "pytest-runner"] -[[package]] -name = "pkgutil-resolve-name" -version = "1.3.10" -description = "Resolve a name to an object." -category = "main" -optional = false -python-versions = ">=3.6" -files = [ - {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, - {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, -] - [[package]] name = "plaster" version = "1.1.2" @@ -2185,22 +2196,6 @@ WebOb = ">=1.2" docs = ["Sphinx (>=1.8.1)", "docutils", "pylons-sphinx-themes (>=1.0.8)"] tests = ["PasteDeploy", "WSGIProxy2", "coverage", "mock", "nose (<1.3.0)", "pyquery"] -[[package]] -name = "zipp" -version = "3.19.2" -description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" -optional = false -python-versions = ">=3.8" -files = [ - {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"}, - {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"}, -] - -[package.extras] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] - [[package]] name = "zope-deprecation" version = "5.0" @@ -2276,5 +2271,5 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" -python-versions = ">=3.8.1,<3.13" -content-hash = "6d3f00c8a5e582bd82576dbfd357cd86065acac7b175e6938d75dd37ef9918e8" +python-versions = ">=3.9,<3.13" +content-hash = "d4619fe4b432f7f2a940f432dcb33caa5ee04008d43167f257941b8b6acf9926" diff --git a/pyproject.toml b/pyproject.toml index 204cf0a88..03d1b09f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version 
= "8.13.0" +version = "8.13.1" # TODO: To become 8.13.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" @@ -29,7 +29,6 @@ classifiers = [ # Specify the Python versions you support here. 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', @@ -38,7 +37,7 @@ classifiers = [ [tool.poetry.dependencies] -python = ">=3.8.1,<3.13" +python = ">=3.9,<3.13" boto3 = "^1.34.136" botocore = "^1.34.136" # The DCIC portals (cgap-portal and fourfront) are very particular about which ElasticSearch version. @@ -51,6 +50,7 @@ docker = "^4.4.4" gitpython = "^3.1.2" jsonc-parser = "^1.1.5" jsonschema = "^4.22.0" +numpy = "^1.26.4" openpyxl = "^3.1.2" opensearch-py = "^2.0.1" pyOpenSSL = "^23.1.1" From 497f085cb90f54a96c1015d5a0cc720e6be673db Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 12:16:13 -0400 Subject: [PATCH 02/20] backed out numpy dependency in pyproject - maybe not good solution --- poetry.lock | 48 +----------------------------------------------- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 48 deletions(-) diff --git a/poetry.lock b/poetry.lock index e9c60d4de..d3b4e0487 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1137,52 +1137,6 @@ files = [ [package.dependencies] psutil = {version = ">=4.0.0", markers = "sys_platform != \"cygwin\""} -[[package]] -name = "numpy" -version = "1.26.4" -description = "Fundamental package for array computing in Python" -category = "main" -optional = false -python-versions = ">=3.9" -files = [ - {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, - {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, - {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, - {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, - {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, - {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, - {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, - {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, - {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, - {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, - {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, - {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, - {file = 
"numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, - {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, - {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, - {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, - {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, - {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, - {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, - {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, - {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, - {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, - {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, - {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, - {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, - {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, - {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, - {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, - {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, - {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, - {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, - {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, - {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, - {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, - {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, - {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, -] - [[package]] name = "openpyxl" version = "3.1.5" @@ -2272,4 +2226,4 @@ 
testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "d4619fe4b432f7f2a940f432dcb33caa5ee04008d43167f257941b8b6acf9926" +content-hash = "7461ea7e3673c5e96328f6a1fddf75619c2e8daa9451d293aa4190657ee607ad" diff --git a/pyproject.toml b/pyproject.toml index 03d1b09f2..f057b0af1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ docker = "^4.4.4" gitpython = "^3.1.2" jsonc-parser = "^1.1.5" jsonschema = "^4.22.0" -numpy = "^1.26.4" +# numpy = "^1.26.4" openpyxl = "^3.1.2" opensearch-py = "^2.0.1" pyOpenSSL = "^23.1.1" From be0bc60fe0fa40d1a34ec8cf2f4b83205ae805d6 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 12:53:47 -0400 Subject: [PATCH 03/20] added hack_for_elasticsearch_numpy_usage.py for numpy>2 issue --- CHANGELOG.rst | 6 +- .../hack_for_elasticsearch_numpy_usage.py | 9 +++ poetry.lock | 55 ++++++++++++++++++- pyproject.toml | 6 +- 4 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 dcicutils/hack_for_elasticsearch_numpy_usage.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 68278f59b..3f3f1b402 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,7 +9,11 @@ Change Log 8.13.1 ====== * Fallout from Python 3.12 support. - - Since numpy not in pyproject.toml and we need numpy < 2 (^1.26.4) failed to import dcicutils.ff_utils. + - Though dcicutils is not dependent on numpy, elasticsearch tries to import it, + and if it is installed and if it is a version greater than 1.x, we get this error: + AttributeError: `np.float_` was removed in the NumPy 2.0 release. Use `np.float64` instead. + So added a hack in hack_for_elasticsearch_numpy_usage.py for this specific case; + to be imported before we import elasticsearch modules. 8.13.0 diff --git a/dcicutils/hack_for_elasticsearch_numpy_usage.py b/dcicutils/hack_for_elasticsearch_numpy_usage.py new file mode 100644 index 000000000..c8e2493c6 --- /dev/null +++ b/dcicutils/hack_for_elasticsearch_numpy_usage.py @@ -0,0 +1,9 @@ +# Though dcicutils is not dependent on numpy, elasticsearch pulls it in iff it is installed, +# and if it is numpy 2.x the numpy.float_ constant has been retired and any reference to it +# yields an error from numpy (AttributeError: np.float_ was removed in the NumPy 2.0 release. 
+# Use np.float64 instead); this reference to numpy.float_ occurs in elasticsearch/serializer.py +try: + import numpy + numpy.float_ = numpy.float64 +except Exception: + pass diff --git a/poetry.lock b/poetry.lock index d3b4e0487..b7b40dcc7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -545,6 +545,7 @@ files = [ [package.dependencies] types-awscrt = "*" +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.9\""} [package.extras] botocore = ["botocore"] @@ -1037,6 +1038,25 @@ files = [ {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] +[[package]] +name = "importlib-resources" +version = "6.4.0" +description = "Read resources from Python packages" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"}, + {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"}, +] + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -1087,7 +1107,9 @@ files = [ [package.dependencies] attrs = ">=22.2.0" +importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} jsonschema-specifications = ">=2023.03.6" +pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""} referencing = ">=0.28.4" rpds-py = ">=0.7.1" @@ -1108,6 +1130,7 @@ files = [ ] [package.dependencies] +importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} referencing = ">=0.31.0" [[package]] @@ -1228,6 +1251,18 @@ prettytable = ">=2.3.0" [package.extras] test = ["docutils", "mypy", "pytest-cov", "pytest-pycodestyle", "pytest-runner"] +[[package]] +name = "pkgutil-resolve-name" +version = "1.3.10" +description = "Resolve a name to an object." 
+category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, + {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, +] + [[package]] name = "plaster" version = "1.1.2" @@ -2150,6 +2185,22 @@ WebOb = ">=1.2" docs = ["Sphinx (>=1.8.1)", "docutils", "pylons-sphinx-themes (>=1.0.8)"] tests = ["PasteDeploy", "WSGIProxy2", "coverage", "mock", "nose (<1.3.0)", "pyquery"] +[[package]] +name = "zipp" +version = "3.19.2" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"}, + {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"}, +] + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + [[package]] name = "zope-deprecation" version = "5.0" @@ -2225,5 +2276,5 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" -python-versions = ">=3.9,<3.13" -content-hash = "7461ea7e3673c5e96328f6a1fddf75619c2e8daa9451d293aa4190657ee607ad" +python-versions = ">=3.8.1,<3.13" +content-hash = "6d3f00c8a5e582bd82576dbfd357cd86065acac7b175e6938d75dd37ef9918e8" diff --git a/pyproject.toml b/pyproject.toml index f057b0af1..204cf0a88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.13.1" # TODO: To become 8.13.1 +version = "8.13.0" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" @@ -29,6 +29,7 @@ classifiers = [ # Specify the Python versions you support here. 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', @@ -37,7 +38,7 @@ classifiers = [ [tool.poetry.dependencies] -python = ">=3.9,<3.13" +python = ">=3.8.1,<3.13" boto3 = "^1.34.136" botocore = "^1.34.136" # The DCIC portals (cgap-portal and fourfront) are very particular about which ElasticSearch version. 
@@ -50,7 +51,6 @@ docker = "^4.4.4" gitpython = "^3.1.2" jsonc-parser = "^1.1.5" jsonschema = "^4.22.0" -# numpy = "^1.26.4" openpyxl = "^3.1.2" opensearch-py = "^2.0.1" pyOpenSSL = "^23.1.1" From d4c7098f9d32ebc4cc86754ceefc411b0a8b96c8 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 12:54:17 -0400 Subject: [PATCH 04/20] added hack_for_elasticsearch_numpy_usage.py for numpy>2 issue --- dcicutils/ff_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py index 442a9642e..ea939c2bc 100644 --- a/dcicutils/ff_utils.py +++ b/dcicutils/ff_utils.py @@ -7,6 +7,7 @@ import time from collections import namedtuple +import dcicutils.hack_for_elasticsearch_numpy from elasticsearch.exceptions import AuthorizationException from typing import Dict, List, Optional from urllib.parse import parse_qs, urlencode, urlparse, urlunparse From df41fd4e4e4dae3b629692397355a048359151ae Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 12:57:22 -0400 Subject: [PATCH 05/20] added hack_for_elasticsearch_numpy_usage.py for numpy>2 issue --- dcicutils/es_utils.py | 1 + dcicutils/ff_utils.py | 2 +- dcicutils/snapshot_utils.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dcicutils/es_utils.py b/dcicutils/es_utils.py index d1fd52a57..c576b3db7 100644 --- a/dcicutils/es_utils.py +++ b/dcicutils/es_utils.py @@ -1,6 +1,7 @@ import logging import boto3 from .misc_utils import PRINT +import dcicutils.hack_for_elasticsearch_numpy_usage from elasticsearch import Elasticsearch, RequestsHttpConnection from aws_requests_auth.boto_utils import BotoAWSRequestsAuth diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py index ea939c2bc..860029468 100644 --- a/dcicutils/ff_utils.py +++ b/dcicutils/ff_utils.py @@ -7,7 +7,7 @@ import time from collections import namedtuple -import dcicutils.hack_for_elasticsearch_numpy +import dcicutils.hack_for_elasticsearch_numpy_usage from elasticsearch.exceptions import AuthorizationException from typing import Dict, List, Optional from urllib.parse import parse_qs, urlencode, urlparse, urlunparse diff --git a/dcicutils/snapshot_utils.py b/dcicutils/snapshot_utils.py index 253fc5858..33fc507f3 100644 --- a/dcicutils/snapshot_utils.py +++ b/dcicutils/snapshot_utils.py @@ -1,6 +1,7 @@ import datetime import logging +import dcicutils.hack_for_elasticsearch_numpy_usage from elasticsearch.exceptions import NotFoundError from .misc_utils import ( environ_bool, PRINT, camel_case_to_snake_case, full_object_name, From af95f20e7703f39ef698c4c6d35d8ec35e98c8f3 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 12:58:52 -0400 Subject: [PATCH 06/20] added hack_for_elasticsearch_numpy_usage.py for numpy>2 issue --- dcicutils/hack_for_elasticsearch_numpy_usage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dcicutils/hack_for_elasticsearch_numpy_usage.py b/dcicutils/hack_for_elasticsearch_numpy_usage.py index c8e2493c6..230d20b14 100644 --- a/dcicutils/hack_for_elasticsearch_numpy_usage.py +++ b/dcicutils/hack_for_elasticsearch_numpy_usage.py @@ -1,7 +1,8 @@ # Though dcicutils is not dependent on numpy, elasticsearch pulls it in iff it is installed, # and if it is numpy 2.x the numpy.float_ constant has been retired and any reference to it # yields an error from numpy (AttributeError: np.float_ was removed in the NumPy 2.0 release. 
-# Use np.float64 instead); this reference to numpy.float_ occurs in elasticsearch/serializer.py
+# Use np.float64 instead); this reference to numpy.float_ occurs in elasticsearch/serializer.py,
+# and we short-circuit it here by explicitly setting numpy.float_ to numpy.float64.
 try:
     import numpy
     numpy.float_ = numpy.float64

From 25d2f80d22b18a39d798dde973c7f57653737c28 Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Mon, 8 Jul 2024 13:00:06 -0400
Subject: [PATCH 07/20] added hack_for_elasticsearch_numpy_usage.py for numpy>2 issue

---
 dcicutils/es_utils.py       | 2 +-
 dcicutils/ff_utils.py       | 2 +-
 dcicutils/snapshot_utils.py | 2 +-
 pyproject.toml              | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/dcicutils/es_utils.py b/dcicutils/es_utils.py
index c576b3db7..237b563be 100644
--- a/dcicutils/es_utils.py
+++ b/dcicutils/es_utils.py
@@ -1,7 +1,7 @@
 import logging
 import boto3
 from .misc_utils import PRINT
-import dcicutils.hack_for_elasticsearch_numpy_usage
+import dcicutils.hack_for_elasticsearch_numpy_usage  # noqa
 from elasticsearch import Elasticsearch, RequestsHttpConnection
 from aws_requests_auth.boto_utils import BotoAWSRequestsAuth

diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py
index 860029468..d2456fc1f 100644
--- a/dcicutils/ff_utils.py
+++ b/dcicutils/ff_utils.py
@@ -7,7 +7,7 @@ import time
 from collections import namedtuple

-import dcicutils.hack_for_elasticsearch_numpy_usage
+import dcicutils.hack_for_elasticsearch_numpy_usage  # noqa
 from elasticsearch.exceptions import AuthorizationException
 from typing import Dict, List, Optional
 from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
diff --git a/dcicutils/snapshot_utils.py b/dcicutils/snapshot_utils.py
index 33fc507f3..4576bef4f 100644
--- a/dcicutils/snapshot_utils.py
+++ b/dcicutils/snapshot_utils.py
@@ -1,7 +1,7 @@
 import datetime
 import logging

-import dcicutils.hack_for_elasticsearch_numpy_usage
+import dcicutils.hack_for_elasticsearch_numpy_usage  # noqa
 from elasticsearch.exceptions import NotFoundError
 from .misc_utils import (
     environ_bool, PRINT, camel_case_to_snake_case, full_object_name,
diff --git a/pyproject.toml b/pyproject.toml
index 204cf0a88..103c35308 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcicutils"
-version = "8.13.0"
+version = "8.13.0.1b1"  # TODO: To become 8.13.1
 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
 authors = ["4DN-DCIC Team "]
 license = "MIT"

From d428612735baba49839a8ab031551a7901cc9c18 Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Mon, 8 Jul 2024 13:02:58 -0400
Subject: [PATCH 08/20] added hack_for_elasticsearch_numpy_usage.py for numpy>2 issue

---
 docs/source/dcicutils.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/source/dcicutils.rst b/docs/source/dcicutils.rst
index b33b8a2b5..653e4e306 100644
--- a/docs/source/dcicutils.rst
+++ b/docs/source/dcicutils.rst
@@ -225,6 +225,13 @@ glacier_utils
    :members:


+hack_for_elasticsearch_numpy_usage
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. 
automodule:: dcicutils.hack_for_elasticsearch_numpy_usage + :members: + + http_utils ^^^^^^^^^^^ From d428612735baba49839a8ab031551a7901cc9c18 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 13:12:45 -0400 Subject: [PATCH 09/20] added hack_for_elasticsearch_numpy_usage.py for numpy>2 issue --- CHANGELOG.rst | 3 ++- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3f3f1b402..4d7b3300d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,8 +6,9 @@ dcicutils Change Log ---------- -8.13.1 +8.13.2 ====== +* N.B. Accidentially tagged/pushed 8.13.1 -> PLEASE IGNORE VERSION: 8.13.1 * Fallout from Python 3.12 support. - Though dcicutils is not dependent on numpy, elasticsearch tries to import it, and if it is installed and if it is a version greater than 1.x, we get this error: diff --git a/pyproject.toml b/pyproject.toml index 103c35308..629ede824 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.13.0.1b1" # TODO: To become 8.13.1 +version = "8.13.0.1b1" # TODO: To become 8.13.2 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From fbd3a4b06eeb55249a1679655def7a3480a84dae Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 13:13:07 -0400 Subject: [PATCH 10/20] added hack_for_elasticsearch_numpy_usage.py for numpy>2 issue --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 629ede824..2a95f9161 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.13.0.1b1" # TODO: To become 8.13.2 +version = "8.13.0.1b2" # TODO: To become 8.13.2 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 02ab6d6868471c80ac63a4dc2e9c7b202517b5f7 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 14:00:52 -0400 Subject: [PATCH 11/20] unit test for elasticsearch/numpy usage hack --- test/test_hack_for_elasticsearch_numpy_usage.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 test/test_hack_for_elasticsearch_numpy_usage.py diff --git a/test/test_hack_for_elasticsearch_numpy_usage.py b/test/test_hack_for_elasticsearch_numpy_usage.py new file mode 100644 index 000000000..10bb8266f --- /dev/null +++ b/test/test_hack_for_elasticsearch_numpy_usage.py @@ -0,0 +1,13 @@ +import subprocess +import sys + + +def test_hack_for_elasticsearch_numpy_usage(): + try: + subprocess.run("pip install numpy==2.0.0".split()) + for module in [module_name for module_name in sys.modules + if module_name.startswith("elasticsearch") or module_name.startswith("dcicutils")]: + del sys.modules[module] + import dcicutils.ff_utils # noqa + finally: + subprocess.run("pip uninstall --yes numpy".split()) From aafd547425e6e93ee325d6118d9250efc82288aa Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 14:14:31 -0400 Subject: [PATCH 12/20] unit test for elasticsearch/numpy usage hack --- test/test_hack_for_elasticsearch_numpy_usage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_hack_for_elasticsearch_numpy_usage.py b/test/test_hack_for_elasticsearch_numpy_usage.py index 10bb8266f..efd525ca4 100644 --- a/test/test_hack_for_elasticsearch_numpy_usage.py +++ b/test/test_hack_for_elasticsearch_numpy_usage.py @@ -2,6 
+2,7 @@ import sys +@pytest.mark.skip("This test seems to break others intermittently probably because messing with numpy installation.") def test_hack_for_elasticsearch_numpy_usage(): try: subprocess.run("pip install numpy==2.0.0".split()) From 3951f53698fe1b9fb14dbab976206ae1495ac0ed Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 14:14:50 -0400 Subject: [PATCH 13/20] unit test for elasticsearch/numpy usage hack --- test/test_hack_for_elasticsearch_numpy_usage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_hack_for_elasticsearch_numpy_usage.py b/test/test_hack_for_elasticsearch_numpy_usage.py index efd525ca4..937eb73ac 100644 --- a/test/test_hack_for_elasticsearch_numpy_usage.py +++ b/test/test_hack_for_elasticsearch_numpy_usage.py @@ -1,3 +1,4 @@ +import pytest import subprocess import sys From 9b7b0bc6ca9e583845825301adaaaaaf039580dc Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 14:21:56 -0400 Subject: [PATCH 14/20] unit test for elasticsearch/numpy usage hack --- Makefile | 24 ++++++++++++------- ...test_hack_for_elasticsearch_numpy_usage.py | 3 ++- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index f4b8dccb2..a59aed836 100644 --- a/Makefile +++ b/Makefile @@ -18,61 +18,69 @@ build: # builds test: # runs default tests, which are the unit tests make test-units make test-static + make test-last test-for-ga: poetry run flake8 dcicutils poetry run flake8 test --exclude=data_files make test-units-with-coverage + mnake test-last + +test-last: + pytest -vv -m "last" retest: # runs only failed tests from the last test run. (if no failures, it seems to run all?? -kmp 17-Dec-2020) - poetry run pytest -vv -r w --last-failed + poetry run pytest -vv -r w --last-failed -m "not last" test-all: # you have to be really brave to want this. 
a lot of things will err @git log -1 --decorate | head -1 @date - poetry run pytest -vv -r w + poetry run pytest -vv -r w -m "not last" + make test-last @git log -1 --decorate | head -1 @date test-most: # leaves out things that will probably err but runs unit tests and both kinds of integrations @git log -1 --decorate | head -1 @date - poetry run pytest -vv -r w -m "not static and not beanstalk_failure and not direct_es_query" + poetry run pytest -vv -r w -m "not static and not beanstalk_failure and not direct_es_query and not last" @git log -1 --decorate | head -1 @date test-units-with-coverage: @git log -1 --decorate | head -1 @date - poetry run coverage run --source dcicutils -m pytest -vv -r w -m "not static and not integratedx and not beanstalk_failure and not direct_es_query" + poetry run coverage run --source dcicutils -m pytest -vv -r w -m "not static and not integratedx and not beanstalk_failure and not direct_es_query and not last" + make test-last @git log -1 --decorate | head -1 @date test-units: # runs unit tests (and integration tests not backed by a unit test) @git log -1 --decorate | head -1 @date - poetry run pytest -vv -r w -m "not static and not integratedx and not beanstalk_failure and not direct_es_query" + poetry run pytest -vv -r w -m "not static and not integratedx and not beanstalk_failure and not direct_es_query and not last" + make test-last @git log -1 --decorate | head -1 @date test-integrations: # runs integration tests @git log -1 --decorate | head -1 @date - poetry run pytest -vv -r w -m "not static and (integrated or integratedx) and not beanstalk_failure and not direct_es_query" + poetry run pytest -vv -r w -m "not static and (integrated or integratedx) and not beanstalk_failure and not direct_es_query and not last" @git log -1 --decorate | head -1 @date test-direct-es-query: # must be called inside VPC (e.g., from foursight after cloning repo, setting up venv, etc) @git log -1 --decorate | head -1 @date - poetry run pytest -vv -r w -m "direct_es_query" + poetry run pytest -vv -r w -m "direct_es_query and not last" @git log -1 --decorate | head -1 @date test-static: @git log -1 --decorate | head -1 @date - poetry run pytest -vv -r w -m "static" + poetry run pytest -vv -r w -m "static and not last" poetry run flake8 dcicutils poetry run flake8 test --exclude=data_files @git log -1 --decorate | head -1 diff --git a/test/test_hack_for_elasticsearch_numpy_usage.py b/test/test_hack_for_elasticsearch_numpy_usage.py index 937eb73ac..bda21b2b7 100644 --- a/test/test_hack_for_elasticsearch_numpy_usage.py +++ b/test/test_hack_for_elasticsearch_numpy_usage.py @@ -3,7 +3,8 @@ import sys -@pytest.mark.skip("This test seems to break others intermittently probably because messing with numpy installation.") +pytestmark = [pytest.mark.last] + def test_hack_for_elasticsearch_numpy_usage(): try: subprocess.run("pip install numpy==2.0.0".split()) From 3e5999efd19a0de8719422ecfea00ccb2732f2a2 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 14:26:54 -0400 Subject: [PATCH 15/20] minor updates to utility view-portal-object script --- test/test_hack_for_elasticsearch_numpy_usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_hack_for_elasticsearch_numpy_usage.py b/test/test_hack_for_elasticsearch_numpy_usage.py index bda21b2b7..a3f2ce835 100644 --- a/test/test_hack_for_elasticsearch_numpy_usage.py +++ b/test/test_hack_for_elasticsearch_numpy_usage.py @@ -2,9 +2,9 @@ import subprocess import sys - pytestmark = [pytest.mark.last] + 
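+# NOTE: marked to run last (the "last" marker, selected via -m in the Makefile targets),
+# since reinstalling numpy below can disturb other tests that run in the same session.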
def test_hack_for_elasticsearch_numpy_usage(): try: subprocess.run("pip install numpy==2.0.0".split()) From fb05879372bbbc19a45a85dd17d677942b0ea8af Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 14:33:02 -0400 Subject: [PATCH 16/20] minor updates to utility view-portal-object script --- pytest.ini | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..0cd76e245 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +markers = + last: run these tests last From 8bd066fbfaf3be79244f5ce156af17e309f13e9d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 14:41:24 -0400 Subject: [PATCH 17/20] mistake in makefile wrt pytest --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a59aed836..271001712 100644 --- a/Makefile +++ b/Makefile @@ -24,10 +24,10 @@ test-for-ga: poetry run flake8 dcicutils poetry run flake8 test --exclude=data_files make test-units-with-coverage - mnake test-last + make test-last test-last: - pytest -vv -m "last" + poetry run pytest -vv -m "last" retest: # runs only failed tests from the last test run. (if no failures, it seems to run all?? -kmp 17-Dec-2020) poetry run pytest -vv -r w --last-failed -m "not last" From 3ed91605b2981dd59638312a7a74ba8dcae4eea9 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 14:45:02 -0400 Subject: [PATCH 18/20] mistake in makefile wrt pytest --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2a95f9161..d13a9543d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.13.0.1b2" # TODO: To become 8.13.2 +version = "8.13.2" # accidentally pushed 8.13.1 before ready - pushing this corrected 8.13.2 one before ready too but better than 8.13.1 # "8.13.0.1b3" # TODO: To become 8.13.2 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From bafa2dcd6db4ff90451d2c1fbbc9b1a624faf7ef Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 8 Jul 2024 18:57:42 -0400 Subject: [PATCH 19/20] Added/updated dev/troubleshooting scripts. --- CHANGELOG.rst | 7 +- dcicutils/scripts/update_portal_object.py | 430 ++++++++++++++++++++++ dcicutils/scripts/view_portal_object.py | 248 ++++++++----- pyproject.toml | 3 +- 4 files changed, 583 insertions(+), 105 deletions(-) create mode 100644 dcicutils/scripts/update_portal_object.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4d7b3300d..1a1aa2ab9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,15 +6,18 @@ dcicutils Change Log ---------- -8.13.2 +8.13.3 ====== -* N.B. Accidentially tagged/pushed 8.13.1 -> PLEASE IGNORE VERSION: 8.13.1 +* N.B. Accidentially tagged/pushed 8.13.1 -> PLEASE IGNORE VERSION: 8.13.1 (subsequently yanked). + And then to correct (while no permission to delete above) pushed unofficial 8.13.2. * Fallout from Python 3.12 support. - Though dcicutils is not dependent on numpy, elasticsearch tries to import it, and if it is installed and if it is a version greater than 1.x, we get this error: AttributeError: `np.float_` was removed in the NumPy 2.0 release. Use `np.float64` instead. So added a hack in hack_for_elasticsearch_numpy_usage.py for this specific case; to be imported before we import elasticsearch modules. 
+* Added/updated scripts from submitr: view_portal_object.py and update_portal_object.py + for dev/troubleshooting purposes. 8.13.0 diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py new file mode 100644 index 000000000..0918b8f26 --- /dev/null +++ b/dcicutils/scripts/update_portal_object.py @@ -0,0 +1,430 @@ +# ------------------------------------------------------------------------------------------------------ +# Command-line utility to update (post, patch, upsert) portal objects for SMaHT/CGAP/Fourfront. +# ------------------------------------------------------------------------------------------------------ +# Example commands: +# update-portal-object --post file_format.json +# update-portal-object --upsert directory-with-schema-named-dot-json-files +# update-portal-object --patch file-not-named-for-schema-name.json --schema UnalignedReads +# -------------------------------------------------------------------------------------------------- + +import argparse +from functools import lru_cache +import glob +import io +import json +import os +import sys +from typing import Callable, List, Optional, Tuple, Union +from dcicutils.command_utils import yes_or_no +from dcicutils.common import ORCHESTRATED_APPS, APP_SMAHT +from dcicutils.ff_utils import delete_metadata, purge_metadata +from dcicutils.misc_utils import get_error_message, PRINT +from dcicutils.portal_utils import Portal as PortalFromUtils + + +class Portal(PortalFromUtils): + + def delete_metadata(self, object_id: str) -> Optional[dict]: + if isinstance(object_id, str) and object_id and self.key: + return delete_metadata(obj_id=object_id, key=self.key) + return None + + def purge_metadata(self, object_id: str) -> Optional[dict]: + if isinstance(object_id, str) and object_id and self.key: + return purge_metadata(obj_id=object_id, key=self.key) + return None + + +_DEFAULT_APP = "smaht" +_SMAHT_ENV_ENVIRON_NAME = "SMAHT_ENV" + +# Schema properties to ignore (by default) for the view schema usage. 
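+# (Generally system-managed/calculated properties, not user-submittable data.)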
+_SCHEMAS_IGNORE_PROPERTIES = [ + "date_created", + "last_modified", + "principals_allowed", + "submitted_by", + "schema_version" +] + +_SCHEMA_ORDER = [ # See: smaht-portal/src/encoded/project/loadxl.py + "access_key", + "user", + "consortium", + "submission_center", + "file_format", + "quality_metric", + "output_file", + "reference_file", + "reference_genome", + "software", + "tracking_item", + "workflow", + "workflow_run", + "meta_workflow", + "meta_workflow_run", + "image", + "document", + "static_section", + "page", + "filter_set", + "higlass_view_config", + "ingestion_submission", + "ontology_term", + "protocol", + "donor", + "demographic", + "medical_history", + "diagnosis", + "exposure", + "family_history", + "medical_treatment", + "death_circumstances", + "tissue_collection", + "tissue", + "histology", + "cell_line", + "cell_culture", + "cell_culture_mixture", + "preparation_kit", + "treatment", + "sample_preparation", + "tissue_sample", + "cell_culture_sample", + "cell_sample", + "analyte", + "analyte_preparation", + "assay", + "library", + "library_preparation", + "sequencer", + "basecalling", + "sequencing", + "file_set", + "unaligned_reads", + "aligned_reads", + "variant_calls", +] + + +def main(): + + parser = argparse.ArgumentParser(description="View Portal object.") + parser.add_argument("--env", "-e", type=str, required=False, default=None, + help=f"Environment name (key from ~/.smaht-keys.json).") + parser.add_argument("--app", type=str, required=False, default=None, + help=f"Application name (one of: smaht, cgap, fourfront).") + parser.add_argument("--schema", type=str, required=False, default=None, + help="Use named schema rather than infer from post/patch/upsert file name.") + parser.add_argument("--post", type=str, required=False, default=None, help="POST data.") + parser.add_argument("--patch", type=str, required=False, default=None, help="PATCH data.") + parser.add_argument("--upsert", type=str, required=False, default=None, help="Upsert data.") + parser.add_argument("--delete", type=str, required=False, default=None, help="Delete data.") + parser.add_argument("--purge", type=str, required=False, default=None, help="Purge data.") + parser.add_argument("--confirm", action="store_true", required=False, default=False, help="Confirm before action.") + parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.") + parser.add_argument("--quiet", action="store_true", required=False, default=False, help="Quiet output.") + parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.") + args = parser.parse_args() + + def usage(message: Optional[str] = None) -> None: + nonlocal parser + _print(message) if isinstance(message, str) else None + parser.print_help() + sys.exit(1) + + if app := args.app: + if (app not in ORCHESTRATED_APPS) and ((app := app.lower()) not in ORCHESTRATED_APPS): + usage(f"ERROR: Unknown app name; must be one of: {' | '.join(ORCHESTRATED_APPS)}") + else: + app = APP_SMAHT + + portal = _create_portal(env=args.env, app=app, verbose=args.verbose, debug=args.debug) + + if explicit_schema_name := args.schema: + schema, explicit_schema_name = _get_schema(portal, explicit_schema_name) + if not schema: + usage(f"ERROR: Unknown schema name: {args.schema}") + + if not (args.post or args.patch or args.upsert or args.delete or args.purge): + usage() + + if args.post: + _post_or_patch_or_upsert(portal=portal, + file_or_directory=args.post, + 
explicit_schema_name=explicit_schema_name, + update_function=post_data, + update_action_name="POST", + confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug) + if args.patch: + _post_or_patch_or_upsert(portal=portal, + file_or_directory=args.patch, + explicit_schema_name=explicit_schema_name, + update_function=patch_data, + update_action_name="PATCH", + confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug) + if args.upsert: + _post_or_patch_or_upsert(portal=portal, + file_or_directory=args.upsert, + explicit_schema_name=explicit_schema_name, + update_function=upsert_data, + update_action_name="UPSERT", + confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug) + + if args.delete: + if not portal.get_metadata(args.delete, raise_exception=False): + _print(f"Cannot find given object: {args.delete}") + sys.exit(1) + if yes_or_no(f"Do you really want to delete this item: {args.delete} ?"): + portal.delete_metadata(args.delete) + + if args.purge: + if not portal.get_metadata(args.purge, raise_exception=False): + _print(f"Cannot find given object: {args.purge}") + sys.exit(1) + if yes_or_no(f"Do you really want to purge this item: {args.purge} ?"): + portal.delete_metadata(args.purge) + portal.purge_metadata(args.purge) + + +def _post_or_patch_or_upsert(portal: Portal, file_or_directory: str, + explicit_schema_name: str, + update_function: Callable, update_action_name: str, + confirm: bool = False, verbose: bool = False, + quiet: bool = False, debug: bool = False) -> None: + + def is_schema_name_list(portal: Portal, keys: list) -> bool: + if isinstance(keys, list): + for key in keys: + if portal.get_schema(key) is None: + return False + return True + return False + + def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str], + confirm: bool = False, verbose: bool = False, + quiet: bool = False, debug: bool = False) -> None: + + nonlocal update_function, update_action_name + if not quiet: + _print(f"Processing {update_action_name} file: {file}") + if data := _read_json_from_file(file): + if isinstance(data, dict): + if isinstance(schema_name, str) and schema_name: + if debug: + _print(f"DEBUG: File ({file}) contains an object of type: {schema_name}") + update_function(portal, data, schema_name, confirm=confirm, + file=file, verbose=verbose, debug=debug) + elif is_schema_name_list(portal, list(data.keys())): + if debug: + _print(f"DEBUG: File ({file}) contains a dictionary of schema names.") + for schema_name in data: + if isinstance(schema_data := data[schema_name], list): + if debug: + _print(f"DEBUG: Processing {update_action_name}s for type: {schema_name}") + for index, item in enumerate(schema_data): + update_function(portal, item, schema_name, confirm=confirm, + file=file, index=index, verbose=verbose, debug=debug) + else: + _print(f"WARNING: File ({file}) contains schema item which is not a list: {schema_name}") + else: + _print(f"WARNING: File ({file}) contains unknown item type.") + elif isinstance(data, list): + if debug: + _print(f"DEBUG: File ({file}) contains a list of objects of type: {schema_name}") + for index, item in enumerate(data): + update_function(portal, item, schema_name, confirm=confirm, + file=file, index=index, verbose=verbose, debug=debug) + if debug: + _print(f"DEBUG: Processing {update_action_name} file done: {file}") + + if os.path.isdir(file_or_directory): + if ((files := glob.glob(os.path.join(file_or_directory, "*.json"))) and + (files_and_schemas := 
_file_names_to_ordered_file_and_schema_names(portal, files))): # noqa + for file_and_schema in files_and_schemas: + if not (file := file_and_schema[0]): + continue + if not (schema_name := file_and_schema[1]) and not (schema_name := explicit_schema_name): + _print(f"ERROR: Schema cannot be inferred from file name and --schema not specified: {file}") + continue + post_or_patch_or_upsert(portal, file_and_schema[0], schema_name=schema_name, + confirm=confirm, quiet=quiet, verbose=verbose, debug=debug) + elif os.path.isfile(file := file_or_directory): + if ((schema_name := _get_schema_name_from_schema_named_json_file_name(portal, file)) or + (schema_name := explicit_schema_name)): # noqa + post_or_patch_or_upsert(portal, file, schema_name=schema_name, + confirm=confirm, quiet=quiet, verbose=verbose, debug=debug) + else: + post_or_patch_or_upsert(portal, file, schema_name=schema_name, + confirm=confirm, quiet=quiet, verbose=verbose, debug=debug) + # _print(f"ERROR: Schema cannot be inferred from file name and --schema not specified: {file}") + # return + else: + _print(f"ERROR: Cannot find file or directory: {file_or_directory}") + + +def post_data(portal: Portal, data: dict, schema_name: str, confirm: bool = False, + file: Optional[str] = None, index: int = 0, + verbose: bool = False, debug: bool = False) -> None: + if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)): + if isinstance(file, str) and isinstance(index, int): + _print(f"ERROR: Item for POST has no identifying property: {file} (#{index + 1})") + else: + _print(f"ERROR: Item for POST has no identifying property.") + return + if portal.get_metadata(identifying_path, raise_exception=False): + _print(f"ERROR: Item for POST already exists: {identifying_path}") + return + if (confirm is True) and not yes_or_no(f"POST data for: {identifying_path} ?"): + return + if verbose: + _print(f"POST {schema_name} item: {identifying_path}") + try: + portal.post_metadata(schema_name, data) + if debug: + _print(f"DEBUG: POST {schema_name} item done: {identifying_path}") + except Exception as e: + _print(f"ERROR: Cannot POST {schema_name} item: {identifying_path}") + _print(get_error_message(e)) + return + + +def patch_data(portal: Portal, data: dict, schema_name: str, confirm: bool = False, + file: Optional[str] = None, index: int = 0, + verbose: bool = False, debug: bool = False) -> None: + if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)): + if isinstance(file, str) and isinstance(index, int): + _print(f"ERROR: Item for PATCH has no identifying property: {file} (#{index + 1})") + else: + _print(f"ERROR: Item for PATCH has no identifying property.") + return + if not portal.get_metadata(identifying_path, raise_exception=False): + _print(f"ERROR: Item for PATCH does not already exist: {identifying_path}") + return + if (confirm is True) and not yes_or_no(f"PATCH data for: {identifying_path}"): + return + if verbose: + _print(f"PATCH {schema_name} item: {identifying_path}") + try: + portal.patch_metadata(identifying_path, data) + if debug: + _print(f"DEBUG: PATCH {schema_name} item OK: {identifying_path}") + except Exception as e: + _print(f"ERROR: Cannot PATCH {schema_name} item: {identifying_path}") + _print(e) + return + + +def upsert_data(portal: Portal, data: dict, schema_name: str, confirm: bool = False, + file: Optional[str] = None, index: int = 0, + verbose: bool = False, debug: bool = False) -> None: + if not (identifying_path := portal.get_identifying_path(data, 
portal_type=schema_name)):
+        if isinstance(file, str) and isinstance(index, int):
+            _print(f"ERROR: Item for UPSERT has no identifying property: {file} (#{index + 1})")
+        else:
+            _print(f"ERROR: Item for UPSERT has no identifying property.")
+        return
+    exists = portal.get_metadata(identifying_path, raise_exception=False)
+    if ((confirm is True) and not yes_or_no(f"{'PATCH' if exists else 'POST'} data for: {identifying_path} ?")):
+        return
+    if verbose:
+        _print(f"{'PATCH' if exists else 'POST'} {schema_name} item: {identifying_path}")
+    try:
+        portal.post_metadata(schema_name, data) if not exists else portal.patch_metadata(identifying_path, data)
+        if debug:
+            _print(f"DEBUG: UPSERT {schema_name} item OK: {identifying_path}")
+    except Exception as e:
+        _print(f"ERROR: Cannot UPSERT {schema_name} item: {identifying_path}")
+        _print(e)
+        return
+
+
+def _create_portal(env: Optional[str] = None, app: Optional[str] = None,
+                   verbose: bool = False, debug: bool = False) -> Optional[Portal]:
+
+    env_from_environ = None
+    if not env and (app == APP_SMAHT):
+        if env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME):
+            env_from_environ = True
+    if not (portal := Portal(env, app=app) if env or app else None):
+        return None
+    if verbose:
+        if (env := portal.env) or (env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME)):
+            _print(f"Portal environment"
+                   f"{f' (from {_SMAHT_ENV_ENVIRON_NAME})' if env_from_environ else ''}: {portal.env}")
+        if portal.keys_file:
+            _print(f"Portal keys file: {portal.keys_file}")
+        if portal.key_id:
+            _print(f"Portal key prefix: {portal.key_id[0:2]}******")
+        if portal.server:
+            _print(f"Portal server: {portal.server}")
+    return portal
+
+
+def _read_json_from_file(file: str) -> Optional[dict]:
+    try:
+        if not os.path.exists(file):
+            return None
+        with io.open(file, "r") as f:
+            try:
+                return json.load(f)
+            except Exception:
+                _print(f"ERROR: Cannot load JSON from file: {file}")
+                return None
+    except Exception:
+        _print(f"ERROR: Cannot open file: {file}")
+        return None
+
+
+def _file_names_to_ordered_file_and_schema_names(portal: Portal,
+                                                 files: Union[List[str], str]) -> List[Tuple[str, Optional[str]]]:
+    results = []
+    if isinstance(files, str):
+        files = [files]
+    if not isinstance(files, list):
+        return results
+    for file in files:
+        if isinstance(file, str) and file:
+            results.append((file, _get_schema_name_from_schema_named_json_file_name(portal, file)))
+    ordered_results = []
+    for schema_name in _SCHEMA_ORDER:
+        schema_name = portal.schema_name(schema_name)
+        if result := next((item for item in results if item[1] == schema_name), None):
+            ordered_results.append(result)
+            results.remove(result)
+    ordered_results.extend(results) if results else None
+    return ordered_results
+
+
+def _get_schema_name_from_schema_named_json_file_name(portal: Portal, value: str) -> Optional[str]:
+    try:
+        if not value.endswith(".json"):
+            return None
+        _, schema_name = _get_schema(portal, os.path.basename(value[:-5]))
+        return schema_name
+    except Exception:
+        return None
+
+
+@lru_cache(maxsize=1)
+def _get_schemas(portal: Portal) -> Optional[dict]:
+    return portal.get_schemas()
+
+
+@lru_cache(maxsize=100)
+def _get_schema(portal: Portal, name: str) -> Tuple[Optional[dict], Optional[str]]:
+    if portal and name and (name := name.replace("_", "").replace("-", "").strip().lower()):
+        if schemas := _get_schemas(portal):
+            for schema_name in schemas:
+                if schema_name.replace("_", "").replace("-", "").strip().lower() == name.lower():
+                    return schemas[schema_name], schema_name
+    return None, None
+
+
+def 
_print(*args, **kwargs) -> None: + PRINT(*args, **kwargs) + sys.stdout.flush() + + +if __name__ == "__main__": + main() diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index a6f2369be..bc28ccc12 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -62,9 +62,10 @@ import pyperclip import os import sys -from typing import Callable, List, Optional, Tuple +from typing import Callable, List, Optional, TextIO, Tuple, Union import yaml from dcicutils.captured_output import captured_output, uncaptured_output +from dcicutils.command_utils import yes_or_no from dcicutils.misc_utils import get_error_message, is_uuid, PRINT from dcicutils.portal_utils import Portal @@ -78,11 +79,15 @@ "schema_version" ] +_output_file: TextIO = None + def main(): + global _output_file + parser = argparse.ArgumentParser(description="View Portal object.") - parser.add_argument("uuid", type=str, + parser.add_argument("uuid", nargs="?", type=str, help=f"The uuid (or path) of the object to fetch and view. ") parser.add_argument("--ini", type=str, required=False, default=None, help=f"Name of the application .ini file.") @@ -97,11 +102,9 @@ def main(): parser.add_argument("--all", action="store_true", required=False, default=False, help="Include all properties for schema usage.") parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.") + parser.add_argument("--inserts", action="store_true", required=False, default=False, + help="Format output for subsequent inserts.") parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.") - parser.add_argument("--post", type=str, required=False, default=None, - help="POST data of the main arg type with data from file specified with this option.") - parser.add_argument("--patch", type=str, required=False, default=None, - help="PATCH data of the main arg type with data from file specified with this option.") parser.add_argument("--database", action="store_true", required=False, default=False, help="Read from database output.") parser.add_argument("--bool", action="store_true", required=False, @@ -109,6 +112,7 @@ def main(): parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.") parser.add_argument("--copy", "-c", action="store_true", required=False, default=False, help="Copy object data to clipboard.") + parser.add_argument("--output", required=False, help="Output file.", type=str) parser.add_argument("--indent", required=False, default=False, help="Indent output.", type=int) parser.add_argument("--details", action="store_true", required=False, default=False, help="Detailed output.") parser.add_argument("--more-details", action="store_true", required=False, default=False, @@ -123,54 +127,57 @@ def main(): portal = _create_portal(ini=args.ini, env=args.env or os.environ.get("SMAHT_ENV"), server=args.server, app=args.app, verbose=args.verbose, debug=args.debug) - if args.uuid.lower() == "schemas" or args.uuid.lower() == "schema": + if not args.uuid: + _print("UUID or schema or path required.") + _exit(1) + + if args.output: + if os.path.exists(args.output): + if os.path.isdir(args.output): + _print(f"Specified output file already exists as a directory: {args.output}") + _exit(1) + elif os.path.isfile(args.output): + _print(f"Specified output file already exists: {args.output}") + if not yes_or_no(f"Do you want to overwrite this file?"): + _exit(0) + 
_output_file = io.open(args.output, "w") + + if args.uuid and ((args.uuid.lower() == "schemas") or (args.uuid.lower() == "schema")): _print_all_schema_names(portal=portal, details=args.details, more_details=args.more_details, all=args.all, tree=args.tree, raw=args.raw, raw_yaml=args.yaml) return - elif args.uuid.lower() == "info": # TODO: need word for what consortiums and submission centers are collectively + elif args.uuid and (args.uuid.lower() == "info"): if consortia := portal.get_metadata("/consortia?limit=1000"): - _print("Known Consortia:") + _print_output("Known Consortia:") consortia = sorted(consortia.get("@graph", []), key=lambda key: key.get("identifier")) for consortium in consortia: if ((consortium_name := consortium.get("identifier")) and (consortium_uuid := consortium.get("uuid"))): # noqa - _print(f"- {consortium_name}: {consortium_uuid}") + _print_output(f"- {consortium_name}: {consortium_uuid}") if submission_centers := portal.get_metadata("/submission-centers?limit=1000"): - _print("Known Submission Centers:") + _print_output("Known Submission Centers:") submission_centers = sorted(submission_centers.get("@graph", []), key=lambda key: key.get("identifier")) for submission_center in submission_centers: if ((submission_center_name := submission_center.get("identifier")) and (submission_center_uuid := submission_center.get("uuid"))): # noqa - _print(f"- {submission_center_name}: {submission_center_uuid}") + _print_output(f"- {submission_center_name}: {submission_center_uuid}") try: if file_formats := portal.get_metadata("/file-formats?limit=1000"): - _print("Known File Formats:") + _print_output("Known File Formats:") file_formats = sorted(file_formats.get("@graph", []), key=lambda key: key.get("identifier")) for file_format in file_formats: if ((file_format_name := file_format.get("identifier")) and (file_format_uuid := file_format.get("uuid"))): # noqa - _print(f"- {file_format_name}: {file_format_uuid}") + _print_output(f"- {file_format_name}: {file_format_uuid}") except Exception: - _print("Known File Formats: None") + _print_output("Known File Formats: None") return if _is_maybe_schema_name(args.uuid): args.schema = True if args.schema: - if args.post: - if post_data := _read_json_from_file(args.post): - if args.verbose: - _print(f"POSTing data from file ({args.post}) as type: {args.uuid}") - if isinstance(post_data, dict): - post_data = [post_data] - elif not isinstance(post_data, list): - _print(f"POST data neither list nor dictionary: {args.post}") - for item in post_data: - portal.post_metadata(args.uuid, item) - if args.verbose: - _print(f"Done POSTing data from file ({args.post}) as type: {args.uuid}") schema, schema_name = _get_schema(portal, args.uuid) if schema: if args.copy: @@ -178,49 +185,33 @@ def main(): if not args.raw: if parent_schema_name := _get_parent_schema_name(schema): if schema.get("isAbstract") is True: - _print(f"{schema_name} | parent: {parent_schema_name} | abstract") + _print_output(f"{schema_name} | parent: {parent_schema_name} | abstract") else: - _print(f"{schema_name} | parent: {parent_schema_name}") + _print_output(f"{schema_name} | parent: {parent_schema_name}") else: - _print(schema_name) + _print_output(schema_name) _print_schema(schema, details=args.details, more_details=args.details, all=args.all, raw=args.raw, raw_yaml=args.yaml) return - elif args.patch: - if patch_data := _read_json_from_file(args.patch): - if args.verbose: - _print(f"PATCHing data from file ({args.patch}) for object: {args.uuid}") - if isinstance(patch_data, 
dict): - patch_data = [patch_data] - elif not isinstance(patch_data, list): - _print(f"PATCH data neither list nor dictionary: {args.patch}") - for item in patch_data: - portal.patch_metadata(args.uuid, item) - if args.verbose: - _print(f"Done PATCHing data from file ({args.patch}) as type: {args.uuid}") - return - else: - _print(f"No PATCH data found in file: {args.patch}") - sys.exit(1) - data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, + data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, inserts=args.inserts, database=args.database, check=args.bool, verbose=args.verbose) if args.bool: if data: _print(f"{args.uuid}: found") - sys.exit(0) + _exit(0) else: _print(f"{args.uuid}: not found") - sys.exit(1) + _exit(1) if args.copy: pyperclip.copy(json.dumps(data, indent=4)) if args.yaml: - _print(yaml.dump(data)) + _print_output(yaml.dump(data)) else: if args.indent > 0: - _print(_format_json_with_indent(data, indent=args.indent)) + _print_output(_format_json_with_indent(data, indent=args.indent)) else: - _print(json.dumps(data, default=str, indent=4)) + _print_output(json.dumps(data, default=str, indent=4)) def _format_json_with_indent(value: dict, indent: int = 0) -> Optional[str]: @@ -254,7 +245,7 @@ def _create_portal(ini: str, env: Optional[str] = None, def _get_portal_object(portal: Portal, uuid: str, - raw: bool = False, database: bool = False, + raw: bool = False, inserts: bool = False, database: bool = False, check: bool = False, verbose: bool = False) -> dict: response = None try: @@ -262,7 +253,7 @@ def _get_portal_object(portal: Portal, uuid: str, path = f"/{uuid}" else: path = uuid - response = portal.get(path, raw=raw, database=database) + response = portal.get(path, raw=raw or inserts, database=database) except Exception as e: if "404" in str(e) and "not found" in str(e).lower(): _print(f"Portal object not found at {portal.server}: {uuid}") @@ -278,7 +269,21 @@ def _get_portal_object(portal: Portal, uuid: str, if not response.json: _exit(f"Invalid JSON getting Portal object: {uuid}") response = response.json() - if raw: + if inserts: + # Format results as suitable for inserts (e.g. via update-portal-object). + response.pop("schema_version", None) + if ((isinstance(results := response.get("@graph"), list) and results) and + (isinstance(results_type := response.get("@type"), list) and results_type) and + (isinstance(results_type := results_type[0], str) and results_type.endswith("SearchResults")) and + (results_type := results_type[0:-len("SearchResults")])): # noqa + for result in results: + result.pop("schema_version", None) + response = {f"{results_type}": results} + # Get the result as non-raw so we can get its type. 
+        elif ((response_cooked := portal.get(path, database=database)) and
+                (isinstance(response_type := response_cooked.json().get("@type"), list) and response_type)):
+            response = {f"{response_type[0]}": [response]}
+    elif raw:
         response.pop("schema_version", None)
     return response
 
@@ -292,7 +297,7 @@ def _get_schema(portal: Portal, name: str) -> Tuple[Optional[dict], Optional[str]]:
     if portal and name and (name := name.replace("_", "").replace("-", "").strip().lower()):
         if schemas := _get_schemas(portal):
             for schema_name in schemas:
-                if schema_name.replace("_", "").replace("-", "").strip().lower() == name:
+                if schema_name.replace("_", "").replace("-", "").strip().lower() == name.lower():
                     return schemas[schema_name], schema_name
     return None, None
 
@@ -303,13 +308,37 @@ def _is_maybe_schema_name(value: str) -> bool:
     return False
 
 
+def _is_schema_name(portal: Portal, value: str) -> bool:
+    try:
+        return _get_schema(portal, value)[0] is not None
+    except Exception:
+        return False
+
+
+def _is_schema_named_json_file_name(portal: Portal, value: str) -> bool:
+    try:
+        return value.endswith(".json") and _is_schema_name(portal, os.path.basename(value[:-5]))
+    except Exception:
+        return False
+
+
+def _get_schema_name_from_schema_named_json_file_name(portal: Portal, value: str) -> Optional[str]:
+    try:
+        if not value.endswith(".json"):
+            return None
+        _, schema_name = _get_schema(portal, os.path.basename(value[:-5]))
+        return schema_name
+    except Exception:
+        return None
+
+
 def _print_schema(schema: dict, details: bool = False, more_details: bool = False, all: bool = False,
                   raw: bool = False, raw_yaml: bool = False) -> None:
     if raw:
         if raw_yaml:
-            _print(yaml.dump(schema))
+            _print_output(yaml.dump(schema))
         else:
-            _print(json.dumps(schema, indent=4))
+            _print_output(json.dumps(schema, indent=4))
         return
     _print_schema_info(schema, details=details, more_details=more_details, all=all)
 
 
@@ -322,37 +351,37 @@ def _print_schema_info(schema: dict, level: int = 0,
     identifying_properties = schema.get("identifyingProperties")
     if level == 0:
         if required_properties := schema.get("required"):
-            _print("- required properties:")
+            _print_output("- required properties:")
             for required_property in sorted(list(set(required_properties))):
                 if not all and required_property in _SCHEMAS_IGNORE_PROPERTIES:
                     continue
                 if property_type := (info := schema.get("properties", {}).get(required_property, {})).get("type"):
                     if property_type == "array" and (array_type := info.get("items", {}).get("type")):
-                        _print(f"  - {required_property}: {property_type} of {array_type}")
+                        _print_output(f"  - {required_property}: {property_type} of {array_type}")
                     else:
-                        _print(f"  - {required_property}: {property_type}")
+                        _print_output(f"  - {required_property}: {property_type}")
                 else:
-                    _print(f"  - {required_property}")
+                    _print_output(f"  - {required_property}")
             if isinstance(any_of := schema.get("anyOf"), list):
                 if ((any_of == [{"required": ["submission_centers"]}, {"required": ["consortia"]}]) or
                     (any_of == [{"required": ["consortia"]}, {"required": ["submission_centers"]}])):  # noqa
                     # Very very special case.
- _print(f" - at least one of:") - _print(f" - consortia: array of string") - _print(f" - submission_centers: array of string") + _print_output(f" - at least one of:") + _print_output(f" - consortia: array of string") + _print_output(f" - submission_centers: array of string") required = required_properties if identifying_properties := schema.get("identifyingProperties"): - _print("- identifying properties:") + _print_output("- identifying properties:") for identifying_property in sorted(list(set(identifying_properties))): if not all and identifying_property in _SCHEMAS_IGNORE_PROPERTIES: continue if property_type := (info := schema.get("properties", {}).get(identifying_property, {})).get("type"): if property_type == "array" and (array_type := info.get("items", {}).get("type")): - _print(f" - {identifying_property}: {property_type} of {array_type}") + _print_output(f" - {identifying_property}: {property_type} of {array_type}") else: - _print(f" - {identifying_property}: {property_type}") + _print_output(f" - {identifying_property}: {property_type}") else: - _print(f" - {identifying_property}") + _print_output(f" - {identifying_property}") if properties := schema.get("properties"): reference_properties = [] for property_name in properties: @@ -362,16 +391,16 @@ def _print_schema_info(schema: dict, level: int = 0, if link_to := property.get("linkTo"): reference_properties.append({"name": property_name, "ref": link_to}) if reference_properties: - _print("- reference properties:") + _print_output("- reference properties:") for reference_property in sorted(reference_properties, key=lambda key: key["name"]): - _print(f" - {reference_property['name']}: {reference_property['ref']}") + _print_output(f" - {reference_property['name']}: {reference_property['ref']}") if schema.get("additionalProperties") is True: - _print(f" - additional properties are allowed") + _print_output(f" - additional properties are allowed") if not more_details: return if properties := (schema.get("properties") if level == 0 else schema): if level == 0: - _print("- properties:") + _print_output("- properties:") for property_name in sorted(properties): if not all and property_name in _SCHEMAS_IGNORE_PROPERTIES: continue @@ -392,7 +421,7 @@ def _print_schema_info(schema: dict, level: int = 0, property_type = "open ended object" if property.get("calculatedProperty"): suffix += f" | calculated" - _print(f"{spaces}- {property_name}: {property_type}{suffix}") + _print_output(f"{spaces}- {property_name}: {property_type}{suffix}") _print_schema_info(object_properties, level=level + 1, details=details, more_details=more_details, all=all, required=property.get("required")) @@ -416,28 +445,28 @@ def _print_schema_info(schema: dict, level: int = 0, if property_type := property_items.get("type"): if property_type == "object": suffix = "" - _print(f"{spaces}- {property_name}: array of object{suffix}") + _print_output(f"{spaces}- {property_name}: array of object{suffix}") _print_schema_info(property_items.get("properties"), level=level + 1, details=details, more_details=more_details, all=all, required=property_items.get("required")) elif property_type == "array": # This (array-of-array) never happens to occur at this time (February 2024). 
- _print(f"{spaces}- {property_name}: array of array{suffix}") + _print_output(f"{spaces}- {property_name}: array of array{suffix}") else: - _print(f"{spaces}- {property_name}: array of {property_type}{suffix}") + _print_output(f"{spaces}- {property_name}: array of {property_type}{suffix}") else: - _print(f"{spaces}- {property_name}: array{suffix}") + _print_output(f"{spaces}- {property_name}: array{suffix}") else: - _print(f"{spaces}- {property_name}: array{suffix}") + _print_output(f"{spaces}- {property_name}: array{suffix}") if enumeration: nenums = 0 maxenums = 15 for enum in sorted(enumeration): if (nenums := nenums + 1) >= maxenums: if (remaining := len(enumeration) - nenums) > 0: - _print(f"{spaces} - [{remaining} more ...]") + _print_output(f"{spaces} - [{remaining} more ...]") break - _print(f"{spaces} - {enum}") + _print_output(f"{spaces} - {enum}") else: if isinstance(property_type, list): property_type = " or ".join(sorted(property_type)) @@ -479,18 +508,18 @@ def _print_schema_info(schema: dict, level: int = 0, suffix += f" | max length: {max_length}" if (min_length := property.get("minLength")) is not None: suffix += f" | min length: {min_length}" - _print(f"{spaces}- {property_name}: {property_type}{suffix}") + _print_output(f"{spaces}- {property_name}: {property_type}{suffix}") if enumeration: nenums = 0 maxenums = 15 for enum in sorted(enumeration): if (nenums := nenums + 1) >= maxenums: if (remaining := len(enumeration) - nenums) > 0: - _print(f"{spaces} - [{remaining} more ...]") + _print_output(f"{spaces} - [{remaining} more ...]") break - _print(f"{spaces} - {enum}") + _print_output(f"{spaces} - {enum}") else: - _print(f"{spaces}- {property_name}") + _print_output(f"{spaces}- {property_name}") def _print_all_schema_names(portal: Portal, @@ -501,9 +530,9 @@ def _print_all_schema_names(portal: Portal, if raw: if raw_yaml: - _print(yaml.dump(schemas)) + _print_output(yaml.dump(schemas)) else: - _print(json.dumps(schemas, indent=4)) + _print_output(json.dumps(schemas, indent=4)) return if tree: @@ -513,14 +542,14 @@ def _print_all_schema_names(portal: Portal, for schema_name in sorted(schemas.keys()): if parent_schema_name := _get_parent_schema_name(schemas[schema_name]): if schemas[schema_name].get("isAbstract") is True: - _print(f"{schema_name} | parent: {parent_schema_name} | abstract") + _print_output(f"{schema_name} | parent: {parent_schema_name} | abstract") else: - _print(f"{schema_name} | parent: {parent_schema_name}") + _print_output(f"{schema_name} | parent: {parent_schema_name}") else: if schemas[schema_name].get("isAbstract") is True: - _print(f"{schema_name} | abstract") + _print_output(f"{schema_name} | abstract") else: - _print(schema_name) + _print_output(schema_name) if details: _print_schema(schemas[schema_name], details=details, more_details=more_details, all=all) @@ -559,8 +588,7 @@ def name_of(name: str) -> str: # noqa def _print_tree(root_name: Optional[str], children_of: Callable, has_children: Optional[Callable] = None, - name_of: Optional[Callable] = None, - print: Callable = print) -> None: + name_of: Optional[Callable] = None) -> None: """ Recursively prints as a tree structure the given root name and any of its children (again, recursively) as specified by the given children_of callable; @@ -589,26 +617,26 @@ def tree_generator(name: str, prefix: str = ""): if has_children(path): extension = branch if pointer == tee else space yield from tree_generator(path, prefix=prefix+extension) - print(first + ((name_of(root_name) if callable(name_of) else 
root_name) or "root")) + _print_output(first + ((name_of(root_name) if callable(name_of) else root_name) or "root")) for line in tree_generator(root_name, prefix=" "): - print(line) + _print_output(line) def _read_json_from_file(file: str) -> Optional[dict]: if not os.path.exists(file): _print(f"Cannot find file: {file}") - sys.exit(1) + _exit(1) try: with io.open(file, "r") as f: try: return json.load(f) except Exception: _print(f"Cannot parse JSON in file: {file}") - sys.exit(1) + _exit(1) except Exception as e: - print(e) + _print(e) _print(f"Cannot open file: {file}") - sys.exit(1) + _exit(1) def _print(*args, **kwargs): @@ -617,10 +645,26 @@ def _print(*args, **kwargs): sys.stdout.flush() -def _exit(message: Optional[str] = None) -> None: - if message: +def _print_output(value: str): + global _output_file + if _output_file: + _output_file.write(value) + _output_file.write("\n") + else: + with uncaptured_output(): + PRINT(value) + sys.stdout.flush() + + +def _exit(message: Optional[Union[str, int]] = None, status: Optional[int] = None) -> None: + global _output_file + if isinstance(message, str): _print(f"ERROR: {message}") - sys.exit(1) + elif isinstance(message, int) and not isinstance(status, int): + status = message + if _output_file: + _output_file.close() + sys.exit(status if isinstance(status, int) else (0 if status is None else 1)) if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index d13a9543d..cec5cbefa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.13.2" # accidentally pushed 8.13.1 before ready - pushing this corrected 8.13.2 one before ready too but better than 8.13.1 # "8.13.0.1b3" # TODO: To become 8.13.2 +version = "8.13.1.1b3" # TODO: To become 8.13.3 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" @@ -96,6 +96,7 @@ publish-to-pypi = "dcicutils.scripts.publish_to_pypi:main" show-contributors = "dcicutils.contribution_scripts:show_contributors_main" run-license-checker = "dcicutils.scripts.run_license_checker:main" view-portal-object = "dcicutils.scripts.view_portal_object:main" +update-portal-object = "dcicutils.scripts.update_portal_object:main" [tool.pytest.ini_options] From 8710a408beac166d0ed721cc6e525d853edafd01 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 11 Jul 2024 09:34:59 -0400 Subject: [PATCH 20/20] ready to merge --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cec5cbefa..5f9b51c6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.13.1.1b3" # TODO: To become 8.13.3 +version = "8.13.3" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT"