From c107e9db1b7af4bc8469bf4ce17c89d11fa24178 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Wed, 6 Sep 2023 17:13:00 -0400 Subject: [PATCH 1/7] First cut at implementing new args for ff_utils.get_schema and ff_utils.get_schemas: portal_env= and portal_vapp=. --- CHANGELOG.rst | 9 +++ dcicutils/ff_utils.py | 79 +++++++++++++++++++++----- dcicutils/license_utils.py | 6 ++ pyproject.toml | 2 +- test/test_ff_utils.py | 110 ++++++++++++++++++++++++++++++++++++- test/test_s3_utils.py | 2 +- 6 files changed, 190 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f073f4a79..4922145f9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,15 @@ Change Log ---------- +7.10.0 +====== + +* In ``ff_utils``: + + * New arguments ``portal_env=`` and ``portal_vapp`` to ``get_schema`` + for function ``get_schema`` and ``get_schemas``. + + 7.9.0 ===== diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py index 96f465290..b0e3baa84 100644 --- a/dcicutils/ff_utils.py +++ b/dcicutils/ff_utils.py @@ -8,7 +8,7 @@ from collections import namedtuple from elasticsearch.exceptions import AuthorizationException -from typing import Optional, Dict, List +from typing import Dict, List, Optional from urllib.parse import parse_qs, urlencode, urlparse, urlunparse from . import s3_utils, es_utils from .common import ( @@ -17,7 +17,7 @@ # S3BucketName, S3KeyName, ) from .lang_utils import disjoined_list -from .misc_utils import PRINT, to_camel_case, remove_suffix +from .misc_utils import PRINT, to_camel_case, remove_suffix, VirtualApp # TODO (C4-92, C4-102): Probably to centralize this information in env_utils. Also figure out relation to CGAP. @@ -419,7 +419,7 @@ def search_result_generator(page_generator): but where a page size of 3 is used with start position 0. That call will return A,C,E. The user may expect G,I on the second page, but before it can be done, suppose an element D is indexed and that the stored data is A,C,D,E,G,I,K,M. Requesting data from start position 0 would - now return A,C,D but we already had the first page, so we request data starting at position 3 + now return A,C,D, but we already had the first page, so we request data starting at position 3 for the second page and get E,G,I. That means our sequence of return values would be A,C,E,E,G,I,K,M, or, in other words, showing a duplication. To avoid this, we keep track of the IDs we've seen and show only the first case of each element, so A,C,E,G,I,K,M. (We won't see the D, but we weren't @@ -647,7 +647,7 @@ def get_associated_qc_metrics(uuid, key=None, ff_env=None, include_processed_fil include_raw_files=False, include_supplementary_files=False): """ - Given a uuid of an experimentSet return a dictionary of dictionaries with each dictionary + Given a UUID of an experimentSet return a dictionary of dictionaries with each dictionary representing a quality metric. Args: @@ -942,32 +942,76 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth): yield hit['_source'] # yield individual items from ES -def get_schema(name, key=None, ff_env=None) -> Dict: +def resolve_portal_env(ff_env: Optional[str], portal_env: Optional[str], + portal_vapp: Optional[VirtualApp]) -> Optional[str]: + """ + Resolves which of ff_env and portal_env to use (after doing consistency checking). + There are two consistency checks performed, for which an error is raised on failure: + 1. If neither ff_env= and portal_env= is None, the values must be compatible. + 2. If either ff_env= or portal_env= is not None, portal_vapp= must be None. + + The intent is that callers will do: + portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) + and then afterward not have to worry that arguments are inconsistent. + + Args: + ff_env: an environment name or None + portal_env: an environment name or None + portal_vapp: a VirtualApp or None + """ + if ff_env: + if portal_env and portal_env != ff_env: + raise ValueError("You may not supply both portal_env= and ff_env= together.") + portal_env = ff_env + if portal_env and portal_vapp: + env_arg_name = 'ff_env=' if ff_env else 'portal_env=' + raise ValueError(f"You may not supply both portal_vapp= and {env_arg_name} together.") + return portal_env + + +def get_schema(name, key=None, ff_env: Optional[str] = None, portal_env: Optional[str] = None, + portal_vapp: Optional[VirtualApp] = None) -> Dict: """ Gets the schema definition with the given name. + Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained. + Args: name (str): a schema name (CamelCase or snake_case), or None key (dict): standard ff_utils authentication key - ff_env (str): standard ff environment string + ff_env (str): standard environment string (deprecated, please prefer portal_env=) + portal_env: standard environment string (compatible replacement for ff_env=) + portal_vapp: a VirtualApp or None Returns: dict: contains key schema names and value item class names """ - auth = get_authentication_with_server(key, ff_env) - url = f"profiles/{to_camel_case(name)}.json" - schema = get_metadata(url, key=auth, add_on='frame=raw') - return schema + portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) + base_url = f"profiles/{to_camel_case(name)}.json" + add_on = 'frame=raw' + if portal_vapp: + full_url = f"{base_url}?{add_on}" + schema = portal_vapp.get(full_url) + return schema + else: + auth = get_authentication_with_server(auth=key, ff_env=portal_env) + schema = get_metadata(obj_id=base_url, key=auth, add_on=add_on) + return schema -def get_schemas(key=None, ff_env=None, *, allow_abstract=True, require_id=False) -> Dict[str, Dict]: +def get_schemas(key=None, ff_env: Optional[str] = None, *, allow_abstract: bool = True, require_id: bool = False, + portal_env: Optional[str] = None, portal_vapp: Optional[VirtualApp] = None) -> Dict[str, Dict]: """ Gets a dictionary of all schema definitions. By default, this returns all schemas, but the allow_abstract= and require_id= keywords allow limited filtering. + Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained. + Args: key (dict): standard ff_utils authentication key - ff_env (str): standard ff environment string + ff_env (str): standard environment string (deprecated, please prefer portal_env=) + portal_env: standard environment string (compatible replacement for ff_env=) + portal_vapp: a VirtualApp or None allow_abstract (boolean): controls whether abstract schemas can be returned (default True, return them) require_id (boolean): controls whether a '$id' field is required for schema to be included (default False, include even if no $id) @@ -975,8 +1019,15 @@ def get_schemas(key=None, ff_env=None, *, allow_abstract=True, require_id=False) Returns: dict: a mapping from keys that are schema names to schema definitions """ - auth = get_authentication_with_server(key, ff_env) - schemas: Dict[str, Dict] = get_metadata('profiles/', key=auth, add_on='frame=raw') + portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) + base_url = 'profiles/' + add_on = 'frame=raw' + if portal_vapp: + full_url = f"{base_url}?{add_on}" + schemas: Dict[str, Dict] = portal_vapp.get(full_url) + else: + auth = get_authentication_with_server(auth=key, ff_env=portal_env) + schemas: Dict[str, Dict] = get_metadata(obj_id=base_url, key=auth, add_on=add_on) filtered_schemas = {} for schema_name, schema in schemas.items(): if allow_abstract or not schema.get('isAbstract'): diff --git a/dcicutils/license_utils.py b/dcicutils/license_utils.py index 855fa5c80..db18fd7df 100644 --- a/dcicutils/license_utils.py +++ b/dcicutils/license_utils.py @@ -810,6 +810,12 @@ class C4InfrastructureLicenseChecker(LicenseChecker): 'pytest-timeout', # MIT Licensed ], + # Linking = With Restrictions, Private Use = Yes + # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses + 'GNU Lesser General Public License v2 or later (LGPLv2+)': [ + 'chardet' # used at runtime during server operation (ingestion), but not modified or distributed + ], + # Linking = With Restrictions, Private Use = Yes # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses 'GNU Lesser General Public License v3 or later (LGPLv3+)': [ diff --git a/pyproject.toml b/pyproject.toml index 70f90b624..1078a57ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "7.9.0" +version = "7.10.0" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/test/test_ff_utils.py b/test/test_ff_utils.py index 7d4254d7a..940820e57 100644 --- a/test/test_ff_utils.py +++ b/test/test_ff_utils.py @@ -1316,6 +1316,112 @@ def get_it(): time.sleep(2) +@pytest.mark.unit +def test_get_schema_with_vapp(): + + sample_vapp = mock.MagicMock() + sample_schema_metadata = {"foo": "foo-schema", "bar": "bar-schema"} + sample_auth = mock.MagicMock() + + with pytest.raises(ValueError) as exc: + ff_utils.get_schema('User', ff_env='foo', portal_env='bar') + assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schema('User', ff_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schema('User', portal_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.' + + for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]: + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + expected_env = list(env_args.items())[0][1] if env_args else None + + mock_get_metadata.return_value = sample_schema_metadata + mock_get_authentication_with_server.return_value = sample_auth + + # When called with no vapp, get_metadata is consulted (after getting auth info) + assert ff_utils.get_schema('User', **env_args) == sample_schema_metadata + + mock_get_authentication_with_server.assert_called_once_with(auth=None, ff_env=expected_env) + mock_get_metadata.assert_called_once_with(obj_id='profiles/User.json', key=sample_auth, + add_on='frame=raw') + + sample_vapp.get.assert_not_called() + + sample_vapp.get.assert_not_called() + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + sample_vapp.get.return_value = sample_schema_metadata + + assert ff_utils.get_schema('User', portal_vapp=sample_vapp) == sample_schema_metadata + + mock_get_authentication_with_server.assert_not_called() + mock_get_metadata.assert_not_called() + + sample_vapp.get.assert_called_once_with('profiles/User.json?frame=raw') + + +@pytest.mark.unit +def test_get_schemas_with_vapp(): + + sample_vapp = mock.MagicMock() + sample_schema_metadata = {"foo": {"$id": "Foo.json"}, "bar": {"$id": "Bar.json"}} + sample_auth = mock.MagicMock() + + with pytest.raises(ValueError) as exc: + ff_utils.get_schemas(ff_env='foo', portal_env='bar') + assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schemas(ff_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schemas(portal_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.' + + for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]: + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + expected_env = list(env_args.items())[0][1] if env_args else None + + mock_get_metadata.return_value = sample_schema_metadata + mock_get_authentication_with_server.return_value = sample_auth + + # When called with no vapp, get_metadata is consulted (after getting auth info) + assert ff_utils.get_schemas(**env_args) == sample_schema_metadata + + mock_get_authentication_with_server.assert_called_once_with(auth=None, ff_env=expected_env) + mock_get_metadata.assert_called_once_with(obj_id='profiles/', key=sample_auth, + add_on='frame=raw') + + sample_vapp.get.assert_not_called() + + sample_vapp.get.assert_not_called() + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + sample_vapp.get.return_value = sample_schema_metadata + + assert ff_utils.get_schemas(portal_vapp=sample_vapp) == sample_schema_metadata + + mock_get_authentication_with_server.assert_not_called() + mock_get_metadata.assert_not_called() + + sample_vapp.get.assert_called_once_with('profiles/?frame=raw') + + def test_get_schemas_options(): mocked_schemas = { @@ -1350,8 +1456,8 @@ def mocked_schemas_subset(keys): with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: - def mocked_get_metadata(url, key, add_on): - assert url == "profiles/" # this is the web API to ask for all profiles + def mocked_get_metadata(obj_id, key, add_on): + assert obj_id == "profiles/" # this is the web API to ask for all profiles assert key == 'some-auth' # we assume auth is tested elsewhere assert add_on == "frame=raw" # we presently always send this return mocked_schemas diff --git a/test/test_s3_utils.py b/test/test_s3_utils.py index 3cfd05c7e..ba2c95878 100644 --- a/test/test_s3_utils.py +++ b/test/test_s3_utils.py @@ -409,7 +409,7 @@ def test_s3utils_get_google_key(): keys = s3u.get_google_key() assert isinstance(keys, dict) assert keys['type'] == 'service_account' - assert keys["project_id"] == "fourdn-fourfront" + assert keys["project_id"] == "fourfront-396315" for dict_key in ['private_key_id', 'private_key', 'client_email', 'client_id', 'auth_uri', 'client_x509_cert_url']: assert keys[dict_key] From acfb780705f9aea6afd3ed4138c39d0df1da59de Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 7 Sep 2023 05:30:33 -0400 Subject: [PATCH 2/7] Fix a bug in vapp-handling. --- dcicutils/ff_utils.py | 4 ++-- test/test_ff_utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py index b0e3baa84..949d715e7 100644 --- a/dcicutils/ff_utils.py +++ b/dcicutils/ff_utils.py @@ -991,8 +991,8 @@ def get_schema(name, key=None, ff_env: Optional[str] = None, portal_env: Optiona add_on = 'frame=raw' if portal_vapp: full_url = f"{base_url}?{add_on}" - schema = portal_vapp.get(full_url) - return schema + res = portal_vapp.get(full_url) + return get_response_json(res) else: auth = get_authentication_with_server(auth=key, ff_env=portal_env) schema = get_metadata(obj_id=base_url, key=auth, add_on=add_on) diff --git a/test/test_ff_utils.py b/test/test_ff_utils.py index 940820e57..633772bab 100644 --- a/test/test_ff_utils.py +++ b/test/test_ff_utils.py @@ -1359,7 +1359,7 @@ def test_get_schema_with_vapp(): with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: - sample_vapp.get.return_value = sample_schema_metadata + sample_vapp.get.return_value = MockResponse(200, json=sample_schema_metadata) assert ff_utils.get_schema('User', portal_vapp=sample_vapp) == sample_schema_metadata From bed59d5f30d1e3f647726db2e8872c5ed9c44f70 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 7 Sep 2023 12:33:51 -0400 Subject: [PATCH 3/7] Simplifications per Will's code review of a related branch. --- dcicutils/ff_utils.py | 79 ++++++------------------------ test/test_ff_utils.py | 110 +----------------------------------------- 2 files changed, 16 insertions(+), 173 deletions(-) diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py index 949d715e7..96f465290 100644 --- a/dcicutils/ff_utils.py +++ b/dcicutils/ff_utils.py @@ -8,7 +8,7 @@ from collections import namedtuple from elasticsearch.exceptions import AuthorizationException -from typing import Dict, List, Optional +from typing import Optional, Dict, List from urllib.parse import parse_qs, urlencode, urlparse, urlunparse from . import s3_utils, es_utils from .common import ( @@ -17,7 +17,7 @@ # S3BucketName, S3KeyName, ) from .lang_utils import disjoined_list -from .misc_utils import PRINT, to_camel_case, remove_suffix, VirtualApp +from .misc_utils import PRINT, to_camel_case, remove_suffix # TODO (C4-92, C4-102): Probably to centralize this information in env_utils. Also figure out relation to CGAP. @@ -419,7 +419,7 @@ def search_result_generator(page_generator): but where a page size of 3 is used with start position 0. That call will return A,C,E. The user may expect G,I on the second page, but before it can be done, suppose an element D is indexed and that the stored data is A,C,D,E,G,I,K,M. Requesting data from start position 0 would - now return A,C,D, but we already had the first page, so we request data starting at position 3 + now return A,C,D but we already had the first page, so we request data starting at position 3 for the second page and get E,G,I. That means our sequence of return values would be A,C,E,E,G,I,K,M, or, in other words, showing a duplication. To avoid this, we keep track of the IDs we've seen and show only the first case of each element, so A,C,E,G,I,K,M. (We won't see the D, but we weren't @@ -647,7 +647,7 @@ def get_associated_qc_metrics(uuid, key=None, ff_env=None, include_processed_fil include_raw_files=False, include_supplementary_files=False): """ - Given a UUID of an experimentSet return a dictionary of dictionaries with each dictionary + Given a uuid of an experimentSet return a dictionary of dictionaries with each dictionary representing a quality metric. Args: @@ -942,76 +942,32 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth): yield hit['_source'] # yield individual items from ES -def resolve_portal_env(ff_env: Optional[str], portal_env: Optional[str], - portal_vapp: Optional[VirtualApp]) -> Optional[str]: - """ - Resolves which of ff_env and portal_env to use (after doing consistency checking). - There are two consistency checks performed, for which an error is raised on failure: - 1. If neither ff_env= and portal_env= is None, the values must be compatible. - 2. If either ff_env= or portal_env= is not None, portal_vapp= must be None. - - The intent is that callers will do: - portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) - and then afterward not have to worry that arguments are inconsistent. - - Args: - ff_env: an environment name or None - portal_env: an environment name or None - portal_vapp: a VirtualApp or None - """ - if ff_env: - if portal_env and portal_env != ff_env: - raise ValueError("You may not supply both portal_env= and ff_env= together.") - portal_env = ff_env - if portal_env and portal_vapp: - env_arg_name = 'ff_env=' if ff_env else 'portal_env=' - raise ValueError(f"You may not supply both portal_vapp= and {env_arg_name} together.") - return portal_env - - -def get_schema(name, key=None, ff_env: Optional[str] = None, portal_env: Optional[str] = None, - portal_vapp: Optional[VirtualApp] = None) -> Dict: +def get_schema(name, key=None, ff_env=None) -> Dict: """ Gets the schema definition with the given name. - Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained. - Args: name (str): a schema name (CamelCase or snake_case), or None key (dict): standard ff_utils authentication key - ff_env (str): standard environment string (deprecated, please prefer portal_env=) - portal_env: standard environment string (compatible replacement for ff_env=) - portal_vapp: a VirtualApp or None + ff_env (str): standard ff environment string Returns: dict: contains key schema names and value item class names """ - portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) - base_url = f"profiles/{to_camel_case(name)}.json" - add_on = 'frame=raw' - if portal_vapp: - full_url = f"{base_url}?{add_on}" - res = portal_vapp.get(full_url) - return get_response_json(res) - else: - auth = get_authentication_with_server(auth=key, ff_env=portal_env) - schema = get_metadata(obj_id=base_url, key=auth, add_on=add_on) - return schema + auth = get_authentication_with_server(key, ff_env) + url = f"profiles/{to_camel_case(name)}.json" + schema = get_metadata(url, key=auth, add_on='frame=raw') + return schema -def get_schemas(key=None, ff_env: Optional[str] = None, *, allow_abstract: bool = True, require_id: bool = False, - portal_env: Optional[str] = None, portal_vapp: Optional[VirtualApp] = None) -> Dict[str, Dict]: +def get_schemas(key=None, ff_env=None, *, allow_abstract=True, require_id=False) -> Dict[str, Dict]: """ Gets a dictionary of all schema definitions. By default, this returns all schemas, but the allow_abstract= and require_id= keywords allow limited filtering. - Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained. - Args: key (dict): standard ff_utils authentication key - ff_env (str): standard environment string (deprecated, please prefer portal_env=) - portal_env: standard environment string (compatible replacement for ff_env=) - portal_vapp: a VirtualApp or None + ff_env (str): standard ff environment string allow_abstract (boolean): controls whether abstract schemas can be returned (default True, return them) require_id (boolean): controls whether a '$id' field is required for schema to be included (default False, include even if no $id) @@ -1019,15 +975,8 @@ def get_schemas(key=None, ff_env: Optional[str] = None, *, allow_abstract: bool Returns: dict: a mapping from keys that are schema names to schema definitions """ - portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) - base_url = 'profiles/' - add_on = 'frame=raw' - if portal_vapp: - full_url = f"{base_url}?{add_on}" - schemas: Dict[str, Dict] = portal_vapp.get(full_url) - else: - auth = get_authentication_with_server(auth=key, ff_env=portal_env) - schemas: Dict[str, Dict] = get_metadata(obj_id=base_url, key=auth, add_on=add_on) + auth = get_authentication_with_server(key, ff_env) + schemas: Dict[str, Dict] = get_metadata('profiles/', key=auth, add_on='frame=raw') filtered_schemas = {} for schema_name, schema in schemas.items(): if allow_abstract or not schema.get('isAbstract'): diff --git a/test/test_ff_utils.py b/test/test_ff_utils.py index 633772bab..7d4254d7a 100644 --- a/test/test_ff_utils.py +++ b/test/test_ff_utils.py @@ -1316,112 +1316,6 @@ def get_it(): time.sleep(2) -@pytest.mark.unit -def test_get_schema_with_vapp(): - - sample_vapp = mock.MagicMock() - sample_schema_metadata = {"foo": "foo-schema", "bar": "bar-schema"} - sample_auth = mock.MagicMock() - - with pytest.raises(ValueError) as exc: - ff_utils.get_schema('User', ff_env='foo', portal_env='bar') - assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.' - - with pytest.raises(ValueError) as exc: - ff_utils.get_schema('User', ff_env='foo', portal_vapp=sample_vapp) - assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.' - - with pytest.raises(ValueError) as exc: - ff_utils.get_schema('User', portal_env='foo', portal_vapp=sample_vapp) - assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.' - - for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]: - - with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: - with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: - - expected_env = list(env_args.items())[0][1] if env_args else None - - mock_get_metadata.return_value = sample_schema_metadata - mock_get_authentication_with_server.return_value = sample_auth - - # When called with no vapp, get_metadata is consulted (after getting auth info) - assert ff_utils.get_schema('User', **env_args) == sample_schema_metadata - - mock_get_authentication_with_server.assert_called_once_with(auth=None, ff_env=expected_env) - mock_get_metadata.assert_called_once_with(obj_id='profiles/User.json', key=sample_auth, - add_on='frame=raw') - - sample_vapp.get.assert_not_called() - - sample_vapp.get.assert_not_called() - - with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: - with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: - - sample_vapp.get.return_value = MockResponse(200, json=sample_schema_metadata) - - assert ff_utils.get_schema('User', portal_vapp=sample_vapp) == sample_schema_metadata - - mock_get_authentication_with_server.assert_not_called() - mock_get_metadata.assert_not_called() - - sample_vapp.get.assert_called_once_with('profiles/User.json?frame=raw') - - -@pytest.mark.unit -def test_get_schemas_with_vapp(): - - sample_vapp = mock.MagicMock() - sample_schema_metadata = {"foo": {"$id": "Foo.json"}, "bar": {"$id": "Bar.json"}} - sample_auth = mock.MagicMock() - - with pytest.raises(ValueError) as exc: - ff_utils.get_schemas(ff_env='foo', portal_env='bar') - assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.' - - with pytest.raises(ValueError) as exc: - ff_utils.get_schemas(ff_env='foo', portal_vapp=sample_vapp) - assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.' - - with pytest.raises(ValueError) as exc: - ff_utils.get_schemas(portal_env='foo', portal_vapp=sample_vapp) - assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.' - - for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]: - - with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: - with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: - - expected_env = list(env_args.items())[0][1] if env_args else None - - mock_get_metadata.return_value = sample_schema_metadata - mock_get_authentication_with_server.return_value = sample_auth - - # When called with no vapp, get_metadata is consulted (after getting auth info) - assert ff_utils.get_schemas(**env_args) == sample_schema_metadata - - mock_get_authentication_with_server.assert_called_once_with(auth=None, ff_env=expected_env) - mock_get_metadata.assert_called_once_with(obj_id='profiles/', key=sample_auth, - add_on='frame=raw') - - sample_vapp.get.assert_not_called() - - sample_vapp.get.assert_not_called() - - with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: - with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: - - sample_vapp.get.return_value = sample_schema_metadata - - assert ff_utils.get_schemas(portal_vapp=sample_vapp) == sample_schema_metadata - - mock_get_authentication_with_server.assert_not_called() - mock_get_metadata.assert_not_called() - - sample_vapp.get.assert_called_once_with('profiles/?frame=raw') - - def test_get_schemas_options(): mocked_schemas = { @@ -1456,8 +1350,8 @@ def mocked_schemas_subset(keys): with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: - def mocked_get_metadata(obj_id, key, add_on): - assert obj_id == "profiles/" # this is the web API to ask for all profiles + def mocked_get_metadata(url, key, add_on): + assert url == "profiles/" # this is the web API to ask for all profiles assert key == 'some-auth' # we assume auth is tested elsewhere assert add_on == "frame=raw" # we presently always send this return mocked_schemas From ff1a6cc30a6aa5d6201434d58c0789938466fb76 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 7 Sep 2023 12:51:41 -0400 Subject: [PATCH 4/7] Fix that prior commit pulled in wrong sources. --- dcicutils/ff_utils.py | 77 +++++++++++++++++++++++------ test/test_ff_utils.py | 112 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 172 insertions(+), 17 deletions(-) diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py index 96f465290..280bdc0df 100644 --- a/dcicutils/ff_utils.py +++ b/dcicutils/ff_utils.py @@ -8,7 +8,7 @@ from collections import namedtuple from elasticsearch.exceptions import AuthorizationException -from typing import Optional, Dict, List +from typing import Dict, List, Optional from urllib.parse import parse_qs, urlencode, urlparse, urlunparse from . import s3_utils, es_utils from .common import ( @@ -17,7 +17,7 @@ # S3BucketName, S3KeyName, ) from .lang_utils import disjoined_list -from .misc_utils import PRINT, to_camel_case, remove_suffix +from .misc_utils import PRINT, to_camel_case, remove_suffix, VirtualApp # TODO (C4-92, C4-102): Probably to centralize this information in env_utils. Also figure out relation to CGAP. @@ -419,7 +419,7 @@ def search_result_generator(page_generator): but where a page size of 3 is used with start position 0. That call will return A,C,E. The user may expect G,I on the second page, but before it can be done, suppose an element D is indexed and that the stored data is A,C,D,E,G,I,K,M. Requesting data from start position 0 would - now return A,C,D but we already had the first page, so we request data starting at position 3 + now return A,C,D, but we already had the first page, so we request data starting at position 3 for the second page and get E,G,I. That means our sequence of return values would be A,C,E,E,G,I,K,M, or, in other words, showing a duplication. To avoid this, we keep track of the IDs we've seen and show only the first case of each element, so A,C,E,G,I,K,M. (We won't see the D, but we weren't @@ -647,7 +647,7 @@ def get_associated_qc_metrics(uuid, key=None, ff_env=None, include_processed_fil include_raw_files=False, include_supplementary_files=False): """ - Given a uuid of an experimentSet return a dictionary of dictionaries with each dictionary + Given a UUID of an experimentSet return a dictionary of dictionaries with each dictionary representing a quality metric. Args: @@ -942,32 +942,75 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth): yield hit['_source'] # yield individual items from ES -def get_schema(name, key=None, ff_env=None) -> Dict: +def resolve_portal_env(ff_env: Optional[str], portal_env: Optional[str], + portal_vapp: Optional[VirtualApp]) -> Optional[str]: + """ + Resolves which of ff_env and portal_env to use (after doing consistency checking). + There are two consistency checks performed, for which an error is raised on failure: + 1. If neither ff_env= and portal_env= is None, the values must be compatible. + 2. If either ff_env= or portal_env= is not None, portal_vapp= must be None. + + The intent is that callers will do: + portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) + and then afterward not have to worry that arguments are inconsistent. + + Args: + ff_env: an environment name or None + portal_env: an environment name or None + portal_vapp: a VirtualApp or None + """ + if ff_env: + if portal_env and portal_env != ff_env: + raise ValueError("You may not supply both portal_env= and ff_env= together.") + portal_env = ff_env + if portal_env and portal_vapp: + env_arg_name = 'ff_env=' if ff_env else 'portal_env=' + raise ValueError(f"You may not supply both portal_vapp= and {env_arg_name} together.") + return portal_env + + +def get_schema(name, key=None, ff_env: Optional[str] = None, portal_env: Optional[str] = None, + portal_vapp: Optional[VirtualApp] = None) -> Dict: """ Gets the schema definition with the given name. + Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained. + Args: name (str): a schema name (CamelCase or snake_case), or None key (dict): standard ff_utils authentication key - ff_env (str): standard ff environment string + ff_env (str): standard environment string (deprecated, please prefer portal_env=) + portal_env: standard environment string (compatible replacement for ff_env=) + portal_vapp: a VirtualApp or None Returns: dict: contains key schema names and value item class names """ - auth = get_authentication_with_server(key, ff_env) - url = f"profiles/{to_camel_case(name)}.json" - schema = get_metadata(url, key=auth, add_on='frame=raw') - return schema + portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) + base_url = f"profiles/{to_camel_case(name)}.json" + add_on = 'frame=raw' + if portal_vapp: + full_url = f"{base_url}?{add_on}" + res = portal_vapp.get(full_url) + return get_response_json(res) + else: + schema = get_metadata(obj_id=base_url, key=key, ff_env=portal_env, add_on=add_on) + return schema -def get_schemas(key=None, ff_env=None, *, allow_abstract=True, require_id=False) -> Dict[str, Dict]: +def get_schemas(key=None, ff_env: Optional[str] = None, *, allow_abstract: bool = True, require_id: bool = False, + portal_env: Optional[str] = None, portal_vapp: Optional[VirtualApp] = None) -> Dict[str, Dict]: """ Gets a dictionary of all schema definitions. By default, this returns all schemas, but the allow_abstract= and require_id= keywords allow limited filtering. + Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained. + Args: key (dict): standard ff_utils authentication key - ff_env (str): standard ff environment string + ff_env (str): standard environment string (deprecated, please prefer portal_env=) + portal_env: standard environment string (compatible replacement for ff_env=) + portal_vapp: a VirtualApp or None allow_abstract (boolean): controls whether abstract schemas can be returned (default True, return them) require_id (boolean): controls whether a '$id' field is required for schema to be included (default False, include even if no $id) @@ -975,8 +1018,14 @@ def get_schemas(key=None, ff_env=None, *, allow_abstract=True, require_id=False) Returns: dict: a mapping from keys that are schema names to schema definitions """ - auth = get_authentication_with_server(key, ff_env) - schemas: Dict[str, Dict] = get_metadata('profiles/', key=auth, add_on='frame=raw') + portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) + base_url = 'profiles/' + add_on = 'frame=raw' + if portal_vapp: + full_url = f"{base_url}?{add_on}" + schemas: Dict[str, Dict] = portal_vapp.get(full_url) + else: + schemas: Dict[str, Dict] = get_metadata(obj_id=base_url, key=key, ff_env=portal_env, add_on=add_on) filtered_schemas = {} for schema_name, schema in schemas.items(): if allow_abstract or not schema.get('isAbstract'): diff --git a/test/test_ff_utils.py b/test/test_ff_utils.py index 7d4254d7a..16413a519 100644 --- a/test/test_ff_utils.py +++ b/test/test_ff_utils.py @@ -1316,6 +1316,111 @@ def get_it(): time.sleep(2) +@pytest.mark.unit +def test_get_schema_with_vapp(): + + sample_vapp = mock.MagicMock() + sample_schema_metadata = {"foo": "foo-schema", "bar": "bar-schema"} + sample_auth = mock.MagicMock() + + with pytest.raises(ValueError) as exc: + ff_utils.get_schema('User', ff_env='foo', portal_env='bar') + assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schema('User', ff_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schema('User', portal_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.' + + for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]: + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + expected_env = list(env_args.items())[0][1] if env_args else None + + mock_get_metadata.return_value = sample_schema_metadata + mock_get_authentication_with_server.return_value = sample_auth + + # When called with no vapp, get_metadata is consulted (after getting auth info) + assert ff_utils.get_schema('User', **env_args) == sample_schema_metadata + + mock_get_authentication_with_server.assert_not_called() + mock_get_metadata.assert_called_once_with(obj_id='profiles/User.json', key=None, ff_env=expected_env, + add_on='frame=raw') + + sample_vapp.get.assert_not_called() + + sample_vapp.get.assert_not_called() + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + sample_vapp.get.return_value = MockResponse(200, json=sample_schema_metadata) + + assert ff_utils.get_schema('User', portal_vapp=sample_vapp) == sample_schema_metadata + + mock_get_authentication_with_server.assert_not_called() + mock_get_metadata.assert_not_called() + + sample_vapp.get.assert_called_once_with('profiles/User.json?frame=raw') + + +@pytest.mark.unit +def test_get_schemas_with_vapp(): + + sample_vapp = mock.MagicMock() + sample_schema_metadata = {"foo": {"$id": "Foo.json"}, "bar": {"$id": "Bar.json"}} + sample_auth = mock.MagicMock() + + with pytest.raises(ValueError) as exc: + ff_utils.get_schemas(ff_env='foo', portal_env='bar') + assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schemas(ff_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.' + + with pytest.raises(ValueError) as exc: + ff_utils.get_schemas(portal_env='foo', portal_vapp=sample_vapp) + assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.' + + for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]: + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + expected_env = list(env_args.items())[0][1] if env_args else None + + mock_get_metadata.return_value = sample_schema_metadata + mock_get_authentication_with_server.return_value = sample_auth + + assert ff_utils.get_schemas(**env_args) == sample_schema_metadata + + mock_get_authentication_with_server.assert_not_called() + mock_get_metadata.assert_called_once_with(obj_id='profiles/', key=None, ff_env=expected_env, + add_on='frame=raw') + + sample_vapp.get.assert_not_called() + + sample_vapp.get.assert_not_called() + + with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: + with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server: + + sample_vapp.get.return_value = sample_schema_metadata + + assert ff_utils.get_schemas(portal_vapp=sample_vapp) == sample_schema_metadata + + mock_get_authentication_with_server.assert_not_called() + mock_get_metadata.assert_not_called() + + sample_vapp.get.assert_called_once_with('profiles/?frame=raw') + + def test_get_schemas_options(): mocked_schemas = { @@ -1350,9 +1455,10 @@ def mocked_schemas_subset(keys): with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata: - def mocked_get_metadata(url, key, add_on): - assert url == "profiles/" # this is the web API to ask for all profiles - assert key == 'some-auth' # we assume auth is tested elsewhere + def mocked_get_metadata(obj_id, key, ff_env, add_on): + assert obj_id == "profiles/" # this is the web API to ask for all profiles + assert key is None # it would get looked up + assert ff_env is None # it would get looked up, too assert add_on == "frame=raw" # we presently always send this return mocked_schemas From 76015b12b4393cca0f02b59c107289d860a9962e Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 7 Sep 2023 14:37:46 -0400 Subject: [PATCH 5/7] Minor cosmetics in response to Will's code review. --- test/test_s3_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_s3_utils.py b/test/test_s3_utils.py index ba2c95878..0f6f78b42 100644 --- a/test/test_s3_utils.py +++ b/test/test_s3_utils.py @@ -402,6 +402,8 @@ def test_s3utils_get_higlass_key(portal_env): # assert key[dict_key] +_GOOGLE_FOURFRONT_PROJECT_ID = "fourfront-396315" + @pytest.mark.integrated @using_fresh_ff_state_for_testing() def test_s3utils_get_google_key(): @@ -409,7 +411,7 @@ def test_s3utils_get_google_key(): keys = s3u.get_google_key() assert isinstance(keys, dict) assert keys['type'] == 'service_account' - assert keys["project_id"] == "fourfront-396315" + assert keys["project_id"] == _GOOGLE_FOURFRONT_PROJECT_ID for dict_key in ['private_key_id', 'private_key', 'client_email', 'client_id', 'auth_uri', 'client_x509_cert_url']: assert keys[dict_key] From e9e044a321f6fe79b9fd9c0de98c758881ad7e71 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 7 Sep 2023 14:41:03 -0400 Subject: [PATCH 6/7] PEP8 --- test/test_s3_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_s3_utils.py b/test/test_s3_utils.py index 0f6f78b42..58eb680cc 100644 --- a/test/test_s3_utils.py +++ b/test/test_s3_utils.py @@ -404,6 +404,7 @@ def test_s3utils_get_higlass_key(portal_env): _GOOGLE_FOURFRONT_PROJECT_ID = "fourfront-396315" + @pytest.mark.integrated @using_fresh_ff_state_for_testing() def test_s3utils_get_google_key(): From 096390095233a4f9f9fa0ebbfe9fca7358588912 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 7 Sep 2023 15:01:22 -0400 Subject: [PATCH 7/7] Augment changelog. --- CHANGELOG.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4922145f9..a92be15a1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,14 @@ Change Log * New arguments ``portal_env=`` and ``portal_vapp`` to ``get_schema`` for function ``get_schema`` and ``get_schemas``. +* In ``s3_utils``: + + * Fix a failing test (caused by an environmental change, no functional change). + +* In ``license_utils``: + + * Allow C4 infrastructure to use the ``chardet`` library. + 7.9.0 =====