Skip to content

Commit

Permalink
Merge pull request #281 from 4dn-dcic/kmp_schemas_from_vapp
Browse files Browse the repository at this point in the history
Allow ff_utils.get_schema and ff_utils.get_schemas to take a portal_vapp
  • Loading branch information
netsettler authored Sep 7, 2023
2 parents 2a684c0 + 0963900 commit b079275
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 19 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@ Change Log
----------


7.10.0
======

* In ``ff_utils``:

* New arguments ``portal_env=`` and ``portal_vapp=`` to functions
  ``get_schema`` and ``get_schemas``.

* In ``s3_utils``:

* Fix a failing test (caused by an environmental change, no functional change).

* In ``license_utils``:

* Allow C4 infrastructure to use the ``chardet`` library.


7.9.0
=====

Expand Down
77 changes: 63 additions & 14 deletions dcicutils/ff_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from collections import namedtuple
from elasticsearch.exceptions import AuthorizationException
from typing import Optional, Dict, List
from typing import Dict, List, Optional
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
from . import s3_utils, es_utils
from .common import (
Expand All @@ -17,7 +17,7 @@
# S3BucketName, S3KeyName,
)
from .lang_utils import disjoined_list
from .misc_utils import PRINT, to_camel_case, remove_suffix
from .misc_utils import PRINT, to_camel_case, remove_suffix, VirtualApp


# TODO (C4-92, C4-102): Probably to centralize this information in env_utils. Also figure out relation to CGAP.
Expand Down Expand Up @@ -419,7 +419,7 @@ def search_result_generator(page_generator):
but where a page size of 3 is used with start position 0. That call will return A,C,E. The
user may expect G,I on the second page, but before it can be done, suppose an element D is
indexed and that the stored data is A,C,D,E,G,I,K,M. Requesting data from start position 0 would
now return A,C,D but we already had the first page, so we request data starting at position 3
now return A,C,D, but we already had the first page, so we request data starting at position 3
for the second page and get E,G,I. That means our sequence of return values would be A,C,E,E,G,I,K,M,
or, in other words, showing a duplication. To avoid this, we keep track of the IDs we've seen
and show only the first case of each element, so A,C,E,G,I,K,M. (We won't see the D, but we weren't
Expand Down Expand Up @@ -647,7 +647,7 @@ def get_associated_qc_metrics(uuid, key=None, ff_env=None, include_processed_fil
include_raw_files=False,
include_supplementary_files=False):
"""
Given a uuid of an experimentSet return a dictionary of dictionaries with each dictionary
Given a UUID of an experimentSet return a dictionary of dictionaries with each dictionary
representing a quality metric.
Args:
Expand Down Expand Up @@ -942,41 +942,90 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
yield hit['_source'] # yield individual items from ES


def get_schema(name, key=None, ff_env=None) -> Dict:
def resolve_portal_env(ff_env: Optional[str], portal_env: Optional[str],
                       portal_vapp: Optional[VirtualApp]) -> Optional[str]:
    """
    Returns the single portal environment named by ff_env= and/or portal_env=, after consistency checking.

    Two checks are performed, each raising ValueError on failure:
      1. If both ff_env= and portal_env= are supplied (non-empty), they must name the same environment.
      2. An environment name (via either argument) may not be combined with portal_vapp=.

    The intended usage is:
        portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp)
    after which the caller can rely on the arguments being mutually consistent.

    Args:
        ff_env: an environment name or None (legacy spelling of portal_env)
        portal_env: an environment name or None
        portal_vapp: a VirtualApp or None

    Returns:
        the resolved environment name, or None if no environment name was supplied

    Raises:
        ValueError: if the supplied arguments are inconsistent
    """
    chosen = portal_env
    if ff_env:
        if chosen and chosen != ff_env:
            raise ValueError("You may not supply both portal_env= and ff_env= together.")
        chosen = ff_env
    if portal_vapp and chosen:
        # Report whichever spelling the caller actually used in the error message.
        which = 'ff_env=' if ff_env else 'portal_env='
        raise ValueError(f"You may not supply both portal_vapp= and {which} together.")
    return chosen


def get_schema(name, key=None, ff_env: Optional[str] = None, portal_env: Optional[str] = None,
               portal_vapp: Optional[VirtualApp] = None) -> Dict:
    """
    Gets the schema definition with the given name.

    Only one of portal_env= (or its legacy alias ff_env=) or portal_vapp= can be provided;
    that choice determines whether the schema is fetched remotely or through the VirtualApp.

    Args:
        name (str): a schema name (CamelCase or snake_case), or None
        key (dict): standard ff_utils authentication key
        ff_env (str): standard environment string (deprecated, please prefer portal_env=)
        portal_env: standard environment string (compatible replacement for ff_env=)
        portal_vapp: a VirtualApp or None

    Returns:
        dict: contains key schema names and value item class names
    """
    portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp)
    profile_path = f"profiles/{to_camel_case(name)}.json"
    frame_query = 'frame=raw'
    if not portal_vapp:
        # Ordinary remote access: get_metadata resolves auth/server from key and environment.
        return get_metadata(obj_id=profile_path, key=key, ff_env=portal_env, add_on=frame_query)
    # In-process access through the supplied VirtualApp handle.
    response = portal_vapp.get(f"{profile_path}?{frame_query}")
    return get_response_json(response)


def get_schemas(key=None, ff_env=None, *, allow_abstract=True, require_id=False) -> Dict[str, Dict]:
def get_schemas(key=None, ff_env: Optional[str] = None, *, allow_abstract: bool = True, require_id: bool = False,
portal_env: Optional[str] = None, portal_vapp: Optional[VirtualApp] = None) -> Dict[str, Dict]:
"""
Gets a dictionary of all schema definitions.
By default, this returns all schemas, but the allow_abstract= and require_id= keywords allow limited filtering.
Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained.
Args:
key (dict): standard ff_utils authentication key
ff_env (str): standard ff environment string
ff_env (str): standard environment string (deprecated, please prefer portal_env=)
portal_env: standard environment string (compatible replacement for ff_env=)
portal_vapp: a VirtualApp or None
allow_abstract (boolean): controls whether abstract schemas can be returned (default True, return them)
require_id (boolean): controls whether a '$id' field is required for schema to be included
(default False, include even if no $id)
Returns:
dict: a mapping from keys that are schema names to schema definitions
"""
auth = get_authentication_with_server(key, ff_env)
schemas: Dict[str, Dict] = get_metadata('profiles/', key=auth, add_on='frame=raw')
portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp)
base_url = 'profiles/'
add_on = 'frame=raw'
if portal_vapp:
full_url = f"{base_url}?{add_on}"
schemas: Dict[str, Dict] = portal_vapp.get(full_url)
else:
schemas: Dict[str, Dict] = get_metadata(obj_id=base_url, key=key, ff_env=portal_env, add_on=add_on)
filtered_schemas = {}
for schema_name, schema in schemas.items():
if allow_abstract or not schema.get('isAbstract'):
Expand Down
6 changes: 6 additions & 0 deletions dcicutils/license_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,12 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
'pytest-timeout', # MIT Licensed
],

# Linking = With Restrictions, Private Use = Yes
# Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
'GNU Lesser General Public License v2 or later (LGPLv2+)': [
'chardet' # used at runtime during server operation (ingestion), but not modified or distributed
],

# Linking = With Restrictions, Private Use = Yes
# Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
'GNU Lesser General Public License v3 or later (LGPLv3+)': [
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dcicutils"
version = "7.9.0"
version = "7.10.0"
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
license = "MIT"
Expand Down
112 changes: 109 additions & 3 deletions test/test_ff_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1316,6 +1316,111 @@ def get_it():
time.sleep(2)


@pytest.mark.unit
def test_get_schema_with_vapp():
    # Unit test for ff_utils.get_schema covering argument consistency checking
    # and the two retrieval paths (get_metadata vs. portal_vapp).

    sample_vapp = mock.MagicMock()
    sample_schema_metadata = {"foo": "foo-schema", "bar": "bar-schema"}
    sample_auth = mock.MagicMock()

    # Conflicting environment names must be rejected.
    with pytest.raises(ValueError) as exc:
        ff_utils.get_schema('User', ff_env='foo', portal_env='bar')
    assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.'

    # An environment name (either spelling) cannot be combined with a vapp.
    with pytest.raises(ValueError) as exc:
        ff_utils.get_schema('User', ff_env='foo', portal_vapp=sample_vapp)
    assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.'

    with pytest.raises(ValueError) as exc:
        ff_utils.get_schema('User', portal_env='foo', portal_vapp=sample_vapp)
    assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.'

    # The non-vapp path behaves the same with no env, with portal_env=, and with ff_env=.
    for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]:

        with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata:
            with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server:

                # Whichever env keyword was used (if any), its value is what should reach get_metadata.
                expected_env = list(env_args.items())[0][1] if env_args else None

                mock_get_metadata.return_value = sample_schema_metadata
                mock_get_authentication_with_server.return_value = sample_auth

                # When called with no vapp, get_metadata is consulted (after getting auth info)
                assert ff_utils.get_schema('User', **env_args) == sample_schema_metadata

                # get_schema delegates auth resolution to get_metadata rather than doing it itself.
                mock_get_authentication_with_server.assert_not_called()
                mock_get_metadata.assert_called_once_with(obj_id='profiles/User.json', key=None, ff_env=expected_env,
                                                          add_on='frame=raw')

                sample_vapp.get.assert_not_called()

        sample_vapp.get.assert_not_called()

    # The vapp path: no metadata/auth calls; the schema comes from the vapp's JSON response.
    with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata:
        with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server:

            sample_vapp.get.return_value = MockResponse(200, json=sample_schema_metadata)

            assert ff_utils.get_schema('User', portal_vapp=sample_vapp) == sample_schema_metadata

            mock_get_authentication_with_server.assert_not_called()
            mock_get_metadata.assert_not_called()

            sample_vapp.get.assert_called_once_with('profiles/User.json?frame=raw')


@pytest.mark.unit
def test_get_schemas_with_vapp():
    # Unit test for ff_utils.get_schemas, parallel to test_get_schema_with_vapp:
    # argument consistency checks plus both retrieval paths.

    sample_vapp = mock.MagicMock()
    sample_schema_metadata = {"foo": {"$id": "Foo.json"}, "bar": {"$id": "Bar.json"}}
    sample_auth = mock.MagicMock()

    # Conflicting environment names must be rejected.
    with pytest.raises(ValueError) as exc:
        ff_utils.get_schemas(ff_env='foo', portal_env='bar')
    assert str(exc.value) == 'You may not supply both portal_env= and ff_env= together.'

    # An environment name (either spelling) cannot be combined with a vapp.
    with pytest.raises(ValueError) as exc:
        ff_utils.get_schemas(ff_env='foo', portal_vapp=sample_vapp)
    assert str(exc.value) == 'You may not supply both portal_vapp= and ff_env= together.'

    with pytest.raises(ValueError) as exc:
        ff_utils.get_schemas(portal_env='foo', portal_vapp=sample_vapp)
    assert str(exc.value) == 'You may not supply both portal_vapp= and portal_env= together.'

    # The non-vapp path behaves the same with no env, with portal_env=, and with ff_env=.
    for env_args in [{}, {'portal_env': 'foo'}, {'ff_env': 'foo'}]:

        with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata:
            with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server:

                # Whichever env keyword was used (if any), its value is what should reach get_metadata.
                expected_env = list(env_args.items())[0][1] if env_args else None

                mock_get_metadata.return_value = sample_schema_metadata
                mock_get_authentication_with_server.return_value = sample_auth

                assert ff_utils.get_schemas(**env_args) == sample_schema_metadata

                # get_schemas delegates auth resolution to get_metadata rather than doing it itself.
                mock_get_authentication_with_server.assert_not_called()
                mock_get_metadata.assert_called_once_with(obj_id='profiles/', key=None, ff_env=expected_env,
                                                          add_on='frame=raw')

                sample_vapp.get.assert_not_called()

        sample_vapp.get.assert_not_called()

    # The vapp path: no metadata/auth calls; schemas come straight from vapp.get().
    # NOTE(review): unlike get_schema, this return value is NOT wrapped in MockResponse,
    # mirroring that get_schemas uses vapp.get()'s raw return rather than its JSON body —
    # confirm that asymmetry is intended in ff_utils.
    with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata:
        with mock.patch.object(ff_utils, "get_authentication_with_server") as mock_get_authentication_with_server:

            sample_vapp.get.return_value = sample_schema_metadata

            assert ff_utils.get_schemas(portal_vapp=sample_vapp) == sample_schema_metadata

            mock_get_authentication_with_server.assert_not_called()
            mock_get_metadata.assert_not_called()

            sample_vapp.get.assert_called_once_with('profiles/?frame=raw')


def test_get_schemas_options():

mocked_schemas = {
Expand Down Expand Up @@ -1350,9 +1455,10 @@ def mocked_schemas_subset(keys):

with mock.patch.object(ff_utils, "get_metadata") as mock_get_metadata:

def mocked_get_metadata(url, key, add_on):
assert url == "profiles/" # this is the web API to ask for all profiles
assert key == 'some-auth' # we assume auth is tested elsewhere
def mocked_get_metadata(obj_id, key, ff_env, add_on):
assert obj_id == "profiles/" # this is the web API to ask for all profiles
assert key is None # it would get looked up
assert ff_env is None # it would get looked up, too
assert add_on == "frame=raw" # we presently always send this
return mocked_schemas

Expand Down
5 changes: 4 additions & 1 deletion test/test_s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,14 +402,17 @@ def test_s3utils_get_higlass_key(portal_env):
# assert key[dict_key]


# Google Cloud project id expected in the fourfront service-account key.
# Hoisted to a named constant so an environmental project change is a one-line edit.
_GOOGLE_FOURFRONT_PROJECT_ID = "fourfront-396315"


@pytest.mark.integrated
@using_fresh_ff_state_for_testing()
def test_s3utils_get_google_key():
    # Integration test: fetches the Google service-account key via s3Utils
    # and sanity-checks its structure (requires live 'staging' credentials).
    s3u = s3Utils(env='staging')
    keys = s3u.get_google_key()
    assert isinstance(keys, dict)
    assert keys['type'] == 'service_account'
    assert keys["project_id"] == _GOOGLE_FOURFRONT_PROJECT_ID
    # Required service-account fields must be present and non-empty.
    for dict_key in ['private_key_id', 'private_key', 'client_email', 'client_id', 'auth_uri', 'client_x509_cert_url']:
        assert keys[dict_key]

Expand Down

0 comments on commit b079275

Please sign in to comment.