Skip to content

Commit

Permalink
Merge pull request #282 from 4dn-dcic/kmp_sheet_utils_with_vapp
Browse files Browse the repository at this point in the history
Add portal_vapp= functionality to sheet_utils
  • Loading branch information
netsettler authored Sep 7, 2023
2 parents e09af07 + 5a07b69 commit 295adfe
Show file tree
Hide file tree
Showing 8 changed files with 305 additions and 42 deletions.
19 changes: 18 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Change Log
----------


7.10.0
7.11.0
======

* New module ``sheet_utils`` for loading workbooks.
Expand All @@ -34,6 +34,23 @@ Change Log
* New class ``JsonLinesReader``


7.10.0
======

* In ``ff_utils``:

* New arguments ``portal_env=`` and ``portal_vapp=``
for functions ``get_schema`` and ``get_schemas``.

* In ``s3_utils``:

* Fix a failing test (caused by an environmental change, no functional change).

* In ``license_utils``:

* Allow C4 infrastructure to use the ``chardet`` library.


7.9.0
=====

Expand Down
77 changes: 63 additions & 14 deletions dcicutils/ff_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from collections import namedtuple
from elasticsearch.exceptions import AuthorizationException
from typing import Optional, Dict, List
from typing import Dict, List, Optional
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
from . import s3_utils, es_utils
from .common import (
Expand All @@ -17,7 +17,7 @@
# S3BucketName, S3KeyName,
)
from .lang_utils import disjoined_list
from .misc_utils import PRINT, to_camel_case, remove_suffix
from .misc_utils import PRINT, to_camel_case, remove_suffix, VirtualApp


# TODO (C4-92, C4-102): Probably to centralize this information in env_utils. Also figure out relation to CGAP.
Expand Down Expand Up @@ -419,7 +419,7 @@ def search_result_generator(page_generator):
but where a page size of 3 is used with start position 0. That call will return A,C,E. The
user may expect G,I on the second page, but before it can be done, suppose an element D is
indexed and that the stored data is A,C,D,E,G,I,K,M. Requesting data from start position 0 would
now return A,C,D but we already had the first page, so we request data starting at position 3
now return A,C,D, but we already had the first page, so we request data starting at position 3
for the second page and get E,G,I. That means our sequence of return values would be A,C,E,E,G,I,K,M,
or, in other words, showing a duplication. To avoid this, we keep track of the IDs we've seen
and show only the first case of each element, so A,C,E,G,I,K,M. (We won't see the D, but we weren't
Expand Down Expand Up @@ -647,7 +647,7 @@ def get_associated_qc_metrics(uuid, key=None, ff_env=None, include_processed_fil
include_raw_files=False,
include_supplementary_files=False):
"""
Given a uuid of an experimentSet return a dictionary of dictionaries with each dictionary
Given a UUID of an experimentSet return a dictionary of dictionaries with each dictionary
representing a quality metric.
Args:
Expand Down Expand Up @@ -942,41 +942,90 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
yield hit['_source'] # yield individual items from ES


def get_schema(name, key=None, ff_env=None) -> Dict:
def resolve_portal_env(ff_env: Optional[str], portal_env: Optional[str],
                       portal_vapp: Optional[VirtualApp]) -> Optional[str]:
    """
    Picks the environment name to use from ff_env= and portal_env=, after checking the arguments for consistency.

    A ValueError is raised if either of these checks fails:

    1. If both ff_env= and portal_env= are given (non-empty), they must have the same value.
    2. If either ff_env= or portal_env= is given (non-empty), portal_vapp= must not also be given.

    Callers are expected to write:

        portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp)

    and from then on need not worry about inconsistent arguments.

    Args:
        ff_env: an environment name or None
        portal_env: an environment name or None
        portal_vapp: a VirtualApp or None

    Returns:
        ff_env if it was given, otherwise portal_env (which may be None)
    """
    both_envs_given = bool(ff_env) and bool(portal_env)
    if both_envs_given and portal_env != ff_env:
        raise ValueError("You may not supply both portal_env= and ff_env= together.")
    # ff_env= takes precedence when given; at this point it can only agree with portal_env=.
    resolved_env = ff_env if ff_env else portal_env
    if resolved_env and portal_vapp:
        # Name whichever environment argument the caller actually used.
        offending_arg = 'ff_env=' if ff_env else 'portal_env='
        raise ValueError(f"You may not supply both portal_vapp= and {offending_arg} together.")
    return resolved_env


def get_schema(name, key=None, ff_env: Optional[str] = None, portal_env: Optional[str] = None,
               portal_vapp: Optional[VirtualApp] = None) -> Dict:
    """
    Gets the schema definition with the given name.

    Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained.

    Args:
        name (str): a schema name (CamelCase or snake_case), or None
        key (dict): standard ff_utils authentication key
        ff_env (str): standard environment string (deprecated, please prefer portal_env=)
        portal_env: standard environment string (compatible replacement for ff_env=)
        portal_vapp: a VirtualApp or None

    Returns:
        dict: the schema definition fetched from profiles/<CamelCaseName>.json (with frame=raw)

    Raises:
        ValueError: if the environment arguments are inconsistent (see resolve_portal_env)
    """
    # Validate the argument combination first, so that the two fetch paths below cannot both apply.
    portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp)
    base_url = f"profiles/{to_camel_case(name)}.json"
    add_on = 'frame=raw'
    if portal_vapp:
        # Fetch through the supplied VirtualApp; the add_on is appended as the query string here.
        full_url = f"{base_url}?{add_on}"
        res = portal_vapp.get(full_url)
        return get_response_json(res)
    else:
        # No VirtualApp, so fetch via get_metadata using the key and/or environment name.
        schema = get_metadata(obj_id=base_url, key=key, ff_env=portal_env, add_on=add_on)
        return schema


def get_schemas(key=None, ff_env=None, *, allow_abstract=True, require_id=False) -> Dict[str, Dict]:
def get_schemas(key=None, ff_env: Optional[str] = None, *, allow_abstract: bool = True, require_id: bool = False,
portal_env: Optional[str] = None, portal_vapp: Optional[VirtualApp] = None) -> Dict[str, Dict]:
"""
Gets a dictionary of all schema definitions.
By default, this returns all schemas, but the allow_abstract= and require_id= keywords allow limited filtering.
Only one of portal_env= (or ff_env=) or portal_vapp= can be provided. This determines how the schemas are obtained.
Args:
key (dict): standard ff_utils authentication key
ff_env (str): standard ff environment string
ff_env (str): standard environment string (deprecated, please prefer portal_env=)
portal_env: standard environment string (compatible replacement for ff_env=)
portal_vapp: a VirtualApp or None
allow_abstract (boolean): controls whether abstract schemas can be returned (default True, return them)
require_id (boolean): controls whether a '$id' field is required for schema to be included
(default False, include even if no $id)
Returns:
dict: a mapping from keys that are schema names to schema definitions
"""
auth = get_authentication_with_server(key, ff_env)
schemas: Dict[str, Dict] = get_metadata('profiles/', key=auth, add_on='frame=raw')
portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp)
base_url = 'profiles/'
add_on = 'frame=raw'
if portal_vapp:
full_url = f"{base_url}?{add_on}"
schemas: Dict[str, Dict] = portal_vapp.get(full_url)
else:
schemas: Dict[str, Dict] = get_metadata(obj_id=base_url, key=key, ff_env=portal_env, add_on=add_on)
filtered_schemas = {}
for schema_name, schema in schemas.items():
if allow_abstract or not schema.get('isAbstract'):
Expand Down
6 changes: 5 additions & 1 deletion dcicutils/misc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,11 @@ class _VirtualAppHelper(webtest.TestApp):
pass


class VirtualApp:
class AbstractVirtualApp:
    """
    Empty base class for VirtualApp. It defines no behavior of its own.
    """
    pass


class VirtualApp(AbstractVirtualApp):
"""
Wrapper class for TestApp, to allow custom control over submitting Encoded requests,
simulating a number of conditions, including permissions.
Expand Down
63 changes: 45 additions & 18 deletions dcicutils/sheet_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@
import uuid
import yaml

from dcicutils.common import AnyJsonData
from dcicutils.env_utils import public_env_name, EnvUtils
from dcicutils.ff_utils import get_schema
from dcicutils.lang_utils import conjoined_list, disjoined_list, maybe_pluralize, there_are
from dcicutils.misc_utils import ignored, PRINT, pad_to, JsonLinesReader
from dcicutils.task_utils import pmap
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.workbook.workbook import Workbook
from tempfile import TemporaryFile
from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union
from .common import AnyJsonData
from .env_utils import public_env_name, EnvUtils
from .ff_utils import get_schema
from .lang_utils import conjoined_list, disjoined_list, maybe_pluralize, there_are
from .misc_utils import ignored, PRINT, pad_to, JsonLinesReader, AbstractVirtualApp
from .task_utils import pmap


Header = str
Expand Down Expand Up @@ -614,32 +614,34 @@ class SchemaAutoloadMixin(AbstractTableSetManager):
AUTOLOAD_SCHEMAS_DEFAULT = True

def __init__(self, filename: str, autoload_schemas: Optional[bool] = None, portal_env: Optional[str] = None,
             portal_vapp: Optional[AbstractVirtualApp] = None, **kwargs):
    """
    Records the schema-autoloading configuration, then continues normal initialization.

    :param filename: The name of the file being managed (passed along to the superclass).
    :param autoload_schemas: Whether to autoload schemas. If None, AUTOLOAD_SCHEMAS_DEFAULT is used.
    :param portal_env: The name of a portal environment from which schemas can be fetched.
    :param portal_vapp: A vapp through which schemas can be fetched.
    :param kwargs: Any other keyword arguments, passed along to the superclass.
    """
    # This setup must be in place before the class initialization is done (via the super call).
    self.autoload_schemas: bool = self.AUTOLOAD_SCHEMAS_DEFAULT if autoload_schemas is None else autoload_schemas
    if self.autoload_schemas:  # If autoload_schemas is False, we don't care about doing this defaulting.
        if portal_env is None and portal_vapp is None:
            # Neither source of schemas was given, so default to the public name of the production environment.
            portal_env = public_env_name(EnvUtils.PRD_ENV_NAME)
            PRINT(f"The portal_env was not explicitly supplied. Schemas will come from portal_env={portal_env!r}.")
    self.portal_env: Optional[str] = portal_env
    self.portal_vapp: Optional[AbstractVirtualApp] = portal_vapp
    super().__init__(filename=filename, **kwargs)

def fetch_relevant_schemas(self, schema_names: List[str]):
    """
    Fetches the schemas with the given names, if autoloading is enabled and a portal_env or portal_vapp is set.

    :param schema_names: The names of the schemas to fetch.
    :return: A dictionary mapping each given name to its fetched schema,
        or an empty dictionary if autoloading is disabled or there is neither a portal_env nor a portal_vapp.
    """
    # The schema_names argument is not normally given, but it is there for easier testing
    def fetch_schema(schema_name):
        schema = self.fetch_schema(schema_name, portal_env=self.portal_env, portal_vapp=self.portal_vapp)
        # Return the name with the schema so each pmap result can be turned into a dictionary entry.
        return schema_name, schema
    if self.autoload_schemas and (self.portal_env or self.portal_vapp):
        autoloaded = {tab_name: schema
                      for tab_name, schema in pmap(fetch_schema, schema_names)}
        return autoloaded
    else:
        return {}

@classmethod
def fetch_schema(cls, schema_name: str, *, portal_env: str):
def fetch_schema(cls, schema_name: str, *, portal_env: Optional[str] = None,
portal_vapp: Optional[AbstractVirtualApp] = None):
def just_fetch_it():
return get_schema(schema_name, ff_env=portal_env)
return get_schema(schema_name, portal_env=portal_env, portal_vapp=portal_vapp)
if cls.CACHE_SCHEMAS:
schema: Optional[AnyJsonData] = cls.SCHEMA_CACHE.get(schema_name)
if schema is None:
Expand All @@ -665,9 +667,16 @@ def __init__(self, filename: str, schemas: Optional[Dict[str, AnyJsonData]] = No
self.patch_prototypes_by_tab_name: Dict[str, Dict] = {}
self.parsed_headers_by_tab_name: Dict[str, ParsedHeaders] = {}
self.type_hints_by_tab_name: Dict[str, OptionalTypeHints] = {}
self.schemas = schemas or self.fetch_relevant_schemas(self.tab_names)
self._schemas = schemas
self._instaguid_context_table: Dict[str, str] = {}

@property
def schemas(self):
    """
    The schemas for this manager.

    If no schemas have been stored yet (the stored value is None), they are obtained by calling
    fetch_relevant_schemas on this manager's tab names, and the result is stored for later accesses.
    """
    if self._schemas is None:
        # Fetching is deferred until the schemas are first asked for, rather than done at initialization.
        self._schemas = self.fetch_relevant_schemas(self.tab_names)
    return self._schemas

def sheet_patch_prototype(self, tab_name: str) -> Dict:
    """
    Returns the patch prototype recorded for the given tab name.
    A KeyError is raised if nothing has been recorded under that name.
    """
    prototypes_by_tab = self.patch_prototypes_by_tab_name
    return prototypes_by_tab[tab_name]

Expand Down Expand Up @@ -841,12 +850,18 @@ class SimpleYamlInsertsManager(SimpleInsertsMixin, YamlInsertsMixin, InsertsMana


class InsertsItemMixin(AbstractItemManager): # ItemManagerMixin isn't really appropriate here
"""
This class is used for inserts directories and other JSON-like data that will be literally used as an Item
without semantic pre-processing. In other words, these classes will not be pre-checked for semantic correctness
but instead assumed to have been checked by other means.
"""

AUTOLOAD_SCHEMAS_DEFAULT = False # Has no effect, but someone might inspect the value.

def __init__(self, filename: str, *, autoload_schemas: Optional[bool] = None, portal_env: Optional[str] = None,
schemas: Optional[Dict[str, AnyJsonData]] = None, **kwargs):
ignored(portal_env) # Would only be used if autoload_schemas was requested, and we don't allow that.
portal_vapp: Optional[AbstractVirtualApp] = None, schemas: Optional[Dict[str, AnyJsonData]] = None,
**kwargs):
ignored(portal_env, portal_vapp) # Would only be used if autoload_schemas was true, and we don't allow that.
if schemas not in [None, {}]:
raise ValueError(f"{self.__class__.__name__} does not allow schemas={schemas!r}.")
if autoload_schemas not in [None, False]:
Expand Down Expand Up @@ -1038,12 +1053,24 @@ def create_implementation_manager(cls, filename: str, **kwargs) -> AbstractItemM
@classmethod
def load(cls, filename: str, tab_name: Optional[str] = None, escaping: Optional[bool] = None,
         schemas: Optional[Dict] = None, autoload_schemas: Optional[bool] = None,
         portal_env: Optional[str] = None, portal_vapp: Optional[AbstractVirtualApp] = None,
         **kwargs) -> TabbedSheetData:
    """
    Given a filename and various options, loads the items associated with that filename.

    :param filename: The name of the file to load.
    :param tab_name: For files that lack multiple tabs (such as .csv or .tsv),
        the tab name to associate with the data.
    :param escaping: Whether to perform escape processing on backslashes.
    :param schemas: A set of schemas to use instead of trying to load them.
    :param autoload_schemas: Whether to try autoloading schemas.
    :param portal_env: A portal to consult to find schemas (usually if calling from the outside of a portal).
    :param portal_vapp: A vapp to use (usually if calling from within a portal).
    :param kwargs: Any other keyword arguments, passed along to create_implementation_manager.
    :return: The content loaded by the implementation manager created for the file.
    """
    # All options are forwarded; the manager that create_implementation_manager returns does the actual loading.
    manager = cls.create_implementation_manager(filename=filename, tab_name=tab_name, escaping=escaping,
                                                schemas=schemas, autoload_schemas=autoload_schemas,
                                                portal_env=portal_env, portal_vapp=portal_vapp,
                                                **kwargs)
    return manager.load_content()


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dcicutils"
version = "7.9.0.1b5" # to become "7.10.0"
version = "7.10.0.1b7" # to become "7.11.0"
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
license = "MIT"
Expand Down
Loading

0 comments on commit 295adfe

Please sign in to comment.