Skip to content

Commit

Permalink
Merge pull request #300 from 4dn-dcic/dmichaels-20240218
Browse files Browse the repository at this point in the history
Changes to troubleshooting utility script view-portal-object
  • Loading branch information
dmichaels-harvard authored Mar 13, 2024
2 parents f6d8ae4 + b974655 commit 6359cca
Show file tree
Hide file tree
Showing 46 changed files with 39,934 additions and 249 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@ Change Log
----------


8.8.1
=====
* Changes to troubleshooting utility script view-portal-object.
* Some reworking of ref lookup in structured_data.
* Support ref caching in structured_data.
* Added hook to turn off ref lookup by subtypes in case we need this later.
* Added hook to do ref lookup at root path first; set to true by smaht-portal for accession IDs.
* Moved/adapted test_structured_data.py from smaht-portal to here.


8.8.0
=====
* Changes to structured_data support date/time types.
Expand Down
7 changes: 6 additions & 1 deletion dcicutils/data_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,12 @@ def sheet_reader(self, sheet_name: str) -> ExcelSheetReader:

def open(self) -> None:
if self._workbook is None:
self._workbook = openpyxl.load_workbook(self._file, data_only=True)
import warnings
with warnings.catch_warnings():
# Without this warning suppression, for some spreadsheets we get this stdout warning:
# UserWarning: data validation extension is not supported and will be removed
warnings.filterwarnings("ignore", category=UserWarning)
self._workbook = openpyxl.load_workbook(self._file, data_only=True)
self.sheet_names = [sheet_name for sheet_name in self._workbook.sheetnames
if not self.is_hidden_sheet(self._workbook[sheet_name])]

Expand Down
4 changes: 4 additions & 0 deletions dcicutils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ def search_for_file(file: str,
elif not isinstance(location, list):
location = []
for directory in location:
if not directory:
continue
if isinstance(directory, (str, pathlib.PosixPath)) and os.path.exists(os.path.join(directory, file)):
file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
if single:
Expand All @@ -37,6 +39,8 @@ def search_for_file(file: str,
files_found.append(file_found)
if recursive:
for directory in location:
if not directory:
continue
if not directory.endswith("/**") and not file.startswith("**/"):
path = f"{directory}/**/{file}"
else:
Expand Down
202 changes: 116 additions & 86 deletions dcicutils/portal_object_utils.py

Large diffs are not rendered by default.

58 changes: 52 additions & 6 deletions dcicutils/portal_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,22 @@ class Portal:
KEYS_FILE_DIRECTORY = "~"
MIME_TYPE_JSON = "application/json"

# Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
# structured_data.py; controlled by an optional ref_lookup_strategy callable; default is
# lookup at root path but after the specified type path lookup, and then lookup all subtypes;
# can choose to lookup root path first, or not lookup root path at all, or not lookup
# subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
# and value (string) arguments and return an integer of any of the below ORed together.
# The main purpose of this is optimization; to minimize portal lookups; since for example,
# currently at least, /{type}/{accession} does not work but /{accession} does; so we
# currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
# And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
LOOKUP_SPECIFIED_TYPE = 0x0001
LOOKUP_ROOT = 0x0002
LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
LOOKUP_SUBTYPES = 0x0008
LOOKUP_DEFAULT = LOOKUP_SPECIFIED_TYPE | LOOKUP_ROOT | LOOKUP_SUBTYPES

def __init__(self,
arg: Optional[Union[Portal, TestApp, VirtualApp, PyramidRouter, dict, tuple, str]] = None,
env: Optional[str] = None, server: Optional[str] = None,
Expand Down Expand Up @@ -188,6 +204,23 @@ def app(self) -> Optional[str]:
def vapp(self) -> Optional[TestApp]:
return self._vapp

@staticmethod
def is_lookup_specified_type(lookup_options: int) -> bool:
    """
    Returns True if every bit of Portal.LOOKUP_SPECIFIED_TYPE
    is set in the given lookup_options bit mask, otherwise False.
    """
    flag = Portal.LOOKUP_SPECIFIED_TYPE
    return (lookup_options & flag) == flag

@staticmethod
def is_lookup_root(lookup_options: int) -> bool:
    """
    Returns True if every bit of Portal.LOOKUP_ROOT is set
    in the given lookup_options bit mask, otherwise False.
    """
    flag = Portal.LOOKUP_ROOT
    return (lookup_options & flag) == flag

@staticmethod
def is_lookup_root_first(lookup_options: int) -> bool:
    """
    Returns True if every bit of Portal.LOOKUP_ROOT_FIRST is set in the
    given lookup_options bit mask, otherwise False. LOOKUP_ROOT_FIRST is a
    composite flag which includes LOOKUP_ROOT, so all of its bits must be
    present; having just one of them set is not sufficient.
    """
    flag = Portal.LOOKUP_ROOT_FIRST
    return (lookup_options & flag) == flag

@staticmethod
def is_lookup_subtypes(lookup_options: int) -> bool:
    """
    Returns True if every bit of Portal.LOOKUP_SUBTYPES is set
    in the given lookup_options bit mask, otherwise False.
    """
    flag = Portal.LOOKUP_SUBTYPES
    return (lookup_options & flag) == flag

def get(self, url: str, follow: bool = True,
raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
url = self.url(url, raw, database)
Expand Down Expand Up @@ -232,14 +265,21 @@ def post(self, url: str, data: Optional[dict] = None, json: Optional[dict] = Non
response.raise_for_status()
return response

def get_metadata(self, object_id: str, raw: bool = False, database: bool = False) -> Optional[dict]:
def get_metadata(self, object_id: str, raw: bool = False,
database: bool = False, raise_exception: bool = True) -> Optional[dict]:
if isinstance(raw, bool) and raw:
add_on = "frame=raw" + ("&datastore=database" if isinstance(database, bool) and database else "")
elif database:
add_on = "datastore=database"
else:
add_on = ""
return get_metadata(obj_id=object_id, vapp=self.vapp, key=self.key, add_on=add_on)
if raise_exception:
return get_metadata(obj_id=object_id, vapp=self.vapp, key=self.key, add_on=add_on)
else:
try:
return get_metadata(obj_id=object_id, vapp=self.vapp, key=self.key, add_on=add_on)
except Exception:
return None

def patch_metadata(self, object_id: str, data: dict) -> Optional[dict]:
if self.key:
Expand Down Expand Up @@ -331,15 +371,15 @@ def get_schemas_super_type_map(self) -> dict:
Returns the "super type map" for all of the known schemas (via /profiles).
This is a dictionary with property names which are all known schema type names which
have (one or more) sub-types, and the value of each such property name is an array
of all of those sub-types (direct and all descendents), in breadth first order.
of all of those sub-type names (direct and all descendents), in breadth first order.
"""
def list_breadth_first(super_type_map: dict, super_type_name: str) -> dict:
result = []
queue = deque(super_type_map.get(super_type_name, []))
while queue:
result.append(sub_type_name := queue.popleft())
if sub_type_name in super_type_map:
queue.extend(super_type_map[sub_type_name])
result.append(subtype_name := queue.popleft())
if subtype_name in super_type_map:
queue.extend(super_type_map[subtype_name])
return result
if not (schemas := self.get_schemas()):
return {}
Expand All @@ -358,6 +398,12 @@ def list_breadth_first(super_type_map: dict, super_type_name: str) -> dict:
super_type_map_flattened[super_type_name] = list_breadth_first(super_type_map, super_type_name)
return super_type_map_flattened

@lru_cache(maxsize=64)
def get_schema_subtype_names(self, type_name: str) -> List[str]:
    """
    Returns the list of sub-type names which the map returned by
    get_schemas_super_type_map() holds for the given type name; returns
    an empty list if that map is empty or has no entry for the type name.
    Results are memoized by lru_cache, keyed on the (self, type_name) arguments.
    """
    super_type_map = self.get_schemas_super_type_map()
    if not super_type_map:
        return []
    return super_type_map.get(type_name, [])

def url(self, url: str, raw: bool = False, database: bool = False) -> str:
if not isinstance(url, str) or not url:
return "/"
Expand Down
Loading

0 comments on commit 6359cca

Please sign in to comment.