From a2991ece09ed401f07aa86bf95e68529a9308239 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 25 May 2024 23:40:10 -0400 Subject: [PATCH 01/37] adding merge capability to structured_data; first some portal_util changes. --- CHANGELOG.rst | 6 +++++ dcicutils/portal_utils.py | 50 ++++++++++++++++++++++++++++++++++++ dcicutils/structured_data.py | 12 ++++++--- pyproject.toml | 2 +- 4 files changed, 66 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 013c64297..c4d06872f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,12 @@ Change Log ---------- +8.8.6 +===== + +* Added merge capabilities to structured_data. (IN PROGRESS: 2025-05-25) + + 8.8.6 ===== diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index b6bc16684..34097a9fb 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -1,5 +1,6 @@ from collections import deque from functools import lru_cache +from dcicutils.function_cache_decorator import function_cache import io import json from pyramid.config import Configurator as PyramidConfigurator @@ -18,6 +19,7 @@ from dcicutils.common import APP_SMAHT, OrchestratedApp, ORCHESTRATED_APPS from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata from dcicutils.misc_utils import to_camel_case, VirtualApp +from dcicutils.schema_utils import get_identifying_properties from dcicutils.tmpfile_utils import temporary_file Portal = Type["Portal"] # Forward type reference for type hints. @@ -441,6 +443,54 @@ def _kwargs(self, **kwargs) -> dict: result_kwargs["timeout"] = timeout return result_kwargs + @function_cache(maxsize=100, serialize_key=True) + def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] = None) -> List[str]: + """ + Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any + uuid based path and defavors aliases based paths (ala self.get_identifying_property_names); + no other ordering defined. Returns empty list of none or otherwise not found. + """ + results = [] + if not isinstance(portal_object, dict): + return results + if not isinstance(portal_type, str) or not portal_type: + if not (portal_type := self.get_schema_type(portal_object)): + return results + for identifying_property in self.get_identifying_property_names(portal_type): + if identifying_value := portal_object.get(identifying_property): + if isinstance(identifying_value, list): + for identifying_value_item in identifying_value: + results.append(f"/{portal_type}/{identifying_value_item}") + elif identifying_property == "uuid": + results.append(f"/{identifying_value}") + else: + results.append(f"/{portal_type}/{identifying_value}") + return results + + @function_cache(maxsize=100, serialize_key=True) + def get_identifying_property_names(self, schema: Union[str, dict]) -> List[str]: + """ + Returns the list of identifying property names for the given Portal schema, which may + be either a schema name or a schema object; empty list of none or otherwise not found. + """ + results = [] + if isinstance(schema, str): + try: + if not (schema := self.get_schema(schema)): + return results + except Exception: + return results + elif not isinstance(schema, dict): + return results + if not (identifying_properties := get_identifying_properties(schema)): + return results + identifying_properties = [*identifying_properties] + for favored_identifying_property in reversed(["uuid", "identifier"]): + if favored_identifying_property in identifying_properties: + identifying_properties.remove(favored_identifying_property) + identifying_properties.insert(0, favored_identifying_property) + return identifying_properties + @staticmethod def _default_keys_file(app: Optional[str], env: Optional[str], server: Optional[str]) -> Optional[str]: def infer_app_from_env(env: str) -> Optional[str]: # noqa diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 6d489ea17..555cf738b 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -56,7 +56,7 @@ def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp remove_empty_objects_from_lists: bool = True, ref_lookup_strategy: Optional[Callable] = None, ref_lookup_nocache: bool = False, - norefs: bool = False, + norefs: bool = False, merge: bool = False, progress: Optional[Callable] = None, debug_sleep: Optional[str] = None) -> None: self._progress = progress if callable(progress) else None @@ -75,6 +75,7 @@ def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp self._nrows = 0 self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None self._norefs = True if norefs is True else False + self._merge = True if merge is True else False self._debug_sleep = None if debug_sleep: try: @@ -98,13 +99,13 @@ def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None, remove_empty_objects_from_lists: bool = True, ref_lookup_strategy: Optional[Callable] = None, ref_lookup_nocache: bool = False, - norefs: bool = False, + norefs: bool = False, merge: bool = False, progress: Optional[Callable] = None, debug_sleep: Optional[str] = None) -> StructuredDataSet: return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune, remove_empty_objects_from_lists=remove_empty_objects_from_lists, ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache, - norefs=norefs, progress=progress, debug_sleep=debug_sleep) + norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep) def validate(self, force: bool = False) -> None: def data_without_deleted_properties(data: dict) -> dict: @@ -383,6 +384,11 @@ def _load_reader(self, reader: RowReader, type_name: str) -> None: structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number) if self._autoadd_properties: self._add_properties(structured_row, self._autoadd_properties, schema) + # New merge functionality (2024-05-25). + if self._merge: + for identifying_path in self.get_identifying_paths(self._portal, structured_row, type_name): + if existing_portal_object := self._portal.get_metadata(identifying_path): + structured_row = merge_objects(existing_portal_object, structured_row) if (prune_error := self._prune_structured_row(structured_row)) is not None: self._note_error({"src": create_dict(type=schema_name, row=reader.row_number), "error": prune_error}, "validation") diff --git a/pyproject.toml b/pyproject.toml index 424704e8f..01a761bad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6" +version = "8.8.6.1b1" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 3a6614d551c9bbd0351843e71bd1b56d6f44fcb9 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 26 May 2024 08:38:35 -0400 Subject: [PATCH 02/37] adding merge capability to structured_data; first some portal_util changes. --- dcicutils/portal_utils.py | 82 ++++++++++++++++++------------------ dcicutils/structured_data.py | 37 +++++++++++----- pyproject.toml | 2 +- 3 files changed, 68 insertions(+), 53 deletions(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index 34097a9fb..b0e9d7762 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -418,31 +418,6 @@ def get_schema_subtype_names(self, type_name: str) -> List[str]: return [] return schemas_super_type_map.get(type_name, []) - def url(self, url: str, raw: bool = False, database: bool = False) -> str: - if not isinstance(url, str) or not url: - return "/" - elif (lowercase_url := url.lower()).startswith("http://") or lowercase_url.startswith("https://"): - return url - elif not (url := re.sub(r"/+", "/", url)).startswith("/"): - url = "/" - url = self.server + url if self.server else url - if isinstance(raw, bool) and raw: - url += ("&" if "?" in url else "?") + "frame=raw" - if isinstance(database, bool) and database: - url += ("&" if "?" in url else "?") + "datastore=database" - return url - - def _kwargs(self, **kwargs) -> dict: - if "headers" in kwargs: - result_kwargs = {"headers": kwargs["headers"]} - else: - result_kwargs = {"headers": {"Content-type": Portal.MIME_TYPE_JSON, "Accept": Portal.MIME_TYPE_JSON}} - if self.key_pair: - result_kwargs["auth"] = self.key_pair - if isinstance(timeout := kwargs.get("timeout"), int): - result_kwargs["timeout"] = timeout - return result_kwargs - @function_cache(maxsize=100, serialize_key=True) def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] = None) -> List[str]: """ @@ -491,6 +466,31 @@ def get_identifying_property_names(self, schema: Union[str, dict]) -> List[str]: identifying_properties.insert(0, favored_identifying_property) return identifying_properties + def url(self, url: str, raw: bool = False, database: bool = False) -> str: + if not isinstance(url, str) or not url: + return "/" + elif (lowercase_url := url.lower()).startswith("http://") or lowercase_url.startswith("https://"): + return url + elif not (url := re.sub(r"/+", "/", url)).startswith("/"): + url = "/" + url = self.server + url if self.server else url + if isinstance(raw, bool) and raw: + url += ("&" if "?" in url else "?") + "frame=raw" + if isinstance(database, bool) and database: + url += ("&" if "?" in url else "?") + "datastore=database" + return url + + def _kwargs(self, **kwargs) -> dict: + if "headers" in kwargs: + result_kwargs = {"headers": kwargs["headers"]} + else: + result_kwargs = {"headers": {"Content-type": Portal.MIME_TYPE_JSON, "Accept": Portal.MIME_TYPE_JSON}} + if self.key_pair: + result_kwargs["auth"] = self.key_pair + if isinstance(timeout := kwargs.get("timeout"), int): + result_kwargs["timeout"] = timeout + return result_kwargs + @staticmethod def _default_keys_file(app: Optional[str], env: Optional[str], server: Optional[str]) -> Optional[str]: def infer_app_from_env(env: str) -> Optional[str]: # noqa @@ -566,6 +566,22 @@ def raise_for_status(self): # noqa response = TestResponseWrapper(response) return response + @staticmethod + def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp: + if isinstance(arg, TestApp): + return arg + elif isinstance(arg, VirtualApp): + if not isinstance(arg.wrapped_app, TestApp): + raise Exception("Portal._create_vapp VirtualApp argument error.") + return arg.wrapped_app + if isinstance(arg, PyramidRouter): + router = arg + elif isinstance(arg, str) or not arg: + router = pyramid_get_app(arg or "development.ini", "app") + else: + raise Exception("Portal._create_vapp argument error.") + return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"}) + @staticmethod def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None) -> Portal: if isinstance(arg, list) or isinstance(arg, dict) or isinstance(arg, Callable): @@ -597,22 +613,6 @@ def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable with temporary_file(content=minimal_ini_for_testing, suffix=".ini") as ini_file: return Portal(ini_file) - @staticmethod - def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp: - if isinstance(arg, TestApp): - return arg - elif isinstance(arg, VirtualApp): - if not isinstance(arg.wrapped_app, TestApp): - raise Exception("Portal._create_vapp VirtualApp argument error.") - return arg.wrapped_app - if isinstance(arg, PyramidRouter): - router = arg - elif isinstance(arg, str) or not arg: - router = pyramid_get_app(arg or "development.ini", "app") - else: - raise Exception("Portal._create_vapp argument error.") - return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"}) - @staticmethod def _create_router_for_testing(endpoints: Optional[List[Dict[str, Union[str, Callable]]]] = None) -> PyramidRouter: if isinstance(endpoints, dict): diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 555cf738b..8f6b596a8 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -351,18 +351,23 @@ def get_counts() -> Tuple[int, int]: def _load_json_file(self, file: str) -> None: with open(file) as f: - file_json = json.load(f) - schema_inferred_from_file_name = Schema.type_name(file) - if self._portal.get_schema(schema_inferred_from_file_name) is not None: + item = json.load(f) + if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and + (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa # If the JSON file name looks like a schema name then assume it # contains an object or an array of object of that schema type. - self._add(Schema.type_name(file), file_json) - elif isinstance(file_json, dict): + if self._merge: + item = self._merge_with_existing_portal_object(item, schema_name_inferred_from_file_name) + self._add(Schema.type_name(file), item) + elif isinstance(item, dict): # Otherwise if the JSON file name does not look like a schema name then # assume it a dictionary where each property is the name of a schema, and # which (each property) contains a list of object of that schema type. - for schema_name in file_json: - self._add(schema_name, file_json[schema_name]) + for schema_name in item: + item = item[schema_name] + if self._merge: + item = self._merge_with_existing_portal_object(item, schema_name) + self._add(schema_name, item) def _load_reader(self, reader: RowReader, type_name: str) -> None: schema = None @@ -386,14 +391,12 @@ def _load_reader(self, reader: RowReader, type_name: str) -> None: self._add_properties(structured_row, self._autoadd_properties, schema) # New merge functionality (2024-05-25). if self._merge: - for identifying_path in self.get_identifying_paths(self._portal, structured_row, type_name): - if existing_portal_object := self._portal.get_metadata(identifying_path): - structured_row = merge_objects(existing_portal_object, structured_row) + structured_row = self._merge_with_existing_portal_object(structured_row, schema_name) if (prune_error := self._prune_structured_row(structured_row)) is not None: self._note_error({"src": create_dict(type=schema_name, row=reader.row_number), "error": prune_error}, "validation") else: - self._add(type_name, structured_row) + self._add(type_name, structured_row) # TODO: why type_name and not schema_name? if self._progress: self._progress({ PROGRESS.LOAD_ITEM: self._nrows, @@ -434,6 +437,18 @@ def _add_properties(self, structured_row: dict, properties: dict, schema: Option if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)): structured_row[name] = properties[name] + def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict: + """ + Given a Portal object (presumably/in-practice from the given metadata), if there is + an existing Portal item, identified by the identifying properties for the given object, + then merges the given object into the existing one and returns the result; otherwise + just returns the given object. Note that the given object may be CHANGED in place. + """ + for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type): + if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True): + return merge_objects(existing_portal_object, portal_object) + return portal_object + def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool: return (ref_lookup_flags & Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE diff --git a/pyproject.toml b/pyproject.toml index 01a761bad..b14d9fb12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b1" +version = "8.8.6.1b2" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From d7ff65c13f0d2ec8089ec85e905b90b469fa19d5 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 26 May 2024 09:20:36 -0400 Subject: [PATCH 03/37] adding merge capability in ingester for partial metadata --- dcicutils/structured_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 8f6b596a8..7e0034eae 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -445,7 +445,7 @@ def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: s just returns the given object. Note that the given object may be CHANGED in place. """ for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type): - if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True): + if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False): return merge_objects(existing_portal_object, portal_object) return portal_object From 1deed345807d670fe8cd58cf00b27760e63e68cf Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 26 May 2024 09:20:44 -0400 Subject: [PATCH 04/37] version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b14d9fb12..4bc10abba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b2" +version = "8.8.6.1b3" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 94100d3e542d61e7e373c35ab8572b36bbd8a688 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 26 May 2024 11:30:27 -0400 Subject: [PATCH 05/37] comments --- dcicutils/portal_utils.py | 12 ++++++------ dcicutils/submitr/ref_lookup_strategy.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index b0e9d7762..d172b0c25 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -307,7 +307,10 @@ def ping(self) -> bool: @lru_cache(maxsize=100) def get_schema(self, schema_name: str) -> Optional[dict]: - return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key) + try: + return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key) + except Exception: + return None @lru_cache(maxsize=1) def get_schemas(self) -> dict: @@ -446,14 +449,11 @@ def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] def get_identifying_property_names(self, schema: Union[str, dict]) -> List[str]: """ Returns the list of identifying property names for the given Portal schema, which may - be either a schema name or a schema object; empty list of none or otherwise not found. + be either a schema name or a schema object; empty list if none or otherwise not found. """ results = [] if isinstance(schema, str): - try: - if not (schema := self.get_schema(schema)): - return results - except Exception: + if not (schema := self.get_schema(schema)): return results elif not isinstance(schema, dict): return results diff --git a/dcicutils/submitr/ref_lookup_strategy.py b/dcicutils/submitr/ref_lookup_strategy.py index 55c4d2827..7a03a7315 100644 --- a/dcicutils/submitr/ref_lookup_strategy.py +++ b/dcicutils/submitr/ref_lookup_strategy.py @@ -20,7 +20,7 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str def ref_validator(schema: Optional[dict], property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]: """ - Returns False iff the type represented by the given schema, can NOT be referenced by + Returns False iff the type represented by the given schema can NOT be referenced by the given property name with the given property value, otherwise returns None. For example, if the schema is for the UnalignedReads type and the property name @@ -32,7 +32,7 @@ def ref_validator(schema: Optional[dict], the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to. The goal (in structured_data) being to detect if a type is being referenced in such - a way that cannot possibly be allowed, i.e. because none of its identifying types + a way that can NOT possibly be allowed, i.e. because none of its identifying types are in the required form (if indeed there any requirements). Note that it is guaranteed that the given property name is indeed an identifying property for the given type. """ From 6b066fdff1d3ad5607444b28c69cbff1f0b4cabf Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 26 May 2024 11:32:30 -0400 Subject: [PATCH 06/37] simplfiy ctor for PortalObject --- dcicutils/portal_object_utils.py | 8 +++----- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/dcicutils/portal_object_utils.py b/dcicutils/portal_object_utils.py index 200ea698d..4c14e790a 100644 --- a/dcicutils/portal_object_utils.py +++ b/dcicutils/portal_object_utils.py @@ -14,11 +14,9 @@ class PortalObject: _PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL - def __init__(self, data: dict, portal: Portal = None, - schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None: + def __init__(self, data: dict, portal: Portal = None, type: Optional[str] = None) -> None: self._data = data if isinstance(data, dict) else {} self._portal = portal if isinstance(portal, Portal) else None - self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None) self._type = type if isinstance(type, str) else "" @property @@ -32,7 +30,7 @@ def portal(self) -> Optional[Portal]: @property @lru_cache(maxsize=1) def type(self) -> str: - return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else "") + return self._type or Portal.get_schema_type(self._data) or "" @property @lru_cache(maxsize=1) @@ -47,7 +45,7 @@ def uuid(self) -> Optional[str]: @property @lru_cache(maxsize=1) def schema(self) -> Optional[dict]: - return self._schema if self._schema else (self._portal.get_schema(self.type) if self._portal else None) + return self._portal.get_schema(self.type) if self._portal else None def copy(self) -> PortalObject: return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type) diff --git a/pyproject.toml b/pyproject.toml index 4bc10abba..8f2a24b7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b3" +version = "8.8.6.1b4" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 8f320a595b5fd3b308eae2ca62bd8da64aa5e839 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 26 May 2024 12:04:37 -0400 Subject: [PATCH 07/37] refactoring identifying properties related code --- dcicutils/portal_object_utils.py | 40 +++++++++++--------------------- dcicutils/portal_utils.py | 24 +++++++++++++++---- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/dcicutils/portal_object_utils.py b/dcicutils/portal_object_utils.py index 4c14e790a..70744761f 100644 --- a/dcicutils/portal_object_utils.py +++ b/dcicutils/portal_object_utils.py @@ -14,7 +14,7 @@ class PortalObject: _PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL - def __init__(self, data: dict, portal: Portal = None, type: Optional[str] = None) -> None: + def __init__(self, data: dict, portal: Optional[Portal] = None, type: Optional[str] = None) -> None: self._data = data if isinstance(data, dict) else {} self._portal = portal if isinstance(portal, Portal) else None self._type = type if isinstance(type, str) else "" @@ -58,38 +58,26 @@ def identifying_properties(self) -> Optional[List[str]]: Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined. """ - if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")): - return None - identifying_properties = [] - for identifying_property in schema_identifying_properties: - if identifying_property not in ["uuid", "identifier", "aliases"]: - if self._data.get(identifying_property): - identifying_properties.append(identifying_property) - if self._data.get("identifier"): - identifying_properties.insert(0, "identifier") - if self._data.get("uuid"): - identifying_properties.insert(0, "uuid") - if "aliases" in schema_identifying_properties and self._data.get("aliases"): - identifying_properties.append("aliases") - return identifying_properties or None + return self._portal.get_identifying_property_names(self.type, portal_object=self._data) if self._portal else [] @lru_cache(maxsize=8192) def lookup(self, raw: bool = False, ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]: + if not (identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy)): + return None, None, 0 nlookups = 0 first_identifying_path = None try: - if identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy): - for identifying_path in identifying_paths: - if not first_identifying_path: - first_identifying_path = identifying_path - nlookups += 1 - if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200): - return ( - PortalObject(value.json(), portal=self._portal, type=self.type if raw else None), - identifying_path, - nlookups - ) + for identifying_path in identifying_paths: + if not first_identifying_path: + first_identifying_path = identifying_path + nlookups += 1 + if self._portal and (item := self._portal.get(identifying_path, raw=raw)) and (item.status_code == 200): + return ( + PortalObject(item.json(), portal=self._portal, type=self.type if raw else None), + identifying_path, + nlookups + ) except Exception: pass return None, first_identifying_path, nlookups diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index d172b0c25..35cb7e1e8 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -446,10 +446,13 @@ def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] return results @function_cache(maxsize=100, serialize_key=True) - def get_identifying_property_names(self, schema: Union[str, dict]) -> List[str]: + def get_identifying_property_names(self, schema: Union[str, dict], + portal_object: Optional[dict] = None) -> List[str]: """ - Returns the list of identifying property names for the given Portal schema, which may - be either a schema name or a schema object; empty list if none or otherwise not found. + Returns the list of identifying property names for the given Portal schema, which may be + either a schema name or a schema object. If a Portal object is also given then restricts this + set of identifying properties to those which actually have values within this Portal object. + Returns empty list if no identifying properties or somehow otherwise not found. """ results = [] if isinstance(schema, str): @@ -459,11 +462,22 @@ def get_identifying_property_names(self, schema: Union[str, dict]) -> List[str]: return results if not (identifying_properties := get_identifying_properties(schema)): return results - identifying_properties = [*identifying_properties] - for favored_identifying_property in reversed(["uuid", "identifier"]): + identifying_properties = list(set(identifying_properties)) # paranoid dedup + identifying_properties = [*identifying_properties] # copy so as not to change schema if given + favored_identifying_properties = ["uuid", "identifier"] + unfavored_identifying_properties = ["aliases"] + for favored_identifying_property in reversed(favored_identifying_properties): if favored_identifying_property in identifying_properties: identifying_properties.remove(favored_identifying_property) identifying_properties.insert(0, favored_identifying_property) + for unfavored_identifying_property in unfavored_identifying_properties: + if unfavored_identifying_property in identifying_properties: + identifying_properties.remove(unfavored_identifying_property) + identifying_properties.append(unfavored_identifying_property) + if isinstance(portal_object, dict): + for identifying_property in [*identifying_properties]: + if portal_object.get(identifying_property) is None: + identifying_properties.remove(identifying_property) return identifying_properties def url(self, url: str, raw: bool = False, database: bool = False) -> str: From ca170c1a904097bd4c1e1a30d0669b78cbb5f7cb Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 26 May 2024 17:13:48 -0400 Subject: [PATCH 08/37] comments --- dcicutils/portal_utils.py | 42 ++++++++++++++++++------ dcicutils/submitr/ref_lookup_strategy.py | 30 +++++++++-------- 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index 35cb7e1e8..9ddc6c98f 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -422,11 +422,12 @@ def get_schema_subtype_names(self, type_name: str) -> List[str]: return schemas_super_type_map.get(type_name, []) @function_cache(maxsize=100, serialize_key=True) - def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] = None) -> List[str]: + def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] = None, + ref_lookup_strategy: Optional[Callable] = None) -> List[str]: """ - Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any - uuid based path and defavors aliases based paths (ala self.get_identifying_property_names); - no other ordering defined. Returns empty list of none or otherwise not found. + Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid + and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names); + no other ordering defined. Returns an empty list if no identifying properties or otherwise not found. """ results = [] if not isinstance(portal_object, dict): @@ -440,6 +441,26 @@ def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] for identifying_value_item in identifying_value: results.append(f"/{portal_type}/{identifying_value_item}") elif identifying_property == "uuid": + # + # Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect + # is if we use the lower-case-dashed-plural based version of the path, e.g. all of these: + # + # - /d13d06c1-218e-4f61-aaf0-91f226248b3c + # - /d13d06c1-218e-4f61-aaf0-91f226248b3c/ + # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c + # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/ + # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c + # + # Will result in a (HTTP 301) redirect to: + # + # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/ + # + # Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural + # based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the + # information is contained, for this example, in the snovault.collection decorator for the + # endpoint definition in smaht-portal/.../types/file_format.py. Unfortunately merely because + # behind-the-scenes an extra round-trip HTTP request will occurm but happens automatically. + # results.append(f"/{identifying_value}") else: results.append(f"/{portal_type}/{identifying_value}") @@ -452,7 +473,8 @@ def get_identifying_property_names(self, schema: Union[str, dict], Returns the list of identifying property names for the given Portal schema, which may be either a schema name or a schema object. If a Portal object is also given then restricts this set of identifying properties to those which actually have values within this Portal object. - Returns empty list if no identifying properties or somehow otherwise not found. + Favors the uuid and identifier property names and defavors the aliases property name; no other + ordering imposed. Returns empty list if no identifying properties or otherwise not found. """ results = [] if isinstance(schema, str): @@ -465,15 +487,15 @@ def get_identifying_property_names(self, schema: Union[str, dict], identifying_properties = list(set(identifying_properties)) # paranoid dedup identifying_properties = [*identifying_properties] # copy so as not to change schema if given favored_identifying_properties = ["uuid", "identifier"] - unfavored_identifying_properties = ["aliases"] + defavored_identifying_properties = ["aliases"] for favored_identifying_property in reversed(favored_identifying_properties): if favored_identifying_property in identifying_properties: identifying_properties.remove(favored_identifying_property) identifying_properties.insert(0, favored_identifying_property) - for unfavored_identifying_property in unfavored_identifying_properties: - if unfavored_identifying_property in identifying_properties: - identifying_properties.remove(unfavored_identifying_property) - identifying_properties.append(unfavored_identifying_property) + for defavored_identifying_property in defavored_identifying_properties: + if defavored_identifying_property in identifying_properties: + identifying_properties.remove(defavored_identifying_property) + identifying_properties.append(defavored_identifying_property) if isinstance(portal_object, dict): for identifying_property in [*identifying_properties]: if portal_object.get(identifying_property) is None: diff --git a/dcicutils/submitr/ref_lookup_strategy.py b/dcicutils/submitr/ref_lookup_strategy.py index 7a03a7315..c565b7db5 100644 --- a/dcicutils/submitr/ref_lookup_strategy.py +++ b/dcicutils/submitr/ref_lookup_strategy.py @@ -2,10 +2,13 @@ from typing import Optional from dcicutils.structured_data import Portal +# This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously, +# before it was fully developeda, we had differing behaviors; but this has been unified; this +# could now be internalized to structured_data, and portal_object_utils (TODO). def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]): # - # FYI: Note this situation WRT object lookups ... + # Note this slight odd situation WRT object lookups by submitted_id and accession ... # # /{submitted_id} # NOT FOUND # /UnalignedReads/{submitted_id} # OK @@ -20,21 +23,20 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str def ref_validator(schema: Optional[dict], property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]: """ - Returns False iff the type represented by the given schema can NOT be referenced by - the given property name with the given property value, otherwise returns None. + Returns False iff objects of type represented by the given schema, CANNOT be referenced with + a Portal path using the given property name and its given property value, otherwise returns None. - For example, if the schema is for the UnalignedReads type and the property name - is accession, then we will return False iff the given property value is NOT a properly - formatted accession ID. Otherwise, we will return None, which indicates that the - caller (in dcicutils.structured_data.Portal.ref_exists) will continue executing - its default behavior, which is to check other ways in which the given type can NOT - be referenced by the given value, i.e. it checks other identifying properties for - the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to. + For example, if the schema is for UnalignedReads and the property name is accession, then we will + return False iff the given property value is NOT a properly formatted accession ID; otherwise, we + will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists) + will continue executing its default behavior, which is to check other ways in which the given type + CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type + and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to. - The goal (in structured_data) being to detect if a type is being referenced in such - a way that can NOT possibly be allowed, i.e. because none of its identifying types - are in the required form (if indeed there any requirements). Note that it is guaranteed - that the given property name is indeed an identifying property for the given type. + The goal (in structured_data) being to detect if a type is being referenced in such a way that + CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form, + if indeed there any requirements. It is assumed/guaranteed the given property name is indeed an + identifying property for the given type. """ if property_format := schema.get("properties", {}).get(property_name, {}).get("format"): if (property_format == "accession") and (property_name == "accession"): From 1de0176891c8ebb83600f85d0880b0721de5776d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 26 May 2024 17:14:06 -0400 Subject: [PATCH 09/37] flake8 --- dcicutils/submitr/ref_lookup_strategy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dcicutils/submitr/ref_lookup_strategy.py b/dcicutils/submitr/ref_lookup_strategy.py index c565b7db5..cad657a76 100644 --- a/dcicutils/submitr/ref_lookup_strategy.py +++ b/dcicutils/submitr/ref_lookup_strategy.py @@ -6,6 +6,7 @@ # before it was fully developeda, we had differing behaviors; but this has been unified; this # could now be internalized to structured_data, and portal_object_utils (TODO). + def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]): # # Note this slight odd situation WRT object lookups by submitted_id and accession ... From 1fd418c765bc97d70bb2d02a3e6bda1550b52d21 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 26 May 2024 17:14:40 -0400 Subject: [PATCH 10/37] typo --- dcicutils/submitr/ref_lookup_strategy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dcicutils/submitr/ref_lookup_strategy.py b/dcicutils/submitr/ref_lookup_strategy.py index cad657a76..ae9ae3127 100644 --- a/dcicutils/submitr/ref_lookup_strategy.py +++ b/dcicutils/submitr/ref_lookup_strategy.py @@ -3,7 +3,7 @@ from dcicutils.structured_data import Portal # This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously, -# before it was fully developeda, we had differing behaviors; but this has been unified; this +# before it was fully developed, we had differing behaviors; but this has been unified; this # could now be internalized to structured_data, and portal_object_utils (TODO). From 8577d965dd433eafab74d433c107dd6e128ac967 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 27 May 2024 07:26:55 -0400 Subject: [PATCH 11/37] unified/migrated some portal_object_utils stuff to portal_utils --- dcicutils/portal_object_utils.py | 71 ++------ dcicutils/portal_utils.py | 202 +++++++++++++++++------ dcicutils/submitr/ref_lookup_strategy.py | 29 ++-- 3 files changed, 180 insertions(+), 122 deletions(-) diff --git a/dcicutils/portal_object_utils.py b/dcicutils/portal_object_utils.py index 70744761f..907281e0a 100644 --- a/dcicutils/portal_object_utils.py +++ b/dcicutils/portal_object_utils.py @@ -57,7 +57,9 @@ def identifying_properties(self) -> Optional[List[str]]: Returns the list of all identifying property names of this Portal object which actually have values. Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined. + Changed (2024-05-26) to use portal_utils.get_identifying_property_names; migrating some intricate stuff there. """ + # Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26). return self._portal.get_identifying_property_names(self.type, portal_object=self._data) if self._portal else [] @lru_cache(maxsize=8192) @@ -144,65 +146,16 @@ def diff_deleting(value: Any) -> object: # noqa return diffs @lru_cache(maxsize=1) - def _get_identifying_paths(self, ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]: - """ - Returns a list of the possible Portal URL paths identifying this Portal object. - """ - identifying_paths = [] - if not (identifying_properties := self.identifying_properties): - if self.uuid: - if self.type: - identifying_paths.append(f"/{self.type}/{self.uuid}") - identifying_paths.append(f"/{self.uuid}") - return identifying_paths - for identifying_property in identifying_properties: - if identifying_value := self._data.get(identifying_property): - if identifying_property == "uuid": - if self.type: - identifying_paths.append(f"/{self.type}/{identifying_value}") - identifying_paths.append(f"/{identifying_value}") - # For now at least we include the path both with and without the schema type component, - # as for some identifying values, it works (only) with, and some, it works (only) without. - # For example: If we have FileSet with "accession", an identifying property, with value - # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and - # conversely using "submitted_id", also an identifying property, with value - # UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does - # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work. - elif isinstance(identifying_value, list): - for identifying_value_item in identifying_value: - if self.type: - identifying_paths.append(f"/{self.type}/{identifying_value_item}") - identifying_paths.append(f"/{identifying_value_item}") - else: - # TODO: Import from somewhere ... - lookup_options = 0 - if schema := self.schema: - # TODO: Hook into the ref_lookup_strategy thing in structured_data to make - # sure we check accession format (since it does not have a pattern). - if callable(ref_lookup_strategy): - lookup_options, ref_validator = ref_lookup_strategy( - self._portal, self.type, schema, identifying_value) - if callable(ref_validator): - if ref_validator(schema, identifying_property, identifying_value) is False: - continue - if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"): - if not re.match(pattern, identifying_value): - # If this identifying value is for a (identifying) property which has a - # pattern, and the value does NOT match the pattern, then do NOT include - # this value as an identifying path, since it cannot possibly be found. - continue - if not lookup_options: - lookup_options = Portal.LOOKUP_DEFAULT - if Portal.is_lookup_root_first(lookup_options): - identifying_paths.append(f"/{identifying_value}") - if Portal.is_lookup_specified_type(lookup_options) and self.type: - identifying_paths.append(f"/{self.type}/{identifying_value}") - if Portal.is_lookup_root(lookup_options) and not Portal.is_lookup_root_first(lookup_options): - identifying_paths.append(f"/{identifying_value}") - if Portal.is_lookup_subtypes(lookup_options): - for subtype_name in self._portal.get_schema_subtype_names(self.type): - identifying_paths.append(f"/{subtype_name}/{identifying_value}") - return identifying_paths or None + def _get_identifying_paths(self, all: bool = True, + ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]: + if not self._portal and (uuid := self.uuid): + if all is True and (type := self.type): + return [f"/{type}/{uuid}", f"/{uuid}"] + return [f"/{uuid}"] + # Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26). + return self._portal.get_identifying_paths(self._data, + portal_type=self.schema, all=all, + lookup_strategy=ref_lookup_strategy) if self._portal else None def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]: """ diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index 9ddc6c98f..af96a34e9 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -50,15 +50,16 @@ class Portal: FILE_TYPE_SCHEMA_NAME = "File" # Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by - # structured_data.py; controlled by an optional ref_lookup_strategy callable; default is + # structured_data.py; controlled by an optional lookup_strategy callable; default is # lookup at root path but after the specified type path lookup, and then lookup all subtypes; # can choose to lookup root path first, or not lookup root path at all, or not lookup - # subtypes at all; the ref_lookup_strategy callable if specified should take a type_name + # subtypes at all; the lookup_strategy callable if specified should take a type_name # and value (string) arguements and return an integer of any of the below ORed together. # The main purpose of this is optimization; to minimize portal lookups; since for example, # currently at least, /{type}/{accession} does not work but /{accession} does; so we # currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this. # And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case. + LOOKUP_UNDEFINED = 0 LOOKUP_SPECIFIED_TYPE = 0x0001 LOOKUP_ROOT = 0x0002 LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT @@ -207,23 +208,6 @@ def app(self) -> Optional[str]: def vapp(self) -> Optional[TestApp]: return self._vapp - @staticmethod - def is_lookup_specified_type(lookup_options: int) -> bool: - return (lookup_options & - Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE - - @staticmethod - def is_lookup_root(lookup_options: int) -> bool: - return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT - - @staticmethod - def is_lookup_root_first(lookup_options: int) -> bool: - return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST - - @staticmethod - def is_lookup_subtypes(lookup_options: int) -> bool: - return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES - def get(self, url: str, follow: bool = True, raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse: url = self.url(url, raw, database) @@ -422,48 +406,101 @@ def get_schema_subtype_names(self, type_name: str) -> List[str]: return schemas_super_type_map.get(type_name, []) @function_cache(maxsize=100, serialize_key=True) - def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] = None, - ref_lookup_strategy: Optional[Callable] = None) -> List[str]: + def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None, + all: bool = True, lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]: """ Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names); no other ordering defined. Returns an empty list if no identifying properties or otherwise not found. + Note that this is a newer version of what was in portal_object_utils and just uses the ref_lookup_stratey + module directly, as it no longer needs to be exposed (to smaht-portal/ingester and smaht-submitr) and so + this is a first step toward internalizing it to structured_data/portal_utils/portal_object_utils usages. """ + def is_lookup_specified_type(lookup_options: int) -> bool: + return (lookup_options & Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE + def is_lookup_root(lookup_options: int) -> bool: # noqa + return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT + def is_lookup_root_first(lookup_options: int) -> bool: # noqa + return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST + def is_lookup_subtypes(lookup_options: int) -> bool: # noqa + return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES + results = [] if not isinstance(portal_object, dict): return results - if not isinstance(portal_type, str) or not portal_type: - if not (portal_type := self.get_schema_type(portal_object)): - return results + if not (isinstance(portal_type, str) and portal_type): + if isinstance(portal_type, dict): + # It appears that the given portal_type is an actual schema dictionary. + portal_type = self.schema_name(portal_type.get("title")) + if not (isinstance(portal_type, str) and portal_type): + if not (portal_type := self.get_schema_type(portal_object)): + return results + if not callable(lookup_strategy): + lookup_strategy = None if lookup_strategy is False else Portal._lookup_strategy for identifying_property in self.get_identifying_property_names(portal_type): - if identifying_value := portal_object.get(identifying_property): - if isinstance(identifying_value, list): - for identifying_value_item in identifying_value: + if not (identifying_value := portal_object.get(identifying_property)): + continue + # The get_identifying_property_names call above ensures uuid is first if it is in the object. + # And also note that ALL schemas do in fact have identifyingProperties which do in fact have + # uuid, except for a couple "Test" ones, and (for some reason) SubmittedItem; otherwise we + # might have a special case to check the Portal object explicitly for uuid, but no need. + if identifying_property == "uuid": + # + # Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect + # is if we use the lower-case-dashed-plural based version of the path, e.g. all of these: + # + # - /d13d06c1-218e-4f61-aaf0-91f226248b3c + # - /d13d06c1-218e-4f61-aaf0-91f226248b3c/ + # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c + # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/ + # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c + # + # Will result in a (HTTP 301) redirect to: + # + # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/ + # + # Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural + # based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the + # information is contained, for this example, in the snovault.collection decorator for the + # endpoint definition in smaht-portal/.../types/file_format.py. Unfortunately merely because + # behind-the-scenes an extra round-trip HTTP request will occur, but happens automatically. + # And note the disction of just using /{uuid} here rather than /{type}/{uuid} as in the else + # statement below is not really necessary; just here for emphasis that this is all that's needed. + # + if all is True: + results.append(f"/{portal_type}/{identifying_value}") + results.append(f"/{identifying_value}") + elif isinstance(identifying_value, list): + for identifying_value_item in identifying_value: + if identifying_value_item: results.append(f"/{portal_type}/{identifying_value_item}") - elif identifying_property == "uuid": - # - # Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect - # is if we use the lower-case-dashed-plural based version of the path, e.g. all of these: - # - # - /d13d06c1-218e-4f61-aaf0-91f226248b3c - # - /d13d06c1-218e-4f61-aaf0-91f226248b3c/ - # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c - # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/ - # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c - # - # Will result in a (HTTP 301) redirect to: - # - # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/ - # - # Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural - # based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the - # information is contained, for this example, in the snovault.collection decorator for the - # endpoint definition in smaht-portal/.../types/file_format.py. Unfortunately merely because - # behind-the-scenes an extra round-trip HTTP request will occurm but happens automatically. - # + if all is True: + results.append(f"/{identifying_value_item}") + else: + lookup_options = Portal.LOOKUP_UNDEFINED + if schema := self.get_schema(portal_type): + if callable(lookup_strategy): + lookup_options, validator = lookup_strategy(self._portal, self.type, schema, identifying_value) + if callable(validator): + if validator(schema, identifying_property, identifying_value) is False: + continue + if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"): + if not re.match(pattern, identifying_value): + # If this identifying value is for a (identifying) property which has a + # pattern, and the value does NOT match the pattern, then do NOT include + # this value as an identifying path, since it cannot possibly be found. + continue + if lookup_options == Portal.LOOKUP_UNDEFINED: + lookup_options = Portal.LOOKUP_DEFAULT + if is_lookup_root_first(lookup_options): results.append(f"/{identifying_value}") - else: - results.append(f"/{portal_type}/{identifying_value}") + if is_lookup_specified_type(lookup_options) and self.type: + results.append(f"/{self.type}/{identifying_value}") + if is_lookup_root(lookup_options) and not is_lookup_root_first(lookup_options): + results.append(f"/{identifying_value}") + if is_lookup_subtypes(lookup_options): + for subtype_name in self._portal.get_schema_subtype_names(self.type): + results.append(f"/{subtype_name}/{identifying_value}") return results @function_cache(maxsize=100, serialize_key=True) @@ -502,6 +539,71 @@ def get_identifying_property_names(self, schema: Union[str, dict], identifying_properties.remove(identifying_property) return identifying_properties + @staticmethod + def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]): + # + # Note this slight odd situation WRT object lookups by submitted_id and accession: + # -----------------------------+-----------------------------------------------+---------------+ + # PATH | EXAMPLE | LOOKUP RESULT | + # -----------------------------+-----------------------------------------------+---------------+ + # /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND | + # /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND | + # /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND | + # /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND | + # -----------------------------+-----------------------------------------------+---------------+ + # /accession | /SMAFSFXF1RO4 | FOUND | + # /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND | + # /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND | + # /File/accession | /File/SMAFSFXF1RO4 | FOUND | + # -----------------------------+-----------------------------------------------+---------------+ + # + def ref_validator(schema: Optional[dict], + property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]: + """ + Returns False iff objects of type represented by the given schema, CANNOT be referenced with + a Portal path using the given property name and its given property value, otherwise returns None. + + For example, if the schema is for UnalignedReads and the property name is accession, then we will + return False iff the given property value is NOT a properly formatted accession ID; otherwise, we + will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists) + will continue executing its default behavior, which is to check other ways in which the given type + CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type + and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to. + + The goal (in structured_data) being to detect if a type is being referenced in such a way that + CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form, + if indeed there any requirements. It is assumed/guaranteed the given property name is indeed an + identifying property for the given type. + """ + if property_format := schema.get("properties", {}).get(property_name, {}).get("format"): + if (property_format == "accession") and (property_name == "accession"): + if not Portal._is_accession_id(property_value): + return False + return None + + DEFAULT_RESULT = (Portal.LOOKUP_DEFAULT, ref_validator) + if not value: + return DEFAULT_RESULT + if not schema: + if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)): + return DEFAULT_RESULT + if schema_properties := schema.get("properties"): + if schema_properties.get("accession") and Portal._is_accession_id(value): + # Case: lookup by accession (only by root). + return (Portal.LOOKUP_ROOT, ref_validator) + elif schema_property_info_submitted_id := schema_properties.get("submitted_id"): + if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"): + if re.match(schema_property_pattern_submitted_id, value): + # Case: lookup by submitted_id (only by specified type). + return (Portal.LOOKUP_SPECIFIED_TYPE, ref_validator) + return DEFAULT_RESULT + + @staticmethod + def _is_accession_id(value: str) -> bool: + # This is here for now because of problems with circular dependencies. + # See: smaht-portal/.../schema_formats.py/is_accession(instance) ... + return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None + def url(self, url: str, raw: bool = False, database: bool = False) -> str: if not isinstance(url, str) or not url: return "/" diff --git a/dcicutils/submitr/ref_lookup_strategy.py b/dcicutils/submitr/ref_lookup_strategy.py index ae9ae3127..b0dc69261 100644 --- a/dcicutils/submitr/ref_lookup_strategy.py +++ b/dcicutils/submitr/ref_lookup_strategy.py @@ -3,23 +3,26 @@ from dcicutils.structured_data import Portal # This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously, -# before it was fully developed, we had differing behaviors; but this has been unified; this +# before it was fully developed, we had differing behaviors; but this has been unified; so this # could now be internalized to structured_data, and portal_object_utils (TODO). def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]): # - # Note this slight odd situation WRT object lookups by submitted_id and accession ... - # - # /{submitted_id} # NOT FOUND - # /UnalignedReads/{submitted_id} # OK - # /SubmittedFile/{submitted_id} # OK - # /File/{submitted_id} # NOT FOUND - # - # /{accession} # OK - # /UnalignedReads/{accession} # NOT FOUND - # /SubmittedFile/{accession} # NOT FOUND - # /File/{accession} # OK + # Note this slight odd situation WRT object lookups by submitted_id and accession: + # -----------------------------+-----------------------------------------------+---------------+ + # PATH | EXAMPLE | LOOKUP RESULT | + # -----------------------------+-----------------------------------------------+---------------+ + # /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND | + # /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND | + # /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND | + # /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND | + # -----------------------------+-----------------------------------------------+---------------+ + # /accession | /SMAFSFXF1RO4 | FOUND | + # /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND | + # /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND | + # /File/accession | /File/SMAFSFXF1RO4 | FOUND | + # -----------------------------+-----------------------------------------------+---------------+ # def ref_validator(schema: Optional[dict], property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]: @@ -65,6 +68,6 @@ def ref_validator(schema: Optional[dict], # This is here for now because of problems with circular dependencies. -# See: smaht-portal/.../schema_formats.py +# See: smaht-portal/.../schema_formats.py/is_accession(instance) ... def _is_accession_id(value: str) -> bool: return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None From 776759445ac092a981a9825653b8c366d2de1e8a Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 27 May 2024 07:27:07 -0400 Subject: [PATCH 12/37] unified/migrated some portal_object_utils stuff to portal_utils --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8f2a24b7f..0282e9e55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b4" +version = "8.8.6.1b5" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 92dd0244001d0e68d52786cd4416743ba09467fa Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 27 May 2024 07:31:35 -0400 Subject: [PATCH 13/37] lint --- dcicutils/portal_object_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dcicutils/portal_object_utils.py b/dcicutils/portal_object_utils.py index 907281e0a..9bec68d08 100644 --- a/dcicutils/portal_object_utils.py +++ b/dcicutils/portal_object_utils.py @@ -1,6 +1,5 @@ from copy import deepcopy from functools import lru_cache -import re from typing import Any, Callable, List, Optional, Tuple, Type, Union from dcicutils.data_readers import RowReader from dcicutils.misc_utils import create_readonly_object From fcd4b4821a8a66d88b1c98bbed4e66c095318729 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 27 May 2024 09:42:18 -0400 Subject: [PATCH 14/37] typo --- dcicutils/portal_utils.py | 4 ++-- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index af96a34e9..2928eb3e7 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -480,7 +480,7 @@ def is_lookup_subtypes(lookup_options: int) -> bool: # noqa lookup_options = Portal.LOOKUP_UNDEFINED if schema := self.get_schema(portal_type): if callable(lookup_strategy): - lookup_options, validator = lookup_strategy(self._portal, self.type, schema, identifying_value) + lookup_options, validator = lookup_strategy(self, self.type, schema, identifying_value) if callable(validator): if validator(schema, identifying_property, identifying_value) is False: continue @@ -499,7 +499,7 @@ def is_lookup_subtypes(lookup_options: int) -> bool: # noqa if is_lookup_root(lookup_options) and not is_lookup_root_first(lookup_options): results.append(f"/{identifying_value}") if is_lookup_subtypes(lookup_options): - for subtype_name in self._portal.get_schema_subtype_names(self.type): + for subtype_name in self.get_schema_subtype_names(self.type): results.append(f"/{subtype_name}/{identifying_value}") return results diff --git a/pyproject.toml b/pyproject.toml index 0282e9e55..6e4f7f517 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b5" +version = "8.8.6.1b6" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From a5514949f31399b0f5cc08fed9d72b4b89102e03 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 27 May 2024 09:46:55 -0400 Subject: [PATCH 15/37] typo-again --- dcicutils/portal_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index 2928eb3e7..1c2bfd1a5 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -480,7 +480,7 @@ def is_lookup_subtypes(lookup_options: int) -> bool: # noqa lookup_options = Portal.LOOKUP_UNDEFINED if schema := self.get_schema(portal_type): if callable(lookup_strategy): - lookup_options, validator = lookup_strategy(self, self.type, schema, identifying_value) + lookup_options, validator = lookup_strategy(self, portal_type, schema, identifying_value) if callable(validator): if validator(schema, identifying_property, identifying_value) is False: continue @@ -494,12 +494,12 @@ def is_lookup_subtypes(lookup_options: int) -> bool: # noqa lookup_options = Portal.LOOKUP_DEFAULT if is_lookup_root_first(lookup_options): results.append(f"/{identifying_value}") - if is_lookup_specified_type(lookup_options) and self.type: - results.append(f"/{self.type}/{identifying_value}") + if is_lookup_specified_type(lookup_options) and portal_type: + results.append(f"/{portal_type}/{identifying_value}") if is_lookup_root(lookup_options) and not is_lookup_root_first(lookup_options): results.append(f"/{identifying_value}") if is_lookup_subtypes(lookup_options): - for subtype_name in self.get_schema_subtype_names(self.type): + for subtype_name in self.get_schema_subtype_names(portal_type): results.append(f"/{subtype_name}/{identifying_value}") return results From 141a48a92d83cf5b8eb9a83ef6d66f23376c3ca5 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 27 May 2024 09:47:13 -0400 Subject: [PATCH 16/37] version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6e4f7f517..1f1dcbd3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b6" +version = "8.8.6.1b7" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From ab88d6a299d60a0f836ca349690e080c05ca1885 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 27 May 2024 10:10:35 -0400 Subject: [PATCH 17/37] removed all arg from get_identifying_paths in portal_utils --- dcicutils/portal_object_utils.py | 7 ++----- dcicutils/portal_utils.py | 8 ++------ pyproject.toml | 2 +- test/test_portal_object_utils.py | 10 ++++------ 4 files changed, 9 insertions(+), 18 deletions(-) diff --git a/dcicutils/portal_object_utils.py b/dcicutils/portal_object_utils.py index 9bec68d08..9a64cb2bd 100644 --- a/dcicutils/portal_object_utils.py +++ b/dcicutils/portal_object_utils.py @@ -145,15 +145,12 @@ def diff_deleting(value: Any) -> object: # noqa return diffs @lru_cache(maxsize=1) - def _get_identifying_paths(self, all: bool = True, - ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]: + def _get_identifying_paths(self, ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]: if not self._portal and (uuid := self.uuid): - if all is True and (type := self.type): - return [f"/{type}/{uuid}", f"/{uuid}"] return [f"/{uuid}"] # Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26). return self._portal.get_identifying_paths(self._data, - portal_type=self.schema, all=all, + portal_type=self.schema, lookup_strategy=ref_lookup_strategy) if self._portal else None def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]: diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index 1c2bfd1a5..42f3a4927 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -407,7 +407,7 @@ def get_schema_subtype_names(self, type_name: str) -> List[str]: @function_cache(maxsize=100, serialize_key=True) def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None, - all: bool = True, lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]: + lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]: """ Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names); @@ -467,15 +467,11 @@ def is_lookup_subtypes(lookup_options: int) -> bool: # noqa # And note the disction of just using /{uuid} here rather than /{type}/{uuid} as in the else # statement below is not really necessary; just here for emphasis that this is all that's needed. # - if all is True: - results.append(f"/{portal_type}/{identifying_value}") results.append(f"/{identifying_value}") elif isinstance(identifying_value, list): for identifying_value_item in identifying_value: if identifying_value_item: results.append(f"/{portal_type}/{identifying_value_item}") - if all is True: - results.append(f"/{identifying_value_item}") else: lookup_options = Portal.LOOKUP_UNDEFINED if schema := self.get_schema(portal_type): @@ -542,7 +538,7 @@ def get_identifying_property_names(self, schema: Union[str, dict], @staticmethod def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]): # - # Note this slight odd situation WRT object lookups by submitted_id and accession: + # Note this slightly odd situation WRT object lookups by submitted_id and accession: # -----------------------------+-----------------------------------------------+---------------+ # PATH | EXAMPLE | LOOKUP RESULT | # -----------------------------+-----------------------------------------------+---------------+ diff --git a/pyproject.toml b/pyproject.toml index 1f1dcbd3a..5ed46ce3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b7" +version = "8.8.6.1b8" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/test/test_portal_object_utils.py b/test/test_portal_object_utils.py index 18e632620..9ed1868f3 100644 --- a/test/test_portal_object_utils.py +++ b/test/test_portal_object_utils.py @@ -600,8 +600,7 @@ def test_compare(): assert portal_object.types == ["IngestionSubmission", "Item"] assert not portal_object.schema assert not portal_object.identifying_properties - assert portal_object._get_identifying_paths() == [f"/{TEST_OBJECT_DATABASE_JSON['@type'][0]}/{TEST_OBJECT_UUID}", - f"/{TEST_OBJECT_UUID}"] + assert portal_object._get_identifying_paths() == [f"/{TEST_OBJECT_UUID}"] assert portal_object.compare(TEST_OBJECT_DATABASE_JSON) == ({}, 0) portal_object_copy = portal_object.copy() @@ -628,10 +627,9 @@ def test_compare(): assert portal_object_found.schema == TEST_OBJECT_SCHEMA_JSON assert portal_object_found.identifying_properties == ["uuid", "aliases"] assert portal_object_found._get_identifying_paths() == ( - [f"/{TEST_OBJECT_DATABASE_JSON['@type'][0]}/{TEST_OBJECT_UUID}", - f"/{TEST_OBJECT_UUID}", - "/IngestionSubmission/foo", "/foo", - "/IngestionSubmission/bar", "/bar"]) + [f"/{TEST_OBJECT_UUID}", + "/IngestionSubmission/foo", + "/IngestionSubmission/bar"]) portal_object_copy = portal_object.copy() portal_object_copy.data["xyzzy"] = 123 From 5645ca86b1147d8950491d3730e6091ecd141a56 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 28 May 2024 06:59:23 -0400 Subject: [PATCH 18/37] update to portal_utils/get_identifying_path --- dcicutils/portal_utils.py | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index 42f3a4927..3a854bcc1 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -499,6 +499,13 @@ def is_lookup_subtypes(lookup_options: int) -> bool: # noqa results.append(f"/{subtype_name}/{identifying_value}") return results + @function_cache(maxsize=100, serialize_key=True) + def get_identifying_path(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None, + lookup_strategy: Optional[Union[Callable, bool]] = None) -> Optional[str]: + if identifying_paths := self.get_identifying_path(portal_object, portal_type, lookup_strategy): + return identifying_paths[0] + return None + @function_cache(maxsize=100, serialize_key=True) def get_identifying_property_names(self, schema: Union[str, dict], portal_object: Optional[dict] = None) -> List[str]: diff --git a/pyproject.toml b/pyproject.toml index 5ed46ce3e..85fc29b28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b8" +version = "8.8.6.1b9" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From d9bab8944d57f0bbb3d55f29224542ab9e86cbfe Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 28 May 2024 07:04:35 -0400 Subject: [PATCH 19/37] update to portal_utils/get_identifying_path --- dcicutils/portal_utils.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index 3a854bcc1..f93b0250b 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -502,7 +502,7 @@ def is_lookup_subtypes(lookup_options: int) -> bool: # noqa @function_cache(maxsize=100, serialize_key=True) def get_identifying_path(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None, lookup_strategy: Optional[Union[Callable, bool]] = None) -> Optional[str]: - if identifying_paths := self.get_identifying_path(portal_object, portal_type, lookup_strategy): + if identifying_paths := self.get_identifying_paths(portal_object, portal_type, lookup_strategy): return identifying_paths[0] return None diff --git a/pyproject.toml b/pyproject.toml index 85fc29b28..e0391f98a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b9" +version = "8.8.6.1b10" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 174a83f3b59aa2bda84b985372a71973bce85204 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 28 May 2024 07:16:32 -0400 Subject: [PATCH 20/37] update to portal_utils/get_identifying_path --- dcicutils/portal_utils.py | 11 +++++++++-- pyproject.toml | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index f93b0250b..005eb400a 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -407,6 +407,7 @@ def get_schema_subtype_names(self, type_name: str) -> List[str]: @function_cache(maxsize=100, serialize_key=True) def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None, + first_only: bool = False, lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]: """ Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid @@ -467,7 +468,10 @@ def is_lookup_subtypes(lookup_options: int) -> bool: # noqa # And note the disction of just using /{uuid} here rather than /{type}/{uuid} as in the else # statement below is not really necessary; just here for emphasis that this is all that's needed. # - results.append(f"/{identifying_value}") + if first_only is True: + results.append(f"/{portal_type}/{identifying_value}") + else: + results.append(f"/{identifying_value}") elif isinstance(identifying_value, list): for identifying_value_item in identifying_value: if identifying_value_item: @@ -497,12 +501,15 @@ def is_lookup_subtypes(lookup_options: int) -> bool: # noqa if is_lookup_subtypes(lookup_options): for subtype_name in self.get_schema_subtype_names(portal_type): results.append(f"/{subtype_name}/{identifying_value}") + if (first_only is True) and results: + return results return results @function_cache(maxsize=100, serialize_key=True) def get_identifying_path(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None, lookup_strategy: Optional[Union[Callable, bool]] = None) -> Optional[str]: - if identifying_paths := self.get_identifying_paths(portal_object, portal_type, lookup_strategy): + if identifying_paths := self.get_identifying_paths(portal_object, portal_type, first_only=True, + lookup_strategy=lookup_strategy): return identifying_paths[0] return None diff --git a/pyproject.toml b/pyproject.toml index e0391f98a..c3c6bb584 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b10" +version = "8.8.6.1b11" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From a5e84df826591a5c59077d82c7cecc366daa035f Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 29 May 2024 08:40:10 -0400 Subject: [PATCH 21/37] Added Question class to command_utils (from smaht-submitr) --- CHANGELOG.rst | 1 + dcicutils/command_utils.py | 70 +++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 +- 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c4d06872f..3fae95c04 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,7 @@ Change Log ===== * Added merge capabilities to structured_data. (IN PROGRESS: 2025-05-25) +* Added Question class to command_utils (factored out of smaht-submitr). 8.8.6 diff --git a/dcicutils/command_utils.py b/dcicutils/command_utils.py index ba09d57a1..8b230520a 100644 --- a/dcicutils/command_utils.py +++ b/dcicutils/command_utils.py @@ -1,3 +1,4 @@ +from __future__ import annotations import contextlib import functools import glob @@ -7,7 +8,7 @@ import requests import subprocess -from typing import Optional +from typing import Callable, Optional from .exceptions import InvalidParameterError from .lang_utils import there_are from .misc_utils import INPUT, PRINT, environ_bool, print_error_message, decorator @@ -384,3 +385,70 @@ def fail(*message): message = str(e) # Note: We ignore the type, which isn't intended to be shown. PRINT(message) exit(1) + + +class Question: + """ + Supports asking the user (via stdin) a yes/no question, possibly repeatedly; and after + some maximum number times of the same answer in a row (consecutively), then asks them + if they want to automatically give that same answer to any/all subsequent questions. + Supports static/global list of such Question instances, hashed (only) by the question text. + """ + _static_instances = {} + + @staticmethod + def instance(question: Optional[str] = None, + max: Optional[int] = None, printf: Optional[Callable] = None) -> Question: + question = question if isinstance(question, str) else "" + if not (instance := Question._static_instances.get(question)): + Question._static_instances[question] = (instance := Question(question, max=max, printf=printf)) + return instance + + @staticmethod + def yes(question: Optional[str] = None, + max: Optional[int] = None, printf: Optional[Callable] = None) -> bool: + return Question.instance(question, max=max, printf=printf).ask() + + def __init__(self, question: Optional[str] = None, + max: Optional[int] = None, printf: Optional[Callable] = None) -> None: + self._question = question if isinstance(question, str) else "" + self._max = max if isinstance(max, int) and max > 0 else None + self._print = printf if callable(printf) else print + self._yes_consecutive_count = 0 + self._no_consecutive_count = 0 + self._yes_automatic = False + self._no_automatic = False + + def ask(self, question: Optional[str] = None) -> bool: + + def question_automatic(value: str) -> bool: + nonlocal self + RARROW = "▶" + LARROW = "◀" + if yes_or_no(f"{RARROW}{RARROW}{RARROW}" + f" Do you want to answer {value} to all such questions?" + f" {LARROW}{LARROW}{LARROW}"): + return True + self._yes_consecutive_count = 0 + self._no_consecutive_count = 0 + + if self._yes_automatic: + return True + elif self._no_automatic: + return False + elif yes_or_no((question if isinstance(question, str) else "") or self._question or "Undefined question"): + self._yes_consecutive_count += 1 + self._no_consecutive_count = 0 + if (self._no_consecutive_count == 0) and self._max and (self._yes_consecutive_count >= self._max): + # Have reached the maximum number of consecutive YES answers; ask if YES to all subsequent. + if question_automatic("YES"): + self._yes_automatic = True + return True + else: + self._no_consecutive_count += 1 + self._yes_consecutive_count = 0 + if (self._yes_consecutive_count == 0) and self._max and (self._no_consecutive_count >= self._max): + # Have reached the maximum number of consecutive NO answers; ask if NO to all subsequent. + if question_automatic("NO"): + self._no_automatic = True + return False diff --git a/pyproject.toml b/pyproject.toml index c3c6bb584..c670f51f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b11" +version = "8.8.6.1b12" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From b59a5c1e5df4ce8b81cf53b22fa50152ced8c881 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 29 May 2024 12:08:34 -0400 Subject: [PATCH 22/37] fix in structured_data for json file --- dcicutils/structured_data.py | 14 ++++++++------ pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 7e0034eae..dc12a80de 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -351,20 +351,22 @@ def get_counts() -> Tuple[int, int]: def _load_json_file(self, file: str) -> None: with open(file) as f: - item = json.load(f) + data = json.load(f) + import pdb ; pdb.set_trace() # noqa + pass if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa # If the JSON file name looks like a schema name then assume it # contains an object or an array of object of that schema type. if self._merge: - item = self._merge_with_existing_portal_object(item, schema_name_inferred_from_file_name) - self._add(Schema.type_name(file), item) - elif isinstance(item, dict): + data = self._merge_with_existing_portal_object(data, schema_name_inferred_from_file_name) + self._add(Schema.type_name(file), data) + elif isinstance(data, dict): # Otherwise if the JSON file name does not look like a schema name then # assume it a dictionary where each property is the name of a schema, and # which (each property) contains a list of object of that schema type. - for schema_name in item: - item = item[schema_name] + for schema_name in data: + item = data[schema_name] if self._merge: item = self._merge_with_existing_portal_object(item, schema_name) self._add(schema_name, item) diff --git a/pyproject.toml b/pyproject.toml index c670f51f0..deafb6dc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b12" +version = "8.8.6.1b13" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From b23c3a3cdc2f36e55aac513f33675350da01ac16 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 29 May 2024 12:09:42 -0400 Subject: [PATCH 23/37] fix in structured_data for json file --- dcicutils/structured_data.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index dc12a80de..20c50bdf4 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -11,7 +11,6 @@ from dcicutils.common import OrchestratedApp from dcicutils.data_readers import CsvReader, Excel, RowReader from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string -from dcicutils.file_utils import search_for_file from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if, merge_objects, remove_empty_properties, right_trim, split_string, to_boolean, to_enum, to_float, to_integer, VirtualApp) @@ -209,14 +208,6 @@ def upload_files(self) -> List[str]: result.append({"type": type_name, "file": file_name}) return result - def upload_files_located(self, - location: Union[str, Optional[List[str]]] = None, recursive: bool = False) -> List[str]: - upload_files = copy.deepcopy(self.upload_files) - for upload_file in upload_files: - if file_path := search_for_file(upload_file["file"], location, recursive=recursive, single=True): - upload_file["path"] = file_path - return upload_files - @property def nrows(self) -> int: return self._nrows From ee549f97c09f3b35167c3cec322267db913e18be Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 29 May 2024 12:12:54 -0400 Subject: [PATCH 24/37] pdb --- dcicutils/structured_data.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 20c50bdf4..7f6a88d5f 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -343,8 +343,6 @@ def get_counts() -> Tuple[int, int]: def _load_json_file(self, file: str) -> None: with open(file) as f: data = json.load(f) - import pdb ; pdb.set_trace() # noqa - pass if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa # If the JSON file name looks like a schema name then assume it From 11caac8cb8a676d33376a1d7b5c732a658d3c21c Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 29 May 2024 12:13:01 -0400 Subject: [PATCH 25/37] version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index deafb6dc9..304f47444 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.8.6.1b13" +version = "8.8.6.1b14" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From e6127f8440304428681556bdd98988b5199b4720 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 30 May 2024 07:55:21 -0400 Subject: [PATCH 26/37] added portal_utils.Portal.delete/purge_metadata, for update-portal-object utility script --- dcicutils/portal_utils.py | 16 +++++++++++++++- pyproject.toml | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index 005eb400a..daf5264b4 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -17,7 +17,7 @@ from webtest.app import TestApp, TestResponse from wsgiref.simple_server import make_server as wsgi_make_server from dcicutils.common import APP_SMAHT, OrchestratedApp, ORCHESTRATED_APPS -from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata +from dcicutils.ff_utils import delete_metadata, get_metadata, get_schema, patch_metadata, post_metadata, purge_metadata from dcicutils.misc_utils import to_camel_case, VirtualApp from dcicutils.schema_utils import get_identifying_properties from dcicutils.tmpfile_utils import temporary_file @@ -280,6 +280,20 @@ def post_metadata(self, object_type: str, data: dict, check_only: bool = False) add_on="check_only=True" if check_only else "") return self.post(f"/{object_type}{'?check_only=True' if check_only else ''}", data).json() + def delete_metadata(self, object_id: str) -> Optional[dict]: + if isinstance(object_id, str) and object_id: + if self.key: + return delete_metadata(obj_id=object_id, key=self.key) + else: + return self.patch_metadata(object_id, {"status": "deleted"}) + return None + + def purge_metadata(self, object_id: str) -> Optional[dict]: + if isinstance(object_id, str) and object_id: + if self.key: + return purge_metadata(obj_id=object_id, key=self.key) + return None + def get_health(self) -> OptionalResponse: return self.get("/health") diff --git a/pyproject.toml b/pyproject.toml index 1a63902a0..1c0bd9c84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.9.0.1b1" # TODO: To become 8.10.0 +version = "8.9.0.1b2" # TODO: To become 8.10.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 6b77767bf0d1e5ce561ec8747e5b56c9a2e15a09 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 30 May 2024 09:09:04 -0400 Subject: [PATCH 27/37] CHANGES-updates --- CHANGELOG.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9868b3e89..28c0ea4a1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,9 @@ Change Log * Added merge capabilities to structured_data. (IN PROGRESS: 2025-05-25) * Added Question class to command_utils (factored out of smaht-submitr). +* Refactored out some identifying property related code from portal_object_utils to portal_utils. +* Internalized lookup_strategy related code to structured_data/portal_object_utils/portal_utils. +* Added delete/purge_metadata to portal_utils.Portol, for testing and completeness. 8.9.0 From d27b68e1412c02ed7145c4e268384c597032ed09 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 30 May 2024 15:51:06 -0400 Subject: [PATCH 28/37] merge functionality for submitr --- dcicutils/misc_utils.py | 51 +++++++++++++++++++++++++++++------- dcicutils/structured_data.py | 11 ++++---- pyproject.toml | 2 +- 3 files changed, 47 insertions(+), 17 deletions(-) diff --git a/dcicutils/misc_utils.py b/dcicutils/misc_utils.py index 0d719c421..e830e55e5 100644 --- a/dcicutils/misc_utils.py +++ b/dcicutils/misc_utils.py @@ -4,6 +4,7 @@ from collections import namedtuple import appdirs +from copy import deepcopy import contextlib import datetime import functools @@ -2199,28 +2200,58 @@ def merge_key_value_dict_lists(x, y): return [key_value_dict(k, v) for k, v in merged.items()] -def merge_objects(target: Union[dict, List[Any]], source: Union[dict, List[Any]], full: bool = False) -> dict: +def merge_objects(target: Union[dict, List[Any]], source: Union[dict, List[Any]], + full: bool = False, # deprecated + expand_lists: Optional[bool] = None, + primitive_lists: bool = False, + copy: bool = False, _recursing: bool = False) -> Union[dict, List[Any]]: """ - Merges the given source dictionary or list into the target dictionary or list. - This MAY well change the given target (dictionary or list) IN PLACE. - The the full argument is True then any target lists longer than the - source be will be filled out with the last element(s) of the source. + Merges the given source dictionary or list into the target dictionary or list and returns the + result. This MAY well change the given target (dictionary or list) IN PLACE ... UNLESS the copy + argument is True, then the given target will not change as a local copy is made (and returned). + + If the expand_lists argument is True then any target lists longer than the + source be will be filled out with the last element(s) of the source; the full + argument (is deprecated and) is a synomym for this. The default is False. + + If the primitive_lists argument is True then lists of primitives (i.e. lists in which + NONE of its elements are dictionaries, lists, or tuples) will themselves be treated + like primitives, meaning the whole of a source list will replace the corresponding + target; otherwise they will be merged normally, meaning each element of a source list + will be merged, recursively, into the corresponding target list. The default is False. """ + def is_primitive_list(value: Any) -> bool: # noqa + if not isinstance(value, list): + return False + for item in value: + if isinstance(item, (dict, list, tuple)): + return False + return True + if target is None: return source + if expand_lists not in (True, False): + expand_lists = full is True + if (copy is True) and (_recursing is not True): + target = deepcopy(target) if isinstance(target, dict) and isinstance(source, dict) and source: for key, value in source.items(): - target[key] = merge_objects(target[key], value, full) if key in target else value + if ((primitive_lists is True) and + (key in target) and is_primitive_list(target[key]) and is_primitive_list(value)): # noqa + target[key] = value + else: + target[key] = merge_objects(target[key], value, + expand_lists=expand_lists, _recursing=True) if key in target else value elif isinstance(target, list) and isinstance(source, list) and source: for i in range(max(len(source), len(target))): if i < len(target): if i < len(source): - target[i] = merge_objects(target[i], source[i], full) - elif full: - target[i] = merge_objects(target[i], source[len(source) - 1], full) + target[i] = merge_objects(target[i], source[i], expand_lists=expand_lists, _recursing=True) + elif expand_lists is True: + target[i] = merge_objects(target[i], source[len(source) - 1], expand_lists=expand_lists) else: target.append(source[i]) - elif source: + elif source not in (None, {}, []): target = source return target diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 7f6a88d5f..c9b895849 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -74,7 +74,7 @@ def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp self._nrows = 0 self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None self._norefs = True if norefs is True else False - self._merge = True if merge is True else False + self._merge = True if merge is True else False # New merge functionality (2024-05-25) self._debug_sleep = None if debug_sleep: try: @@ -347,7 +347,7 @@ def _load_json_file(self, file: str) -> None: (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa # If the JSON file name looks like a schema name then assume it # contains an object or an array of object of that schema type. - if self._merge: + if self._merge: # New merge functionality (2024-05-25) data = self._merge_with_existing_portal_object(data, schema_name_inferred_from_file_name) self._add(Schema.type_name(file), data) elif isinstance(data, dict): @@ -356,7 +356,7 @@ def _load_json_file(self, file: str) -> None: # which (each property) contains a list of object of that schema type. for schema_name in data: item = data[schema_name] - if self._merge: + if self._merge: # New merge functionality (2024-05-25) item = self._merge_with_existing_portal_object(item, schema_name) self._add(schema_name, item) @@ -380,8 +380,7 @@ def _load_reader(self, reader: RowReader, type_name: str) -> None: structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number) if self._autoadd_properties: self._add_properties(structured_row, self._autoadd_properties, schema) - # New merge functionality (2024-05-25). - if self._merge: + if self._merge: # New merge functionality (2024-05-25) structured_row = self._merge_with_existing_portal_object(structured_row, schema_name) if (prune_error := self._prune_structured_row(structured_row)) is not None: self._note_error({"src": create_dict(type=schema_name, row=reader.row_number), @@ -437,7 +436,7 @@ def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: s """ for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type): if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False): - return merge_objects(existing_portal_object, portal_object) + return merge_objects(existing_portal_object, portal_object, primitive_lists=True) return portal_object def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool: diff --git a/pyproject.toml b/pyproject.toml index 1c0bd9c84..de7f45574 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.9.0.1b2" # TODO: To become 8.10.0 +version = "8.9.0.1b3" # TODO: To become 8.10.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 3591ef0a851983939512688d2c85bb015525dc52 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 30 May 2024 15:59:31 -0400 Subject: [PATCH 29/37] version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index de7f45574..d2b19b7c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.9.0.1b3" # TODO: To become 8.10.0 +version = "8.9.0.1b4" # TODO: To become 8.10.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From ee514c0a4100c729f911926f7ac43c76747d76af Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 30 May 2024 16:09:45 -0400 Subject: [PATCH 30/37] version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d2b19b7c9..cffdd0bfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.9.0.1b4" # TODO: To become 8.10.0 +version = "8.9.0.1b5" # TODO: To become 8.10.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 565e3c92a9bbd3ad1bd2512ff546f39cc6fefcf4 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 30 May 2024 18:05:04 -0400 Subject: [PATCH 31/37] version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cffdd0bfa..c2434a215 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.9.0.1b5" # TODO: To become 8.10.0 +version = "8.9.0.1b6" # TODO: To become 8.10.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From cdb2842de298a35c01c210ee16288f650b2505c9 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 31 May 2024 06:51:09 -0400 Subject: [PATCH 32/37] CHANGES-file-changes --- CHANGELOG.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 28c0ea4a1..2dc5296d8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,11 +9,11 @@ Change Log 8.10.0 ====== -* Added merge capabilities to structured_data. (IN PROGRESS: 2025-05-25) +* Added merge capabilities to structured_data. * Added Question class to command_utils (factored out of smaht-submitr). * Refactored out some identifying property related code from portal_object_utils to portal_utils. * Internalized lookup_strategy related code to structured_data/portal_object_utils/portal_utils. -* Added delete/purge_metadata to portal_utils.Portol, for testing and completeness. +* Added delete/purge_metadata to portal_utils.Portal, for testing and completeness. 8.9.0 From 06bd6cd52364587128138de9042c06cfce63dea1 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 31 May 2024 07:30:11 -0400 Subject: [PATCH 33/37] tests-for-merge-objects --- test/test_misc_utils.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/test_misc_utils.py b/test/test_misc_utils.py index 574f758f5..0f2af3c70 100644 --- a/test/test_misc_utils.py +++ b/test/test_misc_utils.py @@ -3684,6 +3684,22 @@ def test_merge_objects_8(): assert target == expected +def test_merge_objects_9(): + target = {"abc": [1, 2, 3]} + source = {"abc": [4, 5]} + expected = {"abc": [4, 5]} + result = merge_objects(target, source, primitive_lists=True, copy=True) + assert result == expected + assert id(target) != id(result) + + target = {"abc": [1, 2, 3]} + source = {"abc": [4, 5]} + expected = {"abc": [4, 5, 3]} + result = merge_objects(target, source, primitive_lists=False, copy=False) + assert result == expected + assert id(target) == id(result) + + def test_to_integer(): assert to_integer("17") == 17 assert to_integer("17.0") == 17 From 51d2206ef5c69dc026e3634c7b5776ac9f6a73f6 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 31 May 2024 07:30:19 -0400 Subject: [PATCH 34/37] version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c2434a215..18547b5c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.9.0.1b6" # TODO: To become 8.10.0 +version = "8.9.0.1b7" # TODO: To become 8.10.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 9ec0259a5d05f63721e68512d969563a17fb153f Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 11 Jun 2024 07:55:14 -0400 Subject: [PATCH 35/37] removed delete/purge functions from portal_utils. --- CHANGELOG.rst | 1 - dcicutils/portal_utils.py | 16 +--------------- pyproject.toml | 2 +- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2dc5296d8..3a43116e3 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,7 +13,6 @@ Change Log * Added Question class to command_utils (factored out of smaht-submitr). * Refactored out some identifying property related code from portal_object_utils to portal_utils. * Internalized lookup_strategy related code to structured_data/portal_object_utils/portal_utils. -* Added delete/purge_metadata to portal_utils.Portal, for testing and completeness. 8.9.0 diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index daf5264b4..005eb400a 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -17,7 +17,7 @@ from webtest.app import TestApp, TestResponse from wsgiref.simple_server import make_server as wsgi_make_server from dcicutils.common import APP_SMAHT, OrchestratedApp, ORCHESTRATED_APPS -from dcicutils.ff_utils import delete_metadata, get_metadata, get_schema, patch_metadata, post_metadata, purge_metadata +from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata from dcicutils.misc_utils import to_camel_case, VirtualApp from dcicutils.schema_utils import get_identifying_properties from dcicutils.tmpfile_utils import temporary_file @@ -280,20 +280,6 @@ def post_metadata(self, object_type: str, data: dict, check_only: bool = False) add_on="check_only=True" if check_only else "") return self.post(f"/{object_type}{'?check_only=True' if check_only else ''}", data).json() - def delete_metadata(self, object_id: str) -> Optional[dict]: - if isinstance(object_id, str) and object_id: - if self.key: - return delete_metadata(obj_id=object_id, key=self.key) - else: - return self.patch_metadata(object_id, {"status": "deleted"}) - return None - - def purge_metadata(self, object_id: str) -> Optional[dict]: - if isinstance(object_id, str) and object_id: - if self.key: - return purge_metadata(obj_id=object_id, key=self.key) - return None - def get_health(self) -> OptionalResponse: return self.get("/health") diff --git a/pyproject.toml b/pyproject.toml index 18547b5c2..0d3845b58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.9.0.1b7" # TODO: To become 8.10.0 +version = "8.9.0.1b8" # TODO: To become 8.10.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From bb728c172921429def14a16d65098d8ac9625771 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 11 Jun 2024 10:23:18 -0400 Subject: [PATCH 36/37] comments --- dcicutils/portal_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index 005eb400a..0f0bba5e8 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -468,6 +468,9 @@ def is_lookup_subtypes(lookup_options: int) -> bool: # noqa # And note the disction of just using /{uuid} here rather than /{type}/{uuid} as in the else # statement below is not really necessary; just here for emphasis that this is all that's needed. # + # TODO + # Consider (from PR-308) writing a portal API for retrieving possible path formats. + # if first_only is True: results.append(f"/{portal_type}/{identifying_value}") else: From 2e123e93cfbd34f1818d5ae347bf8a1877b90757 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 11 Jun 2024 10:24:23 -0400 Subject: [PATCH 37/37] update-version-8.10.0-ready-to-merge-pr-308 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0d3845b58..4ea9c75b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.9.0.1b8" # TODO: To become 8.10.0 +version = "8.10.0" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT"