From 3862bb76ac7d8d112416b58c0ee07aa614ddfa0d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 8 Feb 2024 12:05:39 -0500 Subject: [PATCH 01/21] Changes to propertly support date/time types in structured_data. --- CHANGELOG.rst | 7 +++ dcicutils/datetime_utils.py | 98 ++++++++++++++++++++++++++++++++++++ dcicutils/misc_utils.py | 8 +++ dcicutils/structured_data.py | 33 ++++++++---- pyproject.toml | 2 +- test/test_datetime_utils.py | 30 +++++++++++ 6 files changed, 166 insertions(+), 12 deletions(-) create mode 100644 dcicutils/datetime_utils.py create mode 100644 test/test_datetime_utils.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fb886824a..fbdbca95d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,13 @@ Change Log ---------- +8.8.0 +===== +* New datetime_utils module and tests; first created for date/time support in structured_data. +* Changes to structured_data support date/time types. +* Changes to structured_data support internal references in any order. + + 8.7.2 ===== diff --git a/dcicutils/datetime_utils.py b/dcicutils/datetime_utils.py new file mode 100644 index 000000000..3a9dc9f38 --- /dev/null +++ b/dcicutils/datetime_utils.py @@ -0,0 +1,98 @@ +from dcicutils.misc_utils import normalize_spaces +from datetime import datetime, timedelta, timezone +from typing import Optional, Tuple + + +def parse_datetime_string(value: str) -> Optional[datetime]: + """ + Parses the given string into a datetime object and returns it, or if ill-formated then returns None. + The given string is assumed to be in the format "YYYY-MM-DD hh:mm:ss" and with an optional timezone + suffix in format "+hh:mm" or "+hh". Also allowed is just a date of the format "YYYY-MM-DD" in which + case a time of "00:00:00" is assumed. If no timezone is specified then the local timezone is assumed. 
+ """ + if not isinstance(value, str) or not (value := normalize_spaces(value)): + return None + tz_hours = -1 + tz_minutes = -1 + if (t := value.rfind("T")) > 0: + value = value.replace("T", " ") + if (space := value.find(" ")) > 0 and (value_suffix := value[space + 1:]): + if (plus := value_suffix.rfind("+")) > 0 or (minus := value_suffix.rfind("-")) > 0: + value = normalize_spaces(value[:space] + " " + value_suffix[:(plus if plus > 0 else minus)]) + if value_tz := normalize_spaces(value_suffix[(plus if plus > 0 else minus) + 1:]): + if len(value_tz := value_tz.split(":")) == 2: + value_tz_hours = value_tz[0].strip() + value_tz_minutes = value_tz[1].strip() + else: + value_tz_hours = value_tz[0].strip() + value_tz_minutes = "0" + if value_tz_hours.isdigit() and value_tz_minutes.isdigit(): + tz_hours = int(value_tz_hours) + tz_minutes = int(value_tz_minutes) + if not (plus > 0): + tz_hours = -tz_hours + else: + value = value + " 00:00:00" + if tz_hours < 0 or tz_minutes < 0: + tz_hours, tz_minutes = get_local_timezone_hours_minutes() + try: + dt = datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + tz = timezone(timedelta(hours=tz_hours, minutes=tz_minutes)) + return dt.replace(tzinfo=tz) + except Exception: + return None + + +def parse_date_string(value: str) -> Optional[datetime]: + """ + Parses the given string into a datetime object representing only a date and + returns it, or if ill-formated then returns None. The given string is assumed + to be in the format "YYYY-MM-DD"; if a given string of this format is suffixed + with a space or a "T" and ANYTHING else, then that trailing portion is ignored. 
+ """ + if isinstance(value, str) and (value := normalize_spaces(value)): + if (separator := value.find(" ")) > 0 or (separator := value.find("T")) > 0: + value = value[:separator] + try: + return datetime.strptime(value, "%Y-%m-%d") + except Exception: + pass + + +def normalize_datetime_string(value: str) -> Optional[str]: + """ + Parses the given string into a datetime object and returns a string for that datetime in ISO-8601 format, + or if ill-formated then returns None. The given string is assumed to be in the format "YYYY-MM-DD hh:mm:ss" + and with an optional timezone suffix in format "+hh:mm" or "+hh". Also allowed is just a date of the + format "YYYY-MM-DD" in which case a time of "00:00:00" is assumed. If no timezone is specified then + the local timezone is assumed. The returned format looks like this: "2024-02-08T10:37:51-05:00" + """ + dt = parse_datetime_string(value) + return dt.isoformat() if dt else None + + +def normalize_date_string(value: str) -> Optional[str]: + """ + Parses the given string into a datetime object representing only a date and returns a string for that + date in ISO-8601 format, or if ill-formated then returns None. The given string is assumed to be in + the format "YYYY-MM-DD"; but if a given string of this format is suffixed with a space followed by + ANYTHING else, then that trailing portion is ignored. The returned format looks like this: "2024-02-08" + """ + d = parse_date_string(value) + return d.strftime("%Y-%m-%d") if d else None + + +def get_local_timezone_string() -> str: + """ + Returns current/local timezone in format like: "-05:00". + """ + tz_hours, tz_minutes = get_local_timezone_hours_minutes() + return f"{tz_hours:+03d}:{tz_minutes:02d}" + + +def get_local_timezone_hours_minutes() -> Tuple[int, int]: + """ + Returns a tuple with the integer hours and minutes offset for the current/local timezone. 
+ """ + tz_minutes = datetime.now(timezone.utc).astimezone().utcoffset().total_seconds() / 60 + return int(tz_minutes // 60), int(abs(tz_minutes % 60)) diff --git a/dcicutils/misc_utils.py b/dcicutils/misc_utils.py index 6228f2f8c..219b3997f 100644 --- a/dcicutils/misc_utils.py +++ b/dcicutils/misc_utils.py @@ -2540,6 +2540,14 @@ def pad_to(target_size: int, data: list, *, padding=None): return data +def normalize_spaces(value: str) -> str: + """ + Returns the given string with multiple consecutive occurrences of whitespace + converted to a single space, and left and right trimmed of spaces. + """ + return re.sub(r"\s+", " ", value).strip() + + class JsonLinesReader: def __init__(self, fp, padded=False, padding=None): diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 70c773609..8cb2efb57 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -10,6 +10,7 @@ from webtest.app import TestApp from dcicutils.common import OrchestratedApp from dcicutils.data_readers import CsvReader, Excel, RowReader +from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string from dcicutils.file_utils import search_for_file from dcicutils.misc_utils import (create_dict, create_readonly_object, load_json_if, merge_objects, remove_empty_properties, right_trim, @@ -368,28 +369,20 @@ class Schema(SchemaBase): def __init__(self, schema_json: dict, portal: Optional[Portal] = None) -> None: super().__init__(schema_json) -# self._data = schema_json if isinstance(schema_json, dict) else {} -# self._type = Schema.type_name(schema_json.get("title", "")) self._portal = portal # Needed only to resolve linkTo references. 
self._map_value_functions = { "boolean": self._map_function_boolean, "enum": self._map_function_enum, "integer": self._map_function_integer, "number": self._map_function_number, - "string": self._map_function_string + "string": self._map_function_string, + "date": self._map_function_date, + "datetime": self._map_function_datetime } self._resolved_refs = set() self._unresolved_refs = [] self._typeinfo = self._create_typeinfo(schema_json) -# @property -# def data(self) -> dict: -# return self._data - -# @property -# def type(self) -> str: -# return self._type - @staticmethod def load_by_name(name: str, portal: Portal) -> Optional[dict]: schema_json = portal.get_schema(Schema.type_name(name)) if portal else None @@ -424,6 +417,10 @@ def _map_function(self, typeinfo: dict) -> Optional[Callable]: map_function = self._map_function_enum elif isinstance(typeinfo.get("linkTo"), str): map_function = self._map_function_ref + elif (type_format := typeinfo.get("format")) == "date": + map_function = self._map_function_date + elif type_format == "date-time": + map_function = self._map_function_datetime else: map_function = self._map_value_functions.get(typeinfo_type) return map_function(typeinfo) if map_function else None @@ -454,6 +451,20 @@ def map_string(value: str, src: Optional[str]) -> str: return value if value is not None else "" return map_string + def _map_function_date(self, typeinfo: dict) -> Callable: + def map_date(value: str, src: Optional[str]) -> str: + value = normalize_date_string(value) +# if value and value.endswith(" 00:00:00"): +# value = value[:-9] + return value if value is not None else "" + return map_date + + def _map_function_datetime(self, typeinfo: dict) -> Callable: + def map_datetime(value: str, src: Optional[str]) -> str: + value = normalize_datetime_string(value) + return value if value is not None else "" + return map_datetime + def _map_function_ref(self, typeinfo: dict) -> Callable: def map_ref(value: str, link_to: str, portal: 
Optional[Portal], src: Optional[str]) -> Any: nonlocal self, typeinfo diff --git a/pyproject.toml b/pyproject.toml index d6c91a125..1cf79ab70 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.7.2" +version = "8.7.2.1b1" # TODO: To become 8.8.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/test/test_datetime_utils.py b/test/test_datetime_utils.py new file mode 100644 index 000000000..6ff6bb636 --- /dev/null +++ b/test/test_datetime_utils.py @@ -0,0 +1,30 @@ +from dcicutils.datetime_utils import get_local_timezone_string, normalize_date_string, normalize_datetime_string + + +def test_normalize_datetime_string(): + + tz = get_local_timezone_string() + + value = "2024-02-08T10:37:51-05:00" + assert normalize_datetime_string(value) == "2024-02-08T10:37:51-05:00" + + value = " 2024-01-28 17:15:32" + assert normalize_datetime_string(value) == "2024-01-28T17:15:32" + tz + + value = "2024-02-08" + assert normalize_datetime_string(value) == "2024-02-08T00:00:00" + tz + + value = " 2024-01-28 17:15:32 + 03:34" + assert normalize_datetime_string(value) == "2024-01-28T17:15:32+03:34" + + +def test_normalize_date_string(): + + value = " 2024-01-28" + assert normalize_date_string(value) == "2024-01-28" + + value = "2024-02-08T10:37:51-05:00" + assert normalize_date_string(value) == "2024-02-08" + + value = " 2024-01-28 17:15:32 + 03:34" + assert normalize_date_string(value) == "2024-01-28" From 73793889b7002e85cf03e0bb3229afeb6b305cf3 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 8 Feb 2024 12:08:12 -0500 Subject: [PATCH 02/21] remove cruft --- dcicutils/structured_data.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 8cb2efb57..4b58c9d5d 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -454,8 +454,6 
@@ def map_string(value: str, src: Optional[str]) -> str: def _map_function_date(self, typeinfo: dict) -> Callable: def map_date(value: str, src: Optional[str]) -> str: value = normalize_date_string(value) -# if value and value.endswith(" 00:00:00"): -# value = value[:-9] return value if value is not None else "" return map_date From 21cae95a87d43a915547ba8ff4e2d1347502c03d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 8 Feb 2024 19:23:13 -0500 Subject: [PATCH 03/21] Fix in structured_data for resolving internal references in reverse order. --- dcicutils/structured_data.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 4b58c9d5d..b943bd0ae 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -12,7 +12,7 @@ from dcicutils.data_readers import CsvReader, Excel, RowReader from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string from dcicutils.file_utils import search_for_file -from dcicutils.misc_utils import (create_dict, create_readonly_object, load_json_if, +from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if, merge_objects, remove_empty_properties, right_trim, split_string, to_boolean, to_enum, to_float, to_integer, VirtualApp) from dcicutils.portal_object_utils import PortalObject @@ -204,6 +204,17 @@ def _load_excel_file(self, file: str) -> None: order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {} for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)): self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name)) + # Check for unresolved reference errors which really are not because of ordering. + # Yes such internal references will be handled correctly on actual database update via snovault.loadxl. 
+ if ref_errors := self.ref_errors: + ref_errors_actual = [] + for ref_error in ref_errors: + if not (ref := self.portal.ref_exists(ref_error["error"])): + ref_errors_actual.append(ref_error) + if ref_errors_actual: + self._errors["ref"] = ref_errors_actual + else: + del self._errors["ref"] def _load_json_file(self, file: str) -> None: with open(file) as f: @@ -670,7 +681,13 @@ def is_file_schema(self, schema_name: str) -> bool: """ return self.is_schema_type(schema_name, FILE_SCHEMA_NAME) - def ref_exists(self, type_name: str, value: str) -> List[str]: + def ref_exists(self, type_name: str, value: Optional[str] = None) -> List[str]: + if not value: + if type_name.startswith("/") and len(parts := type_name[1:].split("/")) == 2: + type_name = parts[0] + value = parts[1] + else: + return [] resolved = [] is_resolved, resolved_uuid = self._ref_exists_single(type_name, value) if is_resolved: @@ -700,7 +717,7 @@ def _ref_exists_single(self, type_name: str, value: str) -> Tuple[bool, Optional for item in items: if (ivalue := next((item[iproperty] for iproperty in iproperties if iproperty in item), None)): if isinstance(ivalue, list) and value in ivalue or ivalue == value: - return True, None + return True, ivalue if is_uuid(ivalue) else None if (value := self.get_metadata(f"/{type_name}/{value}")) is None: return False, None return True, value.get("uuid") From bb36c2b8be123f3d155a785a7658789921bd7e55 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 9 Feb 2024 07:10:15 -0500 Subject: [PATCH 04/21] comments --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fbdbca95d..af880022e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,9 +9,9 @@ Change Log 8.8.0 ===== -* New datetime_utils module and tests; first created for date/time support in structured_data. * Changes to structured_data support date/time types. * Changes to structured_data support internal references in any order. 
+* New datetime_utils module and tests; first created for date/time support in structured_data. 8.7.2 From ebcd0761cd1e07b4f0ffb2ecafdb071bd8cf7610 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 9 Feb 2024 07:48:51 -0500 Subject: [PATCH 05/21] flake8 --- dcicutils/datetime_utils.py | 2 +- dcicutils/structured_data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dcicutils/datetime_utils.py b/dcicutils/datetime_utils.py index 3a9dc9f38..816b3e8df 100644 --- a/dcicutils/datetime_utils.py +++ b/dcicutils/datetime_utils.py @@ -14,7 +14,7 @@ def parse_datetime_string(value: str) -> Optional[datetime]: return None tz_hours = -1 tz_minutes = -1 - if (t := value.rfind("T")) > 0: + if value.rfind("T") > 0: value = value.replace("T", " ") if (space := value.find(" ")) > 0 and (value_suffix := value[space + 1:]): if (plus := value_suffix.rfind("+")) > 0 or (minus := value_suffix.rfind("-")) > 0: diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index b943bd0ae..1d2fa95ec 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -209,7 +209,7 @@ def _load_excel_file(self, file: str) -> None: if ref_errors := self.ref_errors: ref_errors_actual = [] for ref_error in ref_errors: - if not (ref := self.portal.ref_exists(ref_error["error"])): + if not self.portal.ref_exists(ref_error["error"]): ref_errors_actual.append(ref_error) if ref_errors_actual: self._errors["ref"] = ref_errors_actual From 559094e907540f8e5d1198392276a10a67d551a7 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 9 Feb 2024 07:51:38 -0500 Subject: [PATCH 06/21] flake8 --- docs/source/dcicutils.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/source/dcicutils.rst b/docs/source/dcicutils.rst index 11dec1cf5..4ef4e29f5 100644 --- a/docs/source/dcicutils.rst +++ b/docs/source/dcicutils.rst @@ -93,6 +93,13 @@ data_utils :members: +datetime_utils +^^^^^^^^^^^^^^ + +.. 
automodule:: dcicutils.datetime_utils + :members: + + deployment_utils ^^^^^^^^^^^^^^^^ From 4ff0cb19d1114c3c5cfd6cd601be4e51c2ebfa09 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 9 Feb 2024 07:57:27 -0500 Subject: [PATCH 07/21] minor test_datetime_utils for timezone related to ga --- test/test_datetime_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_datetime_utils.py b/test/test_datetime_utils.py index 6ff6bb636..d969beb60 100644 --- a/test/test_datetime_utils.py +++ b/test/test_datetime_utils.py @@ -6,7 +6,7 @@ def test_normalize_datetime_string(): tz = get_local_timezone_string() value = "2024-02-08T10:37:51-05:00" - assert normalize_datetime_string(value) == "2024-02-08T10:37:51-05:00" + assert normalize_datetime_string(value) == "2024-02-08T10:37:51" + tz value = " 2024-01-28 17:15:32" assert normalize_datetime_string(value) == "2024-01-28T17:15:32" + tz From 7dbd00f27919c7b3a32fbe3a1f1d90702c9b88e0 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 9 Feb 2024 08:17:42 -0500 Subject: [PATCH 08/21] minor fix in structured_data --- dcicutils/structured_data.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 1d2fa95ec..ae3424203 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -717,7 +717,7 @@ def _ref_exists_single(self, type_name: str, value: str) -> Tuple[bool, Optional for item in items: if (ivalue := next((item[iproperty] for iproperty in iproperties if iproperty in item), None)): if isinstance(ivalue, list) and value in ivalue or ivalue == value: - return True, ivalue if is_uuid(ivalue) else None + return True, (ivalue if isinstance(ivalue, str) and is_uuid(ivalue) else None) if (value := self.get_metadata(f"/{type_name}/{value}")) is None: return False, None return True, value.get("uuid") diff --git a/pyproject.toml b/pyproject.toml index 1cf79ab70..e2a6273a6 100644 
--- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.7.2.1b1" # TODO: To become 8.8.0 +version = "8.7.2.1b2" # TODO: To become 8.8.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From cd7b03f323e1187dabbb096b5ea17827b1e69221 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 9 Feb 2024 09:17:08 -0500 Subject: [PATCH 09/21] comment --- dcicutils/datetime_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dcicutils/datetime_utils.py b/dcicutils/datetime_utils.py index 816b3e8df..bd8d599d6 100644 --- a/dcicutils/datetime_utils.py +++ b/dcicutils/datetime_utils.py @@ -65,7 +65,7 @@ def normalize_datetime_string(value: str) -> Optional[str]: or if ill-formated then returns None. The given string is assumed to be in the format "YYYY-MM-DD hh:mm:ss" and with an optional timezone suffix in format "+hh:mm" or "+hh". Also allowed is just a date of the format "YYYY-MM-DD" in which case a time of "00:00:00" is assumed. If no timezone is specified then - the local timezone is assumed. The returned format looks like this: "2024-02-08T10:37:51-05:00" + the local timezone is assumed. The returned format looks like this: "2024-02-08T10:37:51-05:00" """ dt = parse_datetime_string(value) return dt.isoformat() if dt else None From c04f02ab56aa6757ef7f2405e062f55348cd2285 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 11 Feb 2024 10:52:47 -0500 Subject: [PATCH 10/21] Addec view-portal-object script for general troubleshooting. 
--- CHANGELOG.rst | 1 + dcicutils/captured_output.py | 71 +++++++++++ dcicutils/data_readers.py | 5 +- dcicutils/scripts/view_portal_object.py | 155 ++++++++++++++++++++++++ poetry.lock | 12 +- pyproject.toml | 4 +- 6 files changed, 244 insertions(+), 4 deletions(-) create mode 100644 dcicutils/captured_output.py create mode 100644 dcicutils/scripts/view_portal_object.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index af880022e..c397f4025 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,6 +12,7 @@ Change Log * Changes to structured_data support date/time types. * Changes to structured_data support internal references in any order. * New datetime_utils module and tests; first created for date/time support in structured_data. +* Added view-portal-object script for general troubleshooting. 8.7.2 diff --git a/dcicutils/captured_output.py b/dcicutils/captured_output.py new file mode 100644 index 000000000..b0a6205fd --- /dev/null +++ b/dcicutils/captured_output.py @@ -0,0 +1,71 @@ +# TODO: Move to dcicutils. +from collections import namedtuple +from contextlib import contextmanager +import io +import sys +from typing import Optional + +_real_stdout = sys.stdout +_real_stderr = sys.stderr + + +@contextmanager +def captured_output(capture: bool = True): + """ + Context manager to capture any/all output to stdout or stderr, and not actually output it to stdout + or stderr. Yields and object with a get_captured_output() method to get the output captured thus far, + and another uncaptured_print() method to actually print the given output to stdout, even though output + to stdout is being captured. Can be useful, for example, in creating command-line scripts which invoke + code which outputs a lot of info, warning, error, etc to stdout or stderr, and we want to suprress that + output; but with the yielded uncaptured_print() method output specific to the script can actually be + output (to stdout); and/or can also optionally output any/all captured output, e.g. 
for debugging or + troubleshooting purposes. Disable this capture, without having to restructure your code WRT the usage + of the with-clause with this context manager, pass False as an argument to this context manager. + """ + + original_stdout = _real_stdout + original_stderr = _real_stderr + captured_output = io.StringIO() + + def set_original_output() -> None: + sys.stdout = original_stdout + sys.stderr = original_stderr + + def set_captured_output() -> None: + if capture: + sys.stdout = captured_output + sys.stderr = captured_output + + def uncaptured_print(*args, **kwargs) -> None: + set_original_output() + print(*args, **kwargs) + set_captured_output() + + def uncaptured_input(message: str) -> str: + set_original_output() + value = input(message) + set_captured_output() + return value + + def get_captured_output() -> Optional[str]: + return captured_output.getvalue() if capture else None + + try: + set_captured_output() + Result = namedtuple("Result", ["get_captured_output", "uncaptured_print", "uncaptured_input"]) + yield Result(get_captured_output, uncaptured_print, uncaptured_input) + finally: + set_original_output() + + +@contextmanager +def uncaptured_output(): + original_stdout = sys.stdout + original_stderr = sys.stderr + sys.stdout = _real_stdout + sys.stderr = _real_stderr + try: + yield + finally: + sys.stdout = original_stdout + sys.stderr = original_stderr diff --git a/dcicutils/data_readers.py b/dcicutils/data_readers.py index cb5bfb2f1..514d24938 100644 --- a/dcicutils/data_readers.py +++ b/dcicutils/data_readers.py @@ -152,9 +152,10 @@ def open(self) -> None: class Excel: - def __init__(self, file: str, reader_class: Optional[Type] = None) -> None: + def __init__(self, file: str, reader_class: Optional[Type] = None, include_hidden_sheets: bool = False) -> None: self._file = file self._workbook = None + self._include_hidden_sheets = include_hidden_sheets self.sheet_names = None if isinstance(reader_class, Type) and issubclass(reader_class, 
ExcelSheetReader): self._reader_class = reader_class @@ -169,7 +170,7 @@ def open(self) -> None: if self._workbook is None: self._workbook = openpyxl.load_workbook(self._file, data_only=True) self.sheet_names = [sheet_name for sheet_name in self._workbook.sheetnames - if self._workbook[sheet_name].sheet_state != "hidden"] + if self._include_hidden_sheets or (self._workbook[sheet_name].sheet_state != "hidden")] def __del__(self) -> None: if (workbook := self._workbook) is not None: diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py new file mode 100644 index 000000000..e9ab75bfb --- /dev/null +++ b/dcicutils/scripts/view_portal_object.py @@ -0,0 +1,155 @@ +# ------------------------------------------------------------------------------------------------------ +# Command-line utility to retrieve and print the given object (UUID) from a SMaHT/CGAP/Fourfront Portal. +# ------------------------------------------------------------------------------------------------------ +# Example command: +# view-portal-object 4483b19d-62e7-4e7f-a211-0395343a35df --yaml +# +# Example output: +# '@context': /terms/ +# '@id': /access-keys/3968e38e-c11f-472e-8531-8650e2e296d4/ +# '@type': +# - AccessKey +# - Item +# access_key_id: NSVCZ75O +# date_created: '2023-09-06T13:11:59.704005+00:00' +# description: Manually generated local access-key for testing. 
+# display_title: AccessKey from 2023-09-06 +# expiration_date: '2023-12-05T13:11:59.714106' +# last_modified: +# date_modified: '2023-09-06T13:11:59.711367+00:00' +# modified_by: +# '@id': /users/3202fd57-44d2-44fb-a131-afb1e43d8ae5/ +# '@type': +# - User +# - Item +# status: current +# uuid: 3202fd57-44d2-44fb-a131-afb1e43d8ae5 +# principals_allowed: +# edit: +# - group.admin +# - userid.74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68 +# view: +# - group.admin +# - group.read-only-admin +# - userid.74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68 +# schema_version: '1' +# status: current +# user: +# '@id': /users/74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68/ +# '@type': +# - User +# - Item +# display_title: David Michaels +# principals_allowed: +# edit: +# - group.admin +# view: +# - group.admin +# - group.read-only-admin +# status: current +# uuid: 74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68 +# uuid: 3968e38e-c11f-472e-8531-8650e2e296d4 +# +# Note that instead of a uuid you can also actually use a path, for example: +# view-local-object /file-formats/vcf_gz_tbi +# +# -------------------------------------------------------------------------------------------------- + +import argparse +import json +import pyperclip +import sys +from typing import Optional +import yaml +from dcicutils.misc_utils import get_error_message +from dcicutils.portal_utils import Portal +from dcicutils.captured_output import captured_output, uncaptured_output + + +def main(): + + parser = argparse.ArgumentParser(description="View Portal object.") + parser.add_argument("uuid", type=str, + help=f"The uuid (or path) of the object to fetch and view. 
") + parser.add_argument("--ini", type=str, required=False, default=None, + help=f"Name of the application .ini file.") + parser.add_argument("--env", "-e", type=str, required=False, default=None, + help=f"Environment name (key from ~/.smaht-keys.json).") + parser.add_argument("--server", "-s", type=str, required=False, default=None, + help=f"Environment server name (server from key in ~/.smaht-keys.json).") + parser.add_argument("--app", type=str, required=False, default=None, + help=f"Application name (one of: smaht, cgap, fourfront).") + parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.") + parser.add_argument("--database", action="store_true", required=False, default=False, + help="Read from database output.") + parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.") + parser.add_argument("--copy", "-c", action="store_true", required=False, default=False, + help="Copy object data to clipboard.") + parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.") + parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.") + args = parser.parse_args() + + portal = _create_portal(ini=args.ini, env=args.env, server=args.server, app=args.app, debug=args.debug) + data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, database=args.database, verbose=args.verbose) + + if args.copy: + pyperclip.copy(json.dumps(data, indent=4)) + if args.yaml: + _print(yaml.dump(data)) + else: + _print(json.dumps(data, default=str, indent=4)) + + +def _create_portal(ini: str, env: Optional[str] = None, + server: Optional[str] = None, app: Optional[str] = None, debug: bool = False) -> Portal: + with captured_output(not debug): + return Portal(env, server=server, app=app) if env or app else Portal(ini) + + +def _get_portal_object(portal: Portal, uuid: str, + raw: bool = False, database: 
bool = False, verbose: bool = False) -> dict: + if verbose: + _print(f"Getting object ({uuid}) from portal ... ", end="") + response = None + try: + if not uuid.startswith("/"): + path = f"/{uuid}" + else: + path = uuid + response = portal.get(path, raw=raw, database=database) + except Exception as e: + if "404" in str(e) and "not found" in str(e).lower(): + if verbose: + _print("Not found!") + else: + _print(f"Object ({uuid}) not found!") + _exit_without_action() + _exit_without_action(f"Exception getting object ({uuid}) -> {get_error_message(e)}", newline=verbose) + if not response: + _exit_without_action(f"Null response getting object {uuid}).") + if response.status_code not in [200, 307]: + # TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above. + _exit_without_action(f"Invalid status code ({response.status_code}) getting object: {uuid}") + if not response.json: + _exit_without_action(f"Invalid JSON getting object {uuid}).") + if verbose: + _print("OK") + return response.json() + + +def _print(*args, **kwargs): + with uncaptured_output(): + print(*args, **kwargs) + sys.stdout.flush() + + +def _exit_without_action(message: Optional[str] = None, newline: bool = True) -> None: + if message: + if newline: + _print() + _print(f"ERROR: {message}") + exit(1) + + +if __name__ == "__main__": + main() diff --git a/poetry.lock b/poetry.lock index 9c93ec47a..19e24dc54 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1333,6 +1333,16 @@ cryptography = ">=38.0.0,<40.0.0 || >40.0.0,<40.0.1 || >40.0.1,<42" docs = ["sphinx (!=5.2.0,!=5.2.0.post0)", "sphinx-rtd-theme"] test = ["flaky", "pretend", "pytest (>=3.0.1)"] +[[package]] +name = "pyperclip" +version = "1.8.2" +description = "A cross-platform clipboard module for Python. 
(Only handles plain text for now.)" +optional = false +python-versions = "*" +files = [ + {file = "pyperclip-1.8.2.tar.gz", hash = "sha256:105254a8b04934f0bc84e9c24eb360a591aaf6535c9def5f29d92af107a9bf57"}, +] + [[package]] name = "pyramid" version = "1.10.4" @@ -2092,4 +2102,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.12" -content-hash = "f809dee9b269816495aebe98b7299657df63d20446e140df3635955ca358c861" +content-hash = "e921aaa6ed6c8f604a55b2fdff146f4a01429e9ac7653a36be3078c38b654094" diff --git a/pyproject.toml b/pyproject.toml index e2a6273a6..edf0ddb9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.7.2.1b2" # TODO: To become 8.8.0 +version = "8.7.2.1b3" # TODO: To become 8.8.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" @@ -56,6 +56,7 @@ PyJWT = "^2.6.0" pyramid = "1.10.4" pytz = ">=2020.4" redis = "^4.5.1" +pyperclip = "^1.8.2" PyYAML = "^6.0.1" requests = "^2.21.0" rfc3986 = "^1.4.0" @@ -88,6 +89,7 @@ pytest-runner = ">=5.1" publish-to-pypi = "dcicutils.scripts.publish_to_pypi:main" show-contributors = "dcicutils.contribution_scripts:show_contributors_main" run-license-checker = "dcicutils.scripts.run_license_checker:main" +view-portal-object = "dcicutils.scripts.view_portal_object:main" [tool.pytest.ini_options] From a5bb47f34f874a5d430b62e7003dfecad0b3811d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 11 Feb 2024 10:54:24 -0500 Subject: [PATCH 11/21] doc for captured_output --- docs/source/dcicutils.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/source/dcicutils.rst b/docs/source/dcicutils.rst index 4ef4e29f5..1fca86c93 100644 --- a/docs/source/dcicutils.rst +++ b/docs/source/dcicutils.rst @@ -30,6 +30,13 @@ bundle_utils :members: +captured_output +^^^^^^^^^^^^^^^ + +.. 
automodule:: dcicutils.captured_output + :members: + + codebuild_utils ^^^^^^^^^^^^^^^ From b15cd96993bb2c013243a3db0884b1e3ff8ed169 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 11 Feb 2024 10:57:46 -0500 Subject: [PATCH 12/21] doc for captured_output --- dcicutils/captured_output.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dcicutils/captured_output.py b/dcicutils/captured_output.py index b0a6205fd..be22cd16a 100644 --- a/dcicutils/captured_output.py +++ b/dcicutils/captured_output.py @@ -1,4 +1,3 @@ -# TODO: Move to dcicutils. from collections import namedtuple from contextlib import contextmanager import io From 6fa09cf03948c389a90899c212107a04df64dbb0 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 11 Feb 2024 13:46:12 -0500 Subject: [PATCH 13/21] minor adjustments to portal_utils --- dcicutils/portal_utils.py | 87 +++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 36 deletions(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index c854de30b..9b4e9b05c 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -88,9 +88,9 @@ def init_from_key(key: dict, server: Optional[str], unspecified: Optional[list] isinstance(secret := key.get("secret"), str) and secret): # noqa self._key = {"key": key_id, "secret": secret} if (isinstance(server, str) and server) or (isinstance(server := key.get("server"), str) and server): - if server := normalize_server(server): + if server := Portal._normalize_server(server): if isinstance(key_server := key.get("server"), str) and key_server: - if normalize_server(key_server) != server: + if Portal._normalize_server(key_server) != server: raise Exception(f"Portal server inconsistency: {server} vs {key_server}") self._key["server"] = self._server = server if not self._key: @@ -104,49 +104,21 @@ def init_from_key_pair(key_pair: tuple, server: Optional[str], unspecified: Opti def init_from_keys_file(keys_file: str, env: Optional[str], server: 
Optional[str], unspecified: Optional[list] = []) -> None: - try: - with io.open(keys_file := os.path.expanduser(keys_file)) as f: - keys = json.load(f) - except Exception: - raise Exception(f"Portal initialization error; cannot open keys-file: {keys_file}") - if isinstance(env, str) and env and isinstance(key := keys.get(env), dict): - init_from_key(key, server) - self._keys_file = keys_file - self._env = env - elif (isinstance(server, str) and (server := normalize_server(server)) and - (key := [keys[k] for k in keys if normalize_server(keys[k].get("server")) == server])): - init_from_key(key[0], server) - self._keys_file = keys_file - elif not env and len(keys) == 1 and (env := next(iter(keys))) and isinstance(key := keys[env], dict): + key, env = Portal._lookup_in_keys_file(keys_file, env, server, raise_exception=True) + if key: init_from_key(key, server) self._keys_file = keys_file self._env = env - else: - raise Exception(f"Portal initialization error;" - f" {env or server or None} not found in keys-file: {keys_file}") def init_from_env_server_app(env: str, server: str, app: Optional[str], unspecified: Optional[list] = None) -> None: - if keys_file := self._default_keys_file(app, env): + if keys_file := Portal._default_keys_file(app, env, server): init_from_keys_file(keys_file, env, server, unspecified=unspecified) else: init(unspecified=unspecified) self._env = env self._server = server - def normalize_server(server: str) -> Optional[str]: - prefix = "" - if (lowercase_server := server.lower()).startswith("http://"): - prefix = "http://" - elif lowercase_server.startswith("https://"): - prefix = "https://" - if prefix: - if (server := re.sub(r"/+", "/", server[len(prefix):])).startswith("/"): - server = server[1:] - if len(server) > 1 and server.endswith("/"): - server = server[:-1] - return prefix + server if server else None - if (valid_app := app) and not (valid_app := Portal._valid_app(app)): raise Exception(f"Portal initialization error; invalid app: 
{app}") self._app = valid_app @@ -166,7 +138,7 @@ def normalize_server(server: str) -> Optional[str]: init_from_env_server_app(arg, server, app, unspecified=[env]) elif (isinstance(env, str) and env) or (isinstance(server, str) and server): init_from_env_server_app(env, server, app, unspecified=[arg]) - elif not arg and (keys_file := self._default_keys_file(app=self._app or Portal.DEFAULT_APP, env=env)): + elif not arg and (keys_file := Portal._default_keys_file(self._app or Portal.DEFAULT_APP, env, server)): # If no initial arg then look for default app keys file. init_from_keys_file(keys_file, env, server) elif raise_exception: @@ -411,14 +383,57 @@ def _kwargs(self, **kwargs) -> dict: result_kwargs["timeout"] = timeout return result_kwargs - def _default_keys_file(self, app: Optional[str], env: Optional[str] = None) -> Optional[str]: + @staticmethod + def _default_keys_file(app: Optional[str], env: Optional[str], server: Optional[str]) -> Optional[str]: def infer_app_from_env(env: str) -> Optional[str]: # noqa if isinstance(env, str) and (lowercase_env := env.lower()): if app := [app for app in ORCHESTRATED_APPS if lowercase_env.startswith(app.lower())]: - return self._valid_app(app[0]) + return Portal._valid_app(app[0]) if (app := Portal._valid_app(app)) or (app := infer_app_from_env(env)): keys_file = os.path.expanduser(os.path.join(Portal.KEYS_FILE_DIRECTORY, f".{app.lower()}-keys.json")) return keys_file if os.path.exists(keys_file) else None + if not app: + for app in ORCHESTRATED_APPS: + if keys_file := Portal._default_keys_file(app, env, server): + if Portal._lookup_in_keys_file(keys_file, env, server)[0]: + return keys_file + + @staticmethod + def _lookup_in_keys_file(keys_file: str, env: Optional[str], server: Optional[str], + raise_exception: bool = False) -> Tuple[Optional[dict], Optional[str]]: + try: + with io.open(keys_file := os.path.expanduser(keys_file)) as f: + keys = json.load(f) + except Exception: + if raise_exception: + raise 
Exception(f"Portal initialization error; cannot open keys-file: {keys_file}") + return None, None + if isinstance(env, str) and env and isinstance(key := keys.get(env), dict): + return key, env + elif (isinstance(server, str) and (server := Portal._normalize_server(server)) and + (key := [keys[k] for k in keys if Portal._normalize_server(keys[k].get("server")) == server])): + return key[0], env + elif not env and len(keys) == 1 and (env := next(iter(keys))) and isinstance(key := keys[env], dict): + return key, env + else: + if raise_exception: + raise Exception(f"Portal initialization error;" + f" {env or server or None} not found in keys-file: {keys_file}") + return None, None + + @staticmethod + def _normalize_server(server: str) -> Optional[str]: + prefix = "" + if (lowercase_server := server.lower()).startswith("http://"): + prefix = "http://" + elif lowercase_server.startswith("https://"): + prefix = "https://" + if prefix: + if (server := re.sub(r"/+", "/", server[len(prefix):])).startswith("/"): + server = server[1:] + if len(server) > 1 and server.endswith("/"): + server = server[:-1] + return prefix + server if server else None @staticmethod def _valid_app(app: Optional[str]) -> Optional[str]: From 7341023367d2e7c6a78678274c3a928f37d1c2a7 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 11 Feb 2024 13:46:23 -0500 Subject: [PATCH 14/21] version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index edf0ddb9e..77d6a955e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.7.2.1b3" # TODO: To become 8.8.0 +version = "8.7.2.1b4" # TODO: To become 8.8.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From a0a8063f91aa3fa7717839ed11137ece8749786d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 11 Feb 2024 13:57:58 -0500 Subject: 
[PATCH 15/21] view-portal-object script updates. --- dcicutils/scripts/view_portal_object.py | 29 +++++++++++++++---------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index e9ab75bfb..723c517fb 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -109,7 +109,17 @@ def _create_portal(ini: str, env: Optional[str] = None, def _get_portal_object(portal: Portal, uuid: str, raw: bool = False, database: bool = False, verbose: bool = False) -> dict: if verbose: - _print(f"Getting object ({uuid}) from portal ... ", end="") + _print(f"Getting object from Portal: {uuid}") + if portal.env: + _print(f"Portal environment: {portal.env}") + if portal.keys_file: + _print(f"Portal keys file: {portal.keys_file}") + if portal.key_id: + _print(f"Portal key prefix: {portal.key_id[0:2]}******") + if portal.ini_file: + _print(f"Portal ini file: {portal.ini_file}") + if portal.server: + _print(f"Portal server: {portal.server}") response = None try: if not uuid.startswith("/"): @@ -119,19 +129,16 @@ def _get_portal_object(portal: Portal, uuid: str, response = portal.get(path, raw=raw, database=database) except Exception as e: if "404" in str(e) and "not found" in str(e).lower(): - if verbose: - _print("Not found!") - else: - _print(f"Object ({uuid}) not found!") + _print("Portal object not found: {uuid}") _exit_without_action() - _exit_without_action(f"Exception getting object ({uuid}) -> {get_error_message(e)}", newline=verbose) + _exit_without_action(f"Exception getting Portal object: {uuid}\n{get_error_message(e)}") if not response: - _exit_without_action(f"Null response getting object {uuid}).") + _exit_without_action(f"Null response getting Portal object: {uuid}") if response.status_code not in [200, 307]: # TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above. 
- _exit_without_action(f"Invalid status code ({response.status_code}) getting object: {uuid}") + _exit_without_action(f"Invalid status code ({response.status_code}) getting Portal object: {uuid}") if not response.json: - _exit_without_action(f"Invalid JSON getting object {uuid}).") + _exit_without_action(f"Invalid JSON getting Portal object: {uuid}") if verbose: _print("OK") return response.json() @@ -143,10 +150,8 @@ def _print(*args, **kwargs): sys.stdout.flush() -def _exit_without_action(message: Optional[str] = None, newline: bool = True) -> None: +def _exit_without_action(message: Optional[str] = None) -> None: if message: - if newline: - _print() _print(f"ERROR: {message}") exit(1) From 512928f5794bb7287a6c52ac1a5d0d8cf3115e01 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 11 Feb 2024 17:46:25 -0500 Subject: [PATCH 16/21] expanded on view-portal-object script --- dcicutils/scripts/view_portal_object.py | 42 +++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index 723c517fb..25a4604fc 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -61,9 +61,10 @@ import sys from typing import Optional import yaml +from dcicutils.captured_output import captured_output, uncaptured_output from dcicutils.misc_utils import get_error_message from dcicutils.portal_utils import Portal -from dcicutils.captured_output import captured_output, uncaptured_output +from dcicutils.structured_data import Schema def main(): @@ -79,6 +80,8 @@ def main(): help=f"Environment server name (server from key in ~/.smaht-keys.json).") parser.add_argument("--app", type=str, required=False, default=None, help=f"Application name (one of: smaht, cgap, fourfront).") + parser.add_argument("--schema", action="store_true", required=False, default=False, + help="View named schema rather than object.") parser.add_argument("--raw", 
action="store_true", required=False, default=False, help="Raw output.") parser.add_argument("--database", action="store_true", required=False, default=False, help="Read from database output.") @@ -90,7 +93,14 @@ def main(): args = parser.parse_args() portal = _create_portal(ini=args.ini, env=args.env, server=args.server, app=args.app, debug=args.debug) - data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, database=args.database, verbose=args.verbose) + if args.uuid == "schemas": + _print_all_schema_names(portal=portal, verbose=args.verbose) + return + elif args.schema: + data = _get_schema(portal=portal, schema_name=args.uuid) + else: + data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, + database=args.database, verbose=args.verbose) if args.copy: pyperclip.copy(json.dumps(data, indent=4)) @@ -144,6 +154,34 @@ def _get_portal_object(portal: Portal, uuid: str, return response.json() +def _get_schema(portal: Portal, schema_name: str) -> Optional[dict]: + def rummage_for_schema_name(portal: Portal, schema_name: str) -> Optional[str]: # noqa + if schemas := portal.get_schemas(): + for schema in schemas: + if schema.lower() == schema_name.lower(): + return schema + schema = Schema.load_by_name(schema_name, portal) + if not schema: + if schema_name := rummage_for_schema_name(portal, schema_name): + schema = Schema.load_by_name(schema_name, portal) + return schema.data if schema else None + + +def _print_all_schema_names(portal: Portal, verbose: bool = False) -> None: + if schemas := portal.get_schemas(): + for schema in sorted(schemas.keys()): + _print(schema) + if verbose: + if identifying_properties := schemas[schema].get("identifyingProperties"): + _print("- identifying properties:") + for identifying_property in sorted(identifying_properties): + _print(f" - {identifying_property}") + if required_properties := schemas[schema].get("required"): + _print("- required properties:") + for required_property in 
sorted(required_properties): + _print(f" - {required_property}") + + def _print(*args, **kwargs): with uncaptured_output(): print(*args, **kwargs) From c4a034f366aec151c9c8a99b60baa8cebaee2dda Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 11 Feb 2024 17:46:35 -0500 Subject: [PATCH 17/21] expanded on view-portal-object script --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 77d6a955e..80cfe3281 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.7.2.1b4" # TODO: To become 8.8.0 +version = "8.7.2.1b5" # TODO: To become 8.8.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 3d4e678c352f868153a2cd531078bd474d08f7bb Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 13 Feb 2024 08:34:56 -0500 Subject: [PATCH 18/21] misc updates to portal_utils etc --- dcicutils/portal_object_utils.py | 12 +++++++----- dcicutils/schema_utils.py | 4 +++- dcicutils/scripts/view_portal_object.py | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dcicutils/portal_object_utils.py b/dcicutils/portal_object_utils.py index cecc02a03..be93b1223 100644 --- a/dcicutils/portal_object_utils.py +++ b/dcicutils/portal_object_utils.py @@ -13,9 +13,11 @@ class PortalObject: _PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL - def __init__(self, portal_object: dict, portal: Portal = None, type: Optional[str] = None) -> None: - self._data = portal_object - self._portal = portal + def __init__(self, data: dict, portal: Portal = None, + schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None: + self._data = data if isinstance(data, dict) else {} + self._portal = portal if isinstance(portal, Portal) else None + self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) 
else None) self._type = type if isinstance(type, str) and type else None @property @@ -29,7 +31,7 @@ def portal(self) -> Optional[Portal]: @property @lru_cache(maxsize=1) def type(self) -> Optional[str]: - return self._type or Portal.get_schema_type(self._data) + return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else None) @property @lru_cache(maxsize=1) @@ -44,7 +46,7 @@ def uuid(self) -> Optional[str]: @property @lru_cache(maxsize=1) def schema(self) -> Optional[dict]: - return self._portal.get_schema(self.type) if self._portal else None + return self._schema if self._schema else (self._portal.get_schema(self.type) if self._portal else None) def copy(self) -> PortalObject: return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type) diff --git a/dcicutils/schema_utils.py b/dcicutils/schema_utils.py index 216886bc9..b5b1bf425 100644 --- a/dcicutils/schema_utils.py +++ b/dcicutils/schema_utils.py @@ -191,7 +191,9 @@ class Schema: def __init__(self, schema: dict, type: Optional[str] = None) -> None: self._data = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else {}) - self._type = (type if isinstance(type, str) else "") or Schema.type_name(self._data.get("title", "")) + self._type = ((type if isinstance(type, str) else "") or + Schema.type_name(self._data.get("title", "")) or + Schema.type_name(self._data.get("$id", ""))) @property def data(self) -> dict: diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index 25a4604fc..87eefcb56 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -139,7 +139,7 @@ def _get_portal_object(portal: Portal, uuid: str, response = portal.get(path, raw=raw, database=database) except Exception as e: if "404" in str(e) and "not found" in str(e).lower(): - _print("Portal object not found: {uuid}") + _print(f"Portal object not found: {uuid}") 
_exit_without_action() _exit_without_action(f"Exception getting Portal object: {uuid}\n{get_error_message(e)}") if not response: From 7e27b303cc85a5ceaf665c569d5a5844370d52f6 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 13 Feb 2024 08:35:12 -0500 Subject: [PATCH 19/21] version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 80cfe3281..f4daa650a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.7.2.1b5" # TODO: To become 8.8.0 +version = "8.7.2.1b6" # TODO: To become 8.8.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 43a3138d03a8de77de9b1e0f4975894d085005e2 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 15 Feb 2024 07:41:13 -0500 Subject: [PATCH 20/21] Change to data_readers to ignore sheets with names enclosed in parentheses. --- CHANGELOG.rst | 1 + dcicutils/data_readers.py | 10 +++++++++- pyproject.toml | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c397f4025..3e776de56 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,6 +13,7 @@ Change Log * Changes to structured_data support internal references in any order. * New datetime_utils module and tests; first created for date/time support in structured_data. * Added view-portal-object script for general troubleshooting. +* Change to data_readers to ignore sheet names enclosed in parentheses. 
8.7.2 diff --git a/dcicutils/data_readers.py b/dcicutils/data_readers.py index 514d24938..39450d11c 100644 --- a/dcicutils/data_readers.py +++ b/dcicutils/data_readers.py @@ -170,7 +170,15 @@ def open(self) -> None: if self._workbook is None: self._workbook = openpyxl.load_workbook(self._file, data_only=True) self.sheet_names = [sheet_name for sheet_name in self._workbook.sheetnames - if self._include_hidden_sheets or (self._workbook[sheet_name].sheet_state != "hidden")] + if not self.is_hidden_sheet(self._workbook[sheet_name])] + + def is_hidden_sheet(self, sheet: openpyxl.worksheet.worksheet.Worksheet) -> bool: + if not self._include_hidden_sheets: + if sheet.sheet_state == "hidden": + return True + if sheet.title.startswith("(") and sheet.title.endswith(")"): + return True + return False def __del__(self) -> None: if (workbook := self._workbook) is not None: diff --git a/pyproject.toml b/pyproject.toml index f4daa650a..369213a39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.7.2.1b6" # TODO: To become 8.8.0 +version = "8.7.2.1b7" # TODO: To become 8.8.0 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From a46f772605890ecb16dac7923d5d2d6b87d6f9ab Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 15 Feb 2024 07:55:55 -0500 Subject: [PATCH 21/21] version to 8.8.0; ready to merge to master --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 369213a39..0583b995f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.7.2.1b7" # TODO: To become 8.8.0 +version = "8.8.0" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT"