diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index fb886824a..3e776de56 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,6 +7,15 @@ Change Log
 ----------
 
+8.8.0
+=====
+* Changes to structured_data to support date/time types.
+* Changes to structured_data to support internal references in any order.
+* New datetime_utils module and tests; first created for date/time support in structured_data.
+* Added view-portal-object script for general troubleshooting.
+* Change to data_readers to ignore sheet names enclosed in parentheses.
+
+
 8.7.2
 =====
 
diff --git a/dcicutils/captured_output.py b/dcicutils/captured_output.py
new file mode 100644
index 000000000..be22cd16a
--- /dev/null
+++ b/dcicutils/captured_output.py
@@ -0,0 +1,70 @@
+from collections import namedtuple
+from contextlib import contextmanager
+import io
+import sys
+from typing import Optional
+
+_real_stdout = sys.stdout
+_real_stderr = sys.stderr
+
+
+@contextmanager
+def captured_output(capture: bool = True):
+    """
+    Context manager to capture any/all output to stdout or stderr, without actually writing it to stdout
+    or stderr. Yields an object with a get_captured_output() method to get the output captured thus far,
+    and an uncaptured_print() method to actually print the given output to stdout, even though output to
+    stdout is being captured. This can be useful, for example, in command-line scripts which invoke code
+    that writes a lot of info, warning, or error output to stdout or stderr which we want to suppress,
+    while output specific to the script itself can still be written (to stdout) via the yielded
+    uncaptured_print() method; any/all captured output can also optionally be retrieved, e.g. for
+    debugging or troubleshooting purposes. To disable this capture, without having to restructure your
+    code around the with-clause of this context manager, pass False as the argument to this context manager.
+    """
+
+    original_stdout = _real_stdout
+    original_stderr = _real_stderr
+    captured_output = io.StringIO()
+
+    def set_original_output() -> None:
+        sys.stdout = original_stdout
+        sys.stderr = original_stderr
+
+    def set_captured_output() -> None:
+        if capture:
+            sys.stdout = captured_output
+            sys.stderr = captured_output
+
+    def uncaptured_print(*args, **kwargs) -> None:
+        set_original_output()
+        print(*args, **kwargs)
+        set_captured_output()
+
+    def uncaptured_input(message: str) -> str:
+        set_original_output()
+        value = input(message)
+        set_captured_output()
+        return value
+
+    def get_captured_output() -> Optional[str]:
+        return captured_output.getvalue() if capture else None
+
+    try:
+        set_captured_output()
+        Result = namedtuple("Result", ["get_captured_output", "uncaptured_print", "uncaptured_input"])
+        yield Result(get_captured_output, uncaptured_print, uncaptured_input)
+    finally:
+        set_original_output()
+
+
+@contextmanager
+def uncaptured_output():
+    original_stdout = sys.stdout
+    original_stderr = sys.stderr
+    sys.stdout = _real_stdout
+    sys.stderr = _real_stderr
+    try:
+        yield
+    finally:
+        sys.stdout = original_stdout
+        sys.stderr = original_stderr
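For reference, a minimal usage sketch of the captured_output / uncaptured_output context managers added above (the printed messages are placeholders):

    from dcicutils.captured_output import captured_output, uncaptured_output

    with captured_output() as captured:
        print("suppressed")                        # goes only to the internal capture buffer
        captured.uncaptured_print("shown")         # written to the real stdout despite capturing
        text = captured.get_captured_output()      # "suppressed\n" -- everything captured so far
        with uncaptured_output():
            print("also shown")                    # temporarily restores the real stdout/stderr

Passing captured_output(False) leaves stdout/stderr untouched, so the same with-block can be kept in place for debugging without restructuring.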
+ """ + + original_stdout = _real_stdout + original_stderr = _real_stderr + captured_output = io.StringIO() + + def set_original_output() -> None: + sys.stdout = original_stdout + sys.stderr = original_stderr + + def set_captured_output() -> None: + if capture: + sys.stdout = captured_output + sys.stderr = captured_output + + def uncaptured_print(*args, **kwargs) -> None: + set_original_output() + print(*args, **kwargs) + set_captured_output() + + def uncaptured_input(message: str) -> str: + set_original_output() + value = input(message) + set_captured_output() + return value + + def get_captured_output() -> Optional[str]: + return captured_output.getvalue() if capture else None + + try: + set_captured_output() + Result = namedtuple("Result", ["get_captured_output", "uncaptured_print", "uncaptured_input"]) + yield Result(get_captured_output, uncaptured_print, uncaptured_input) + finally: + set_original_output() + + +@contextmanager +def uncaptured_output(): + original_stdout = sys.stdout + original_stderr = sys.stderr + sys.stdout = _real_stdout + sys.stderr = _real_stderr + try: + yield + finally: + sys.stdout = original_stdout + sys.stderr = original_stderr diff --git a/dcicutils/data_readers.py b/dcicutils/data_readers.py index cb5bfb2f1..39450d11c 100644 --- a/dcicutils/data_readers.py +++ b/dcicutils/data_readers.py @@ -152,9 +152,10 @@ def open(self) -> None: class Excel: - def __init__(self, file: str, reader_class: Optional[Type] = None) -> None: + def __init__(self, file: str, reader_class: Optional[Type] = None, include_hidden_sheets: bool = False) -> None: self._file = file self._workbook = None + self._include_hidden_sheets = include_hidden_sheets self.sheet_names = None if isinstance(reader_class, Type) and issubclass(reader_class, ExcelSheetReader): self._reader_class = reader_class @@ -169,7 +170,15 @@ def open(self) -> None: if self._workbook is None: self._workbook = openpyxl.load_workbook(self._file, data_only=True) self.sheet_names = [sheet_name for sheet_name in self._workbook.sheetnames - if self._workbook[sheet_name].sheet_state != "hidden"] + if not self.is_hidden_sheet(self._workbook[sheet_name])] + + def is_hidden_sheet(self, sheet: openpyxl.worksheet.worksheet.Worksheet) -> bool: + if not self._include_hidden_sheets: + if sheet.sheet_state == "hidden": + return True + if sheet.title.startswith("(") and sheet.title.endswith(")"): + return True + return False def __del__(self) -> None: if (workbook := self._workbook) is not None: diff --git a/dcicutils/datetime_utils.py b/dcicutils/datetime_utils.py new file mode 100644 index 000000000..bd8d599d6 --- /dev/null +++ b/dcicutils/datetime_utils.py @@ -0,0 +1,98 @@ +from dcicutils.misc_utils import normalize_spaces +from datetime import datetime, timedelta, timezone +from typing import Optional, Tuple + + +def parse_datetime_string(value: str) -> Optional[datetime]: + """ + Parses the given string into a datetime object and returns it, or if ill-formated then returns None. + The given string is assumed to be in the format "YYYY-MM-DD hh:mm:ss" and with an optional timezone + suffix in format "+hh:mm" or "+hh". Also allowed is just a date of the format "YYYY-MM-DD" in which + case a time of "00:00:00" is assumed. If no timezone is specified then the local timezone is assumed. 
+ """ + if not isinstance(value, str) or not (value := normalize_spaces(value)): + return None + tz_hours = -1 + tz_minutes = -1 + if value.rfind("T") > 0: + value = value.replace("T", " ") + if (space := value.find(" ")) > 0 and (value_suffix := value[space + 1:]): + if (plus := value_suffix.rfind("+")) > 0 or (minus := value_suffix.rfind("-")) > 0: + value = normalize_spaces(value[:space] + " " + value_suffix[:(plus if plus > 0 else minus)]) + if value_tz := normalize_spaces(value_suffix[(plus if plus > 0 else minus) + 1:]): + if len(value_tz := value_tz.split(":")) == 2: + value_tz_hours = value_tz[0].strip() + value_tz_minutes = value_tz[1].strip() + else: + value_tz_hours = value_tz[0].strip() + value_tz_minutes = "0" + if value_tz_hours.isdigit() and value_tz_minutes.isdigit(): + tz_hours = int(value_tz_hours) + tz_minutes = int(value_tz_minutes) + if not (plus > 0): + tz_hours = -tz_hours + else: + value = value + " 00:00:00" + if tz_hours < 0 or tz_minutes < 0: + tz_hours, tz_minutes = get_local_timezone_hours_minutes() + try: + dt = datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + tz = timezone(timedelta(hours=tz_hours, minutes=tz_minutes)) + return dt.replace(tzinfo=tz) + except Exception: + return None + + +def parse_date_string(value: str) -> Optional[datetime]: + """ + Parses the given string into a datetime object representing only a date and + returns it, or if ill-formated then returns None. The given string is assumed + to be in the format "YYYY-MM-DD"; if a given string of this format is suffixed + with a space or a "T" and ANYTHING else, then that trailing portion is ignored. + """ + if isinstance(value, str) and (value := normalize_spaces(value)): + if (separator := value.find(" ")) > 0 or (separator := value.find("T")) > 0: + value = value[:separator] + try: + return datetime.strptime(value, "%Y-%m-%d") + except Exception: + pass + + +def normalize_datetime_string(value: str) -> Optional[str]: + """ + Parses the given string into a datetime object and returns a string for that datetime in ISO-8601 format, + or if ill-formated then returns None. The given string is assumed to be in the format "YYYY-MM-DD hh:mm:ss" + and with an optional timezone suffix in format "+hh:mm" or "+hh". Also allowed is just a date of the + format "YYYY-MM-DD" in which case a time of "00:00:00" is assumed. If no timezone is specified then + the local timezone is assumed. The returned format looks like this: "2024-02-08T10:37:51-05:00" + """ + dt = parse_datetime_string(value) + return dt.isoformat() if dt else None + + +def normalize_date_string(value: str) -> Optional[str]: + """ + Parses the given string into a datetime object representing only a date and returns a string for that + date in ISO-8601 format, or if ill-formated then returns None. The given string is assumed to be in + the format "YYYY-MM-DD"; but if a given string of this format is suffixed with a space followed by + ANYTHING else, then that trailing portion is ignored. The returned format looks like this: "2024-02-08" + """ + d = parse_date_string(value) + return d.strftime("%Y-%m-%d") if d else None + + +def get_local_timezone_string() -> str: + """ + Returns current/local timezone in format like: "-05:00". + """ + tz_hours, tz_minutes = get_local_timezone_hours_minutes() + return f"{tz_hours:+03d}:{tz_minutes:02d}" + + +def get_local_timezone_hours_minutes() -> Tuple[int, int]: + """ + Returns a tuple with the integer hours and minutes offset for the current/local timezone. 
+ """ + tz_minutes = datetime.now(timezone.utc).astimezone().utcoffset().total_seconds() / 60 + return int(tz_minutes // 60), int(abs(tz_minutes % 60)) diff --git a/dcicutils/misc_utils.py b/dcicutils/misc_utils.py index 6228f2f8c..219b3997f 100644 --- a/dcicutils/misc_utils.py +++ b/dcicutils/misc_utils.py @@ -2540,6 +2540,14 @@ def pad_to(target_size: int, data: list, *, padding=None): return data +def normalize_spaces(value: str) -> str: + """ + Returns the given string with multiple consecutive occurrences of whitespace + converted to a single space, and left and right trimmed of spaces. + """ + return re.sub(r"\s+", " ", value).strip() + + class JsonLinesReader: def __init__(self, fp, padded=False, padding=None): diff --git a/dcicutils/portal_object_utils.py b/dcicutils/portal_object_utils.py index cecc02a03..be93b1223 100644 --- a/dcicutils/portal_object_utils.py +++ b/dcicutils/portal_object_utils.py @@ -13,9 +13,11 @@ class PortalObject: _PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL - def __init__(self, portal_object: dict, portal: Portal = None, type: Optional[str] = None) -> None: - self._data = portal_object - self._portal = portal + def __init__(self, data: dict, portal: Portal = None, + schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None: + self._data = data if isinstance(data, dict) else {} + self._portal = portal if isinstance(portal, Portal) else None + self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None) self._type = type if isinstance(type, str) and type else None @property @@ -29,7 +31,7 @@ def portal(self) -> Optional[Portal]: @property @lru_cache(maxsize=1) def type(self) -> Optional[str]: - return self._type or Portal.get_schema_type(self._data) + return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else None) @property @lru_cache(maxsize=1) @@ -44,7 +46,7 @@ def uuid(self) -> Optional[str]: @property @lru_cache(maxsize=1) def schema(self) -> Optional[dict]: - return self._portal.get_schema(self.type) if self._portal else None + return self._schema if self._schema else (self._portal.get_schema(self.type) if self._portal else None) def copy(self) -> PortalObject: return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index c854de30b..9b4e9b05c 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -88,9 +88,9 @@ def init_from_key(key: dict, server: Optional[str], unspecified: Optional[list] isinstance(secret := key.get("secret"), str) and secret): # noqa self._key = {"key": key_id, "secret": secret} if (isinstance(server, str) and server) or (isinstance(server := key.get("server"), str) and server): - if server := normalize_server(server): + if server := Portal._normalize_server(server): if isinstance(key_server := key.get("server"), str) and key_server: - if normalize_server(key_server) != server: + if Portal._normalize_server(key_server) != server: raise Exception(f"Portal server inconsistency: {server} vs {key_server}") self._key["server"] = self._server = server if not self._key: @@ -104,49 +104,21 @@ def init_from_key_pair(key_pair: tuple, server: Optional[str], unspecified: Opti def init_from_keys_file(keys_file: str, env: Optional[str], server: Optional[str], unspecified: Optional[list] = []) -> None: - try: - with io.open(keys_file := os.path.expanduser(keys_file)) as f: - keys = json.load(f) - except 
diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py
index c854de30b..9b4e9b05c 100644
--- a/dcicutils/portal_utils.py
+++ b/dcicutils/portal_utils.py
@@ -88,9 +88,9 @@ def init_from_key(key: dict, server: Optional[str], unspecified: Optional[list]
                     isinstance(secret := key.get("secret"), str) and secret):  # noqa
                 self._key = {"key": key_id, "secret": secret}
                 if (isinstance(server, str) and server) or (isinstance(server := key.get("server"), str) and server):
-                    if server := normalize_server(server):
+                    if server := Portal._normalize_server(server):
                         if isinstance(key_server := key.get("server"), str) and key_server:
-                            if normalize_server(key_server) != server:
+                            if Portal._normalize_server(key_server) != server:
                                 raise Exception(f"Portal server inconsistency: {server} vs {key_server}")
                         self._key["server"] = self._server = server
             if not self._key:
@@ -104,49 +104,21 @@ def init_from_key_pair(key_pair: tuple, server: Optional[str], unspecified: Opti
 
         def init_from_keys_file(keys_file: str, env: Optional[str], server: Optional[str],
                                 unspecified: Optional[list] = []) -> None:
-            try:
-                with io.open(keys_file := os.path.expanduser(keys_file)) as f:
-                    keys = json.load(f)
-            except Exception:
-                raise Exception(f"Portal initialization error; cannot open keys-file: {keys_file}")
-            if isinstance(env, str) and env and isinstance(key := keys.get(env), dict):
-                init_from_key(key, server)
-                self._keys_file = keys_file
-                self._env = env
-            elif (isinstance(server, str) and (server := normalize_server(server)) and
-                  (key := [keys[k] for k in keys if normalize_server(keys[k].get("server")) == server])):
-                init_from_key(key[0], server)
-                self._keys_file = keys_file
-            elif not env and len(keys) == 1 and (env := next(iter(keys))) and isinstance(key := keys[env], dict):
+            key, env = Portal._lookup_in_keys_file(keys_file, env, server, raise_exception=True)
+            if key:
                 init_from_key(key, server)
                 self._keys_file = keys_file
                 self._env = env
-            else:
-                raise Exception(f"Portal initialization error;"
-                                f" {env or server or None} not found in keys-file: {keys_file}")
 
         def init_from_env_server_app(env: str, server: str, app: Optional[str],
                                      unspecified: Optional[list] = None) -> None:
-            if keys_file := self._default_keys_file(app, env):
+            if keys_file := Portal._default_keys_file(app, env, server):
                 init_from_keys_file(keys_file, env, server, unspecified=unspecified)
             else:
                 init(unspecified=unspecified)
                 self._env = env
                 self._server = server
 
-        def normalize_server(server: str) -> Optional[str]:
-            prefix = ""
-            if (lowercase_server := server.lower()).startswith("http://"):
-                prefix = "http://"
-            elif lowercase_server.startswith("https://"):
-                prefix = "https://"
-            if prefix:
-                if (server := re.sub(r"/+", "/", server[len(prefix):])).startswith("/"):
-                    server = server[1:]
-                if len(server) > 1 and server.endswith("/"):
-                    server = server[:-1]
-            return prefix + server if server else None
-
         if (valid_app := app) and not (valid_app := Portal._valid_app(app)):
             raise Exception(f"Portal initialization error; invalid app: {app}")
         self._app = valid_app
@@ -166,7 +138,7 @@ def normalize_server(server: str) -> Optional[str]:
             init_from_env_server_app(arg, server, app, unspecified=[env])
         elif (isinstance(env, str) and env) or (isinstance(server, str) and server):
             init_from_env_server_app(env, server, app, unspecified=[arg])
-        elif not arg and (keys_file := self._default_keys_file(app=self._app or Portal.DEFAULT_APP, env=env)):
+        elif not arg and (keys_file := Portal._default_keys_file(self._app or Portal.DEFAULT_APP, env, server)):
             # If no initial arg then look for default app keys file.
             init_from_keys_file(keys_file, env, server)
         elif raise_exception:
@@ -411,14 +383,57 @@ def _kwargs(self, **kwargs) -> dict:
             result_kwargs["timeout"] = timeout
         return result_kwargs
 
-    def _default_keys_file(self, app: Optional[str], env: Optional[str] = None) -> Optional[str]:
+    @staticmethod
+    def _default_keys_file(app: Optional[str], env: Optional[str], server: Optional[str]) -> Optional[str]:
         def infer_app_from_env(env: str) -> Optional[str]:  # noqa
             if isinstance(env, str) and (lowercase_env := env.lower()):
                 if app := [app for app in ORCHESTRATED_APPS if lowercase_env.startswith(app.lower())]:
-                    return self._valid_app(app[0])
+                    return Portal._valid_app(app[0])
         if (app := Portal._valid_app(app)) or (app := infer_app_from_env(env)):
             keys_file = os.path.expanduser(os.path.join(Portal.KEYS_FILE_DIRECTORY, f".{app.lower()}-keys.json"))
             return keys_file if os.path.exists(keys_file) else None
+        if not app:
+            for app in ORCHESTRATED_APPS:
+                if keys_file := Portal._default_keys_file(app, env, server):
+                    if Portal._lookup_in_keys_file(keys_file, env, server)[0]:
+                        return keys_file
+
+    @staticmethod
+    def _lookup_in_keys_file(keys_file: str, env: Optional[str], server: Optional[str],
+                             raise_exception: bool = False) -> Tuple[Optional[dict], Optional[str]]:
+        try:
+            with io.open(keys_file := os.path.expanduser(keys_file)) as f:
+                keys = json.load(f)
+        except Exception:
+            if raise_exception:
+                raise Exception(f"Portal initialization error; cannot open keys-file: {keys_file}")
+            return None, None
+        if isinstance(env, str) and env and isinstance(key := keys.get(env), dict):
+            return key, env
+        elif (isinstance(server, str) and (server := Portal._normalize_server(server)) and
+              (key := [keys[k] for k in keys if Portal._normalize_server(keys[k].get("server")) == server])):
+            return key[0], env
+        elif not env and len(keys) == 1 and (env := next(iter(keys))) and isinstance(key := keys[env], dict):
+            return key, env
+        else:
+            if raise_exception:
+                raise Exception(f"Portal initialization error;"
+                                f" {env or server or None} not found in keys-file: {keys_file}")
+            return None, None
+
+    @staticmethod
+    def _normalize_server(server: str) -> Optional[str]:
+        prefix = ""
+        if (lowercase_server := server.lower()).startswith("http://"):
+            prefix = "http://"
+        elif lowercase_server.startswith("https://"):
+            prefix = "https://"
+        if prefix:
+            if (server := re.sub(r"/+", "/", server[len(prefix):])).startswith("/"):
+                server = server[1:]
+            if len(server) > 1 and server.endswith("/"):
+                server = server[:-1]
+        return prefix + server if server else None
 
     @staticmethod
     def _valid_app(app: Optional[str]) -> Optional[str]:
diff --git a/dcicutils/schema_utils.py b/dcicutils/schema_utils.py
index 216886bc9..b5b1bf425 100644
--- a/dcicutils/schema_utils.py
+++ b/dcicutils/schema_utils.py
@@ -191,7 +191,9 @@ class Schema:
 
     def __init__(self, schema: dict, type: Optional[str] = None) -> None:
         self._data = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else {})
-        self._type = (type if isinstance(type, str) else "") or Schema.type_name(self._data.get("title", ""))
+        self._type = ((type if isinstance(type, str) else "") or
+                      Schema.type_name(self._data.get("title", "")) or
+                      Schema.type_name(self._data.get("$id", "")))
 
     @property
     def data(self) -> dict:
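For context, the keys-file lookup above reads ~/.{app}-keys.json (e.g. ~/.smaht-keys.json); an entry is expected to look roughly like this (the environment name and values are made up):

    {
        "smaht-local": {
            "key": "SOMEKEYID",
            "secret": "someaccesskeysecret",
            "server": "http://localhost:8080"
        }
    }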
diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py
new file mode 100644
index 000000000..87eefcb56
--- /dev/null
+++ b/dcicutils/scripts/view_portal_object.py
@@ -0,0 +1,198 @@
+# ------------------------------------------------------------------------------------------------------
+# Command-line utility to retrieve and print the given object (UUID) from a SMaHT/CGAP/Fourfront Portal.
+# ------------------------------------------------------------------------------------------------------
+# Example command:
+#   view-portal-object 4483b19d-62e7-4e7f-a211-0395343a35df --yaml
+#
+# Example output:
+#   '@context': /terms/
+#   '@id': /access-keys/3968e38e-c11f-472e-8531-8650e2e296d4/
+#   '@type':
+#   - AccessKey
+#   - Item
+#   access_key_id: NSVCZ75O
+#   date_created: '2023-09-06T13:11:59.704005+00:00'
+#   description: Manually generated local access-key for testing.
+#   display_title: AccessKey from 2023-09-06
+#   expiration_date: '2023-12-05T13:11:59.714106'
+#   last_modified:
+#     date_modified: '2023-09-06T13:11:59.711367+00:00'
+#     modified_by:
+#       '@id': /users/3202fd57-44d2-44fb-a131-afb1e43d8ae5/
+#       '@type':
+#       - User
+#       - Item
+#       status: current
+#       uuid: 3202fd57-44d2-44fb-a131-afb1e43d8ae5
+#   principals_allowed:
+#     edit:
+#     - group.admin
+#     - userid.74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68
+#     view:
+#     - group.admin
+#     - group.read-only-admin
+#     - userid.74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68
+#   schema_version: '1'
+#   status: current
+#   user:
+#     '@id': /users/74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68/
+#     '@type':
+#     - User
+#     - Item
+#     display_title: David Michaels
+#     principals_allowed:
+#       edit:
+#       - group.admin
+#       view:
+#       - group.admin
+#       - group.read-only-admin
+#     status: current
+#     uuid: 74fef71a-dfc1-4aa4-acc0-cedcb7ac1d68
+#   uuid: 3968e38e-c11f-472e-8531-8650e2e296d4
+#
+# Note that instead of a uuid you can also actually use a path, for example:
+#   view-local-object /file-formats/vcf_gz_tbi
+#
+# --------------------------------------------------------------------------------------------------
+
+import argparse
+import json
+import pyperclip
+import sys
+from typing import Optional
+import yaml
+from dcicutils.captured_output import captured_output, uncaptured_output
+from dcicutils.misc_utils import get_error_message
+from dcicutils.portal_utils import Portal
+from dcicutils.structured_data import Schema
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description="View Portal object.")
+    parser.add_argument("uuid", type=str,
+                        help=f"The uuid (or path) of the object to fetch and view.")
+    parser.add_argument("--ini", type=str, required=False, default=None,
+                        help=f"Name of the application .ini file.")
+    parser.add_argument("--env", "-e", type=str, required=False, default=None,
+                        help=f"Environment name (key from ~/.smaht-keys.json).")
+    parser.add_argument("--server", "-s", type=str, required=False, default=None,
+                        help=f"Environment server name (server from key in ~/.smaht-keys.json).")
+    parser.add_argument("--app", type=str, required=False, default=None,
+                        help=f"Application name (one of: smaht, cgap, fourfront).")
+    parser.add_argument("--schema", action="store_true", required=False, default=False,
+                        help="View named schema rather than object.")
+    parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.")
+    parser.add_argument("--database", action="store_true", required=False, default=False,
+                        help="Read from database output.")
+    parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.")
+    parser.add_argument("--copy", "-c", action="store_true", required=False, default=False,
+                        help="Copy object data to clipboard.")
+    parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.")
+    parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.")
+    args = parser.parse_args()
+
+    portal = _create_portal(ini=args.ini, env=args.env, server=args.server, app=args.app, debug=args.debug)
+
+    if args.uuid == "schemas":
+        _print_all_schema_names(portal=portal, verbose=args.verbose)
+        return
+    elif args.schema:
+        data = _get_schema(portal=portal, schema_name=args.uuid)
+    else:
+        data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw,
+                                  database=args.database, verbose=args.verbose)
+
+    if args.copy:
+        pyperclip.copy(json.dumps(data, indent=4))
+    if args.yaml:
+        _print(yaml.dump(data))
+    else:
+        _print(json.dumps(data, default=str, indent=4))
") + parser.add_argument("--ini", type=str, required=False, default=None, + help=f"Name of the application .ini file.") + parser.add_argument("--env", "-e", type=str, required=False, default=None, + help=f"Environment name (key from ~/.smaht-keys.json).") + parser.add_argument("--server", "-s", type=str, required=False, default=None, + help=f"Environment server name (server from key in ~/.smaht-keys.json).") + parser.add_argument("--app", type=str, required=False, default=None, + help=f"Application name (one of: smaht, cgap, fourfront).") + parser.add_argument("--schema", action="store_true", required=False, default=False, + help="View named schema rather than object.") + parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.") + parser.add_argument("--database", action="store_true", required=False, default=False, + help="Read from database output.") + parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.") + parser.add_argument("--copy", "-c", action="store_true", required=False, default=False, + help="Copy object data to clipboard.") + parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.") + parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.") + args = parser.parse_args() + + portal = _create_portal(ini=args.ini, env=args.env, server=args.server, app=args.app, debug=args.debug) + if args.uuid == "schemas": + _print_all_schema_names(portal=portal, verbose=args.verbose) + return + elif args.schema: + data = _get_schema(portal=portal, schema_name=args.uuid) + else: + data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, + database=args.database, verbose=args.verbose) + + if args.copy: + pyperclip.copy(json.dumps(data, indent=4)) + if args.yaml: + _print(yaml.dump(data)) + else: + _print(json.dumps(data, default=str, indent=4)) + + +def _create_portal(ini: str, env: Optional[str] = None, + server: Optional[str] = None, app: Optional[str] = None, debug: bool = False) -> Portal: + with captured_output(not debug): + return Portal(env, server=server, app=app) if env or app else Portal(ini) + + +def _get_portal_object(portal: Portal, uuid: str, + raw: bool = False, database: bool = False, verbose: bool = False) -> dict: + if verbose: + _print(f"Getting object from Portal: {uuid}") + if portal.env: + _print(f"Portal environment: {portal.env}") + if portal.keys_file: + _print(f"Portal keys file: {portal.keys_file}") + if portal.key_id: + _print(f"Portal key prefix: {portal.key_id[0:2]}******") + if portal.ini_file: + _print(f"Portal ini file: {portal.ini_file}") + if portal.server: + _print(f"Portal server: {portal.server}") + response = None + try: + if not uuid.startswith("/"): + path = f"/{uuid}" + else: + path = uuid + response = portal.get(path, raw=raw, database=database) + except Exception as e: + if "404" in str(e) and "not found" in str(e).lower(): + _print(f"Portal object not found: {uuid}") + _exit_without_action() + _exit_without_action(f"Exception getting Portal object: {uuid}\n{get_error_message(e)}") + if not response: + _exit_without_action(f"Null response getting Portal object: {uuid}") + if response.status_code not in [200, 307]: + # TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above. 
+ _exit_without_action(f"Invalid status code ({response.status_code}) getting Portal object: {uuid}") + if not response.json: + _exit_without_action(f"Invalid JSON getting Portal object: {uuid}") + if verbose: + _print("OK") + return response.json() + + +def _get_schema(portal: Portal, schema_name: str) -> Optional[dict]: + def rummage_for_schema_name(portal: Portal, schema_name: str) -> Optional[str]: # noqa + if schemas := portal.get_schemas(): + for schema in schemas: + if schema.lower() == schema_name.lower(): + return schema + schema = Schema.load_by_name(schema_name, portal) + if not schema: + if schema_name := rummage_for_schema_name(portal, schema_name): + schema = Schema.load_by_name(schema_name, portal) + return schema.data if schema else None + + +def _print_all_schema_names(portal: Portal, verbose: bool = False) -> None: + if schemas := portal.get_schemas(): + for schema in sorted(schemas.keys()): + _print(schema) + if verbose: + if identifying_properties := schemas[schema].get("identifyingProperties"): + _print("- identifying properties:") + for identifying_property in sorted(identifying_properties): + _print(f" - {identifying_property}") + if required_properties := schemas[schema].get("required"): + _print("- required properties:") + for required_property in sorted(required_properties): + _print(f" - {required_property}") + + +def _print(*args, **kwargs): + with uncaptured_output(): + print(*args, **kwargs) + sys.stdout.flush() + + +def _exit_without_action(message: Optional[str] = None) -> None: + if message: + _print(f"ERROR: {message}") + exit(1) + + +if __name__ == "__main__": + main() diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 70c773609..ae3424203 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -10,8 +10,9 @@ from webtest.app import TestApp from dcicutils.common import OrchestratedApp from dcicutils.data_readers import CsvReader, Excel, RowReader +from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string from dcicutils.file_utils import search_for_file -from dcicutils.misc_utils import (create_dict, create_readonly_object, load_json_if, +from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if, merge_objects, remove_empty_properties, right_trim, split_string, to_boolean, to_enum, to_float, to_integer, VirtualApp) from dcicutils.portal_object_utils import PortalObject @@ -203,6 +204,17 @@ def _load_excel_file(self, file: str) -> None: order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {} for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)): self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name)) + # Check for unresolved reference errors which really are not because of ordering. + # Yes such internal references will be handled correctly on actual database update via snovault.loadxl. 
diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py
index 70c773609..ae3424203 100644
--- a/dcicutils/structured_data.py
+++ b/dcicutils/structured_data.py
@@ -10,8 +10,9 @@
 from webtest.app import TestApp
 from dcicutils.common import OrchestratedApp
 from dcicutils.data_readers import CsvReader, Excel, RowReader
+from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
 from dcicutils.file_utils import search_for_file
-from dcicutils.misc_utils import (create_dict, create_readonly_object, load_json_if,
+from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
                                   merge_objects, remove_empty_properties, right_trim, split_string,
                                   to_boolean, to_enum, to_float, to_integer, VirtualApp)
 from dcicutils.portal_object_utils import PortalObject
@@ -203,6 +204,17 @@ def _load_excel_file(self, file: str) -> None:
         order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
         for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
             self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
+        # Check for unresolved reference errors which are not really errors but are just due to sheet ordering;
+        # such internal references will be handled correctly on actual database update via snovault.loadxl.
+        if ref_errors := self.ref_errors:
+            ref_errors_actual = []
+            for ref_error in ref_errors:
+                if not self.portal.ref_exists(ref_error["error"]):
+                    ref_errors_actual.append(ref_error)
+            if ref_errors_actual:
+                self._errors["ref"] = ref_errors_actual
+            else:
+                del self._errors["ref"]
 
     def _load_json_file(self, file: str) -> None:
         with open(file) as f:
@@ -368,28 +380,20 @@ class Schema(SchemaBase):
 
     def __init__(self, schema_json: dict, portal: Optional[Portal] = None) -> None:
         super().__init__(schema_json)
-# self._data = schema_json if isinstance(schema_json, dict) else {}
-# self._type = Schema.type_name(schema_json.get("title", ""))
         self._portal = portal  # Needed only to resolve linkTo references.
         self._map_value_functions = {
             "boolean": self._map_function_boolean,
             "enum": self._map_function_enum,
             "integer": self._map_function_integer,
             "number": self._map_function_number,
-            "string": self._map_function_string
+            "string": self._map_function_string,
+            "date": self._map_function_date,
+            "datetime": self._map_function_datetime
         }
         self._resolved_refs = set()
         self._unresolved_refs = []
         self._typeinfo = self._create_typeinfo(schema_json)
 
-# @property
-# def data(self) -> dict:
-# return self._data
-
-# @property
-# def type(self) -> str:
-# return self._type
-
     @staticmethod
     def load_by_name(name: str, portal: Portal) -> Optional[dict]:
         schema_json = portal.get_schema(Schema.type_name(name)) if portal else None
@@ -424,6 +428,10 @@ def _map_function(self, typeinfo: dict) -> Optional[Callable]:
             map_function = self._map_function_enum
         elif isinstance(typeinfo.get("linkTo"), str):
             map_function = self._map_function_ref
+        elif (type_format := typeinfo.get("format")) == "date":
+            map_function = self._map_function_date
+        elif type_format == "date-time":
+            map_function = self._map_function_datetime
         else:
             map_function = self._map_value_functions.get(typeinfo_type)
         return map_function(typeinfo) if map_function else None
@@ -454,6 +462,18 @@ def map_string(value: str, src: Optional[str]) -> str:
             return value if value is not None else ""
         return map_string
 
+    def _map_function_date(self, typeinfo: dict) -> Callable:
+        def map_date(value: str, src: Optional[str]) -> str:
+            value = normalize_date_string(value)
+            return value if value is not None else ""
+        return map_date
+
+    def _map_function_datetime(self, typeinfo: dict) -> Callable:
+        def map_datetime(value: str, src: Optional[str]) -> str:
+            value = normalize_datetime_string(value)
+            return value if value is not None else ""
+        return map_datetime
+
     def _map_function_ref(self, typeinfo: dict) -> Callable:
         def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[str]) -> Any:
             nonlocal self, typeinfo
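The practical effect of the new date/date-time mapping, as a sketch (the property name is hypothetical):

    # Given a schema property like: "date_collected": {"type": "string", "format": "date"},
    # a submitted spreadsheet value is now normalized via datetime_utils, e.g.:
    from dcicutils.datetime_utils import normalize_date_string
    normalize_date_string("2024-01-28T17:15:32")    # "2024-01-28" -- what the mapped cell value becomes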
@@ -661,7 +681,13 @@ def is_file_schema(self, schema_name: str) -> bool:
         """
         return self.is_schema_type(schema_name, FILE_SCHEMA_NAME)
 
-    def ref_exists(self, type_name: str, value: str) -> List[str]:
+    def ref_exists(self, type_name: str, value: Optional[str] = None) -> List[str]:
+        if not value:
+            if type_name.startswith("/") and len(parts := type_name[1:].split("/")) == 2:
+                type_name = parts[0]
+                value = parts[1]
+            else:
+                return []
         resolved = []
         is_resolved, resolved_uuid = self._ref_exists_single(type_name, value)
         if is_resolved:
@@ -691,7 +717,7 @@ def _ref_exists_single(self, type_name: str, value: str) -> Tuple[bool, Optional
             for item in items:
                 if (ivalue := next((item[iproperty] for iproperty in iproperties if iproperty in item), None)):
                     if isinstance(ivalue, list) and value in ivalue or ivalue == value:
-                        return True, None
+                        return True, (ivalue if isinstance(ivalue, str) and is_uuid(ivalue) else None)
         if (value := self.get_metadata(f"/{type_name}/{value}")) is None:
             return False, None
         return True, value.get("uuid")
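A sketch of the new path-style ref_exists call (requires a live Portal; the path used here is made up):

    # Equivalent to portal.ref_exists("FileFormat", "vcf_gz"):
    portal.ref_exists("/FileFormat/vcf_gz")         # returns a (possibly empty) list of matches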
"2024-02-08" + + value = " 2024-01-28 17:15:32 + 03:34" + assert normalize_date_string(value) == "2024-01-28"