diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 5d382a007..1a1aa2ab9 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,6 +6,20 @@ dcicutils
 Change Log
 ----------
 
+8.13.3
+======
+* N.B. Accidentally tagged/pushed 8.13.1 -> PLEASE IGNORE VERSION: 8.13.1 (subsequently yanked).
+  And then, to correct this (lacking permission to delete the above), pushed unofficial 8.13.2.
+* Fallout from Python 3.12 support.
+  - Though dcicutils does not depend on numpy, elasticsearch tries to import it,
+    and if it is installed and its version is greater than 1.x, we get this error:
+    AttributeError: `np.float_` was removed in the NumPy 2.0 release. Use `np.float64` instead.
+    So added a hack in hack_for_elasticsearch_numpy_usage.py for this specific case,
+    to be imported before we import elasticsearch modules.
+* Added/updated scripts from submitr: view_portal_object.py and update_portal_object.py
+  for dev/troubleshooting purposes.
+
+
 8.13.0
 ======
 * Updates related to Python 3.12.
diff --git a/Makefile b/Makefile
index f4b8dccb2..271001712 100644
--- a/Makefile
+++ b/Makefile
@@ -18,61 +18,69 @@ build:  # builds
 
 test:  # runs default tests, which are the unit tests
 	make test-units
 	make test-static
+	make test-last
 
 test-for-ga:
 	poetry run flake8 dcicutils
 	poetry run flake8 test --exclude=data_files
 	make test-units-with-coverage
+	make test-last
+
+test-last:
+	poetry run pytest -vv -m "last"
 
 retest:  # runs only failed tests from the last test run. (if no failures, it seems to run all?? -kmp 17-Dec-2020)
-	poetry run pytest -vv -r w --last-failed
+	poetry run pytest -vv -r w --last-failed -m "not last"
 
 test-all:  # you have to be really brave to want this. a lot of things will err
 	@git log -1 --decorate | head -1
 	@date
-	poetry run pytest -vv -r w
+	poetry run pytest -vv -r w -m "not last"
+	make test-last
 	@git log -1 --decorate | head -1
 	@date
 
 test-most:  # leaves out things that will probably err but runs unit tests and both kinds of integrations
 	@git log -1 --decorate | head -1
 	@date
-	poetry run pytest -vv -r w -m "not static and not beanstalk_failure and not direct_es_query"
+	poetry run pytest -vv -r w -m "not static and not beanstalk_failure and not direct_es_query and not last"
 	@git log -1 --decorate | head -1
 	@date
 
 test-units-with-coverage:
 	@git log -1 --decorate | head -1
 	@date
-	poetry run coverage run --source dcicutils -m pytest -vv -r w -m "not static and not integratedx and not beanstalk_failure and not direct_es_query"
+	poetry run coverage run --source dcicutils -m pytest -vv -r w -m "not static and not integratedx and not beanstalk_failure and not direct_es_query and not last"
+	make test-last
 	@git log -1 --decorate | head -1
 	@date
 
 test-units:  # runs unit tests (and integration tests not backed by a unit test)
 	@git log -1 --decorate | head -1
 	@date
-	poetry run pytest -vv -r w -m "not static and not integratedx and not beanstalk_failure and not direct_es_query"
+	poetry run pytest -vv -r w -m "not static and not integratedx and not beanstalk_failure and not direct_es_query and not last"
+	make test-last
 	@git log -1 --decorate | head -1
 	@date
 
 test-integrations:  # runs integration tests
 	@git log -1 --decorate | head -1
 	@date
-	poetry run pytest -vv -r w -m "not static and (integrated or integratedx) and not beanstalk_failure and not direct_es_query"
+	poetry run pytest -vv -r w -m "not static and (integrated or integratedx) and not beanstalk_failure and not direct_es_query and not last"
 	@git log -1 --decorate | head -1
 	@date
 
 test-direct-es-query:  # must be called inside VPC (e.g., from foursight after cloning repo, setting up venv, etc)
 	@git log -1 --decorate | head -1
 	@date
-	poetry run pytest -vv -r w -m "direct_es_query"
+	poetry run pytest -vv -r w -m "direct_es_query and not last"
 	@git log -1 --decorate | head -1
 	@date
 
 test-static:
 	@git log -1 --decorate | head -1
 	@date
-	poetry run pytest -vv -r w -m "static"
+	poetry run pytest -vv -r w -m "static and not last"
 	poetry run flake8 dcicutils
 	poetry run flake8 test --exclude=data_files
 	@git log -1 --decorate | head -1
diff --git a/dcicutils/es_utils.py b/dcicutils/es_utils.py
index d1fd52a57..237b563be 100644
--- a/dcicutils/es_utils.py
+++ b/dcicutils/es_utils.py
@@ -1,6 +1,7 @@
 import logging
 import boto3
 from .misc_utils import PRINT
+import dcicutils.hack_for_elasticsearch_numpy_usage  # noqa
 from elasticsearch import Elasticsearch, RequestsHttpConnection
 from aws_requests_auth.boto_utils import BotoAWSRequestsAuth
 
diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py
index 442a9642e..d2456fc1f 100644
--- a/dcicutils/ff_utils.py
+++ b/dcicutils/ff_utils.py
@@ -7,6 +7,7 @@
 import time
 
 from collections import namedtuple
+import dcicutils.hack_for_elasticsearch_numpy_usage  # noqa
 from elasticsearch.exceptions import AuthorizationException
 from typing import Dict, List, Optional
 from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
diff --git a/dcicutils/hack_for_elasticsearch_numpy_usage.py b/dcicutils/hack_for_elasticsearch_numpy_usage.py
new file mode 100644
index 000000000..230d20b14
--- /dev/null
+++ b/dcicutils/hack_for_elasticsearch_numpy_usage.py
@@ -0,0 +1,10 @@
+# Though dcicutils does not depend on numpy, elasticsearch pulls it in iff it is installed,
+# and under numpy 2.x the numpy.float_ constant has been retired, so any reference to it
+# yields an error from numpy (AttributeError: np.float_ was removed in the NumPy 2.0 release.
+# Use np.float64 instead); this reference to numpy.float_ occurs in elasticsearch/serializer.py,
+# and we short-circuit it here by explicitly setting numpy.float_ to numpy.float64.
+try:
+    import numpy
+    numpy.float_ = numpy.float64
+except Exception:
+    pass
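+
+# Usage sketch: import this module before anything that imports elasticsearch, as the
+# imports added to es_utils.py/ff_utils.py/snapshot_utils.py in this changeset do, e.g.:
+#
+#   import dcicutils.hack_for_elasticsearch_numpy_usage  # noqa
+#   from elasticsearch import Elasticsearch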
diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py
new file mode 100644
index 000000000..0918b8f26
--- /dev/null
+++ b/dcicutils/scripts/update_portal_object.py
@@ -0,0 +1,430 @@
+# ------------------------------------------------------------------------------------------------------
+# Command-line utility to update (post, patch, upsert) portal objects for SMaHT/CGAP/Fourfront.
+# ------------------------------------------------------------------------------------------------------
+# Example commands:
+# update-portal-object --post file_format.json
+# update-portal-object --upsert directory-with-schema-named-dot-json-files
+# update-portal-object --patch file-not-named-for-schema-name.json --schema UnalignedReads
+# --------------------------------------------------------------------------------------------------
+
+import argparse
+from functools import lru_cache
+import glob
+import io
+import json
+import os
+import sys
+from typing import Callable, List, Optional, Tuple, Union
+from dcicutils.command_utils import yes_or_no
+from dcicutils.common import ORCHESTRATED_APPS, APP_SMAHT
+from dcicutils.ff_utils import delete_metadata, purge_metadata
+from dcicutils.misc_utils import get_error_message, PRINT
+from dcicutils.portal_utils import Portal as PortalFromUtils
+
+
+class Portal(PortalFromUtils):
+
+    def delete_metadata(self, object_id: str) -> Optional[dict]:
+        if isinstance(object_id, str) and object_id and self.key:
+            return delete_metadata(obj_id=object_id, key=self.key)
+        return None
+
+    def purge_metadata(self, object_id: str) -> Optional[dict]:
+        if isinstance(object_id, str) and object_id and self.key:
+            return purge_metadata(obj_id=object_id, key=self.key)
+        return None
+
+
+_DEFAULT_APP = "smaht"
+_SMAHT_ENV_ENVIRON_NAME = "SMAHT_ENV"
+
+# Schema properties to ignore (by default) for the view schema usage.
+_SCHEMAS_IGNORE_PROPERTIES = [
+    "date_created",
+    "last_modified",
+    "principals_allowed",
+    "submitted_by",
+    "schema_version"
+]
+
+_SCHEMA_ORDER = [  # See: smaht-portal/src/encoded/project/loadxl.py
+    "access_key",
+    "user",
+    "consortium",
+    "submission_center",
+    "file_format",
+    "quality_metric",
+    "output_file",
+    "reference_file",
+    "reference_genome",
+    "software",
+    "tracking_item",
+    "workflow",
+    "workflow_run",
+    "meta_workflow",
+    "meta_workflow_run",
+    "image",
+    "document",
+    "static_section",
+    "page",
+    "filter_set",
+    "higlass_view_config",
+    "ingestion_submission",
+    "ontology_term",
+    "protocol",
+    "donor",
+    "demographic",
+    "medical_history",
+    "diagnosis",
+    "exposure",
+    "family_history",
+    "medical_treatment",
+    "death_circumstances",
+    "tissue_collection",
+    "tissue",
+    "histology",
+    "cell_line",
+    "cell_culture",
+    "cell_culture_mixture",
+    "preparation_kit",
+    "treatment",
+    "sample_preparation",
+    "tissue_sample",
+    "cell_culture_sample",
+    "cell_sample",
+    "analyte",
+    "analyte_preparation",
+    "assay",
+    "library",
+    "library_preparation",
+    "sequencer",
+    "basecalling",
+    "sequencing",
+    "file_set",
+    "unaligned_reads",
+    "aligned_reads",
+    "variant_calls",
+]
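+
+# A sketch of the input-file shapes this script accepts (schema/property names below are
+# hypothetical): a single object, a list of objects, or a dict keyed by schema name; see
+# _post_or_patch_or_upsert below.
+#
+#   {"identifier": "BAM"}
+#   [{"identifier": "BAM"}, {"identifier": "FASTQ"}]
+#   {"FileFormat": [{"identifier": "BAM"}], "Software": [{"name": "bwa"}]}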
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Update Portal object.")
+    parser.add_argument("--env", "-e", type=str, required=False, default=None,
+                        help="Environment name (key from ~/.smaht-keys.json).")
+    parser.add_argument("--app", type=str, required=False, default=None,
+                        help="Application name (one of: smaht, cgap, fourfront).")
+    parser.add_argument("--schema", type=str, required=False, default=None,
+                        help="Use named schema rather than infer from post/patch/upsert file name.")
+    parser.add_argument("--post", type=str, required=False, default=None, help="POST data.")
+    parser.add_argument("--patch", type=str, required=False, default=None, help="PATCH data.")
+    parser.add_argument("--upsert", type=str, required=False, default=None, help="Upsert data.")
+    parser.add_argument("--delete", type=str, required=False, default=None, help="Delete data.")
+    parser.add_argument("--purge", type=str, required=False, default=None, help="Purge data.")
+    parser.add_argument("--confirm", action="store_true", required=False, default=False, help="Confirm before action.")
+    parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.")
+    parser.add_argument("--quiet", action="store_true", required=False, default=False, help="Quiet output.")
+    parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.")
+    args = parser.parse_args()
+
+    def usage(message: Optional[str] = None) -> None:
+        nonlocal parser
+        _print(message) if isinstance(message, str) else None
+        parser.print_help()
+        sys.exit(1)
+
+    if app := args.app:
+        if (app not in ORCHESTRATED_APPS) and ((app := app.lower()) not in ORCHESTRATED_APPS):
+            usage(f"ERROR: Unknown app name; must be one of: {' | '.join(ORCHESTRATED_APPS)}")
+    else:
+        app = APP_SMAHT
+
+    portal = _create_portal(env=args.env, app=app, verbose=args.verbose, debug=args.debug)
+
+    if explicit_schema_name := args.schema:
+        schema, explicit_schema_name = _get_schema(portal, explicit_schema_name)
+        if not schema:
+            usage(f"ERROR: Unknown schema name: {args.schema}")
+
+    if not (args.post or args.patch or args.upsert or args.delete or args.purge):
+        usage()
+
+    if args.post:
+        _post_or_patch_or_upsert(portal=portal,
+                                 file_or_directory=args.post,
+                                 explicit_schema_name=explicit_schema_name,
+                                 update_function=post_data,
+                                 update_action_name="POST",
+                                 confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug)
+    if args.patch:
+        _post_or_patch_or_upsert(portal=portal,
+                                 file_or_directory=args.patch,
+                                 explicit_schema_name=explicit_schema_name,
+                                 update_function=patch_data,
+                                 update_action_name="PATCH",
+                                 confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug)
+    if args.upsert:
+        _post_or_patch_or_upsert(portal=portal,
+                                 file_or_directory=args.upsert,
+                                 explicit_schema_name=explicit_schema_name,
+                                 update_function=upsert_data,
+                                 update_action_name="UPSERT",
+                                 confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug)
+
+    if args.delete:
+        if not portal.get_metadata(args.delete, raise_exception=False):
+            _print(f"Cannot find given object: {args.delete}")
+            sys.exit(1)
+        if yes_or_no(f"Do you really want to delete this item: {args.delete} ?"):
+            portal.delete_metadata(args.delete)
+
+    if args.purge:
+        if not portal.get_metadata(args.purge, raise_exception=False):
+            _print(f"Cannot find given object: {args.purge}")
+            sys.exit(1)
+        if yes_or_no(f"Do you really want to purge this item: {args.purge} ?"):
+            portal.delete_metadata(args.purge)
+            portal.purge_metadata(args.purge)
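+
+# A usage sketch for --delete/--purge (uuid hypothetical); note that the purge branch above
+# deletes first and then purges, presumably because an item must be deleted before it can
+# be purged:
+#
+#   update-portal-object --delete 3968e38e-c11f-472e-8531-8650e2e296d4
+#   update-portal-object --purge 3968e38e-c11f-472e-8531-8650e2e296d4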
+
+
+def _post_or_patch_or_upsert(portal: Portal, file_or_directory: str,
+                             explicit_schema_name: str,
+                             update_function: Callable, update_action_name: str,
+                             confirm: bool = False, verbose: bool = False,
+                             quiet: bool = False, debug: bool = False) -> None:
+
+    def is_schema_name_list(portal: Portal, keys: list) -> bool:
+        if isinstance(keys, list):
+            for key in keys:
+                if portal.get_schema(key) is None:
+                    return False
+            return True
+        return False
+
+    def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str],
+                                confirm: bool = False, verbose: bool = False,
+                                quiet: bool = False, debug: bool = False) -> None:
+
+        nonlocal update_function, update_action_name
+        if not quiet:
+            _print(f"Processing {update_action_name} file: {file}")
+        if data := _read_json_from_file(file):
+            if isinstance(data, dict):
+                if isinstance(schema_name, str) and schema_name:
+                    if debug:
+                        _print(f"DEBUG: File ({file}) contains an object of type: {schema_name}")
+                    update_function(portal, data, schema_name, confirm=confirm,
+                                    file=file, verbose=verbose, debug=debug)
+                elif is_schema_name_list(portal, list(data.keys())):
+                    if debug:
+                        _print(f"DEBUG: File ({file}) contains a dictionary of schema names.")
+                    for schema_name in data:
+                        if isinstance(schema_data := data[schema_name], list):
+                            if debug:
+                                _print(f"DEBUG: Processing {update_action_name}s for type: {schema_name}")
+                            for index, item in enumerate(schema_data):
+                                update_function(portal, item, schema_name, confirm=confirm,
+                                                file=file, index=index, verbose=verbose, debug=debug)
+                        else:
+                            _print(f"WARNING: File ({file}) contains schema item which is not a list: {schema_name}")
+                else:
+                    _print(f"WARNING: File ({file}) contains unknown item type.")
+            elif isinstance(data, list):
+                if debug:
+                    _print(f"DEBUG: File ({file}) contains a list of objects of type: {schema_name}")
+                for index, item in enumerate(data):
+                    update_function(portal, item, schema_name, confirm=confirm,
+                                    file=file, index=index, verbose=verbose, debug=debug)
+        if debug:
+            _print(f"DEBUG: Processing {update_action_name} file done: {file}")
+
+    if os.path.isdir(file_or_directory):
+        if ((files := glob.glob(os.path.join(file_or_directory, "*.json"))) and
+            (files_and_schemas := _file_names_to_ordered_file_and_schema_names(portal, files))):  # noqa
+            for file_and_schema in files_and_schemas:
+                if not (file := file_and_schema[0]):
+                    continue
+                if not (schema_name := file_and_schema[1]) and not (schema_name := explicit_schema_name):
+                    _print(f"ERROR: Schema cannot be inferred from file name and --schema not specified: {file}")
+                    continue
+                post_or_patch_or_upsert(portal, file_and_schema[0], schema_name=schema_name,
+                                        confirm=confirm, quiet=quiet, verbose=verbose, debug=debug)
+    elif os.path.isfile(file := file_or_directory):
+        if ((schema_name := _get_schema_name_from_schema_named_json_file_name(portal, file)) or
+            (schema_name := explicit_schema_name)):  # noqa
+            post_or_patch_or_upsert(portal, file, schema_name=schema_name,
+                                    confirm=confirm, quiet=quiet, verbose=verbose, debug=debug)
+        else:
+            post_or_patch_or_upsert(portal, file, schema_name=schema_name,
+                                    confirm=confirm, quiet=quiet, verbose=verbose, debug=debug)
+            # _print(f"ERROR: Schema cannot be inferred from file name and --schema not specified: {file}")
+            # return
+    else:
+        _print(f"ERROR: Cannot find file or directory: {file_or_directory}")
+
+
+def post_data(portal: Portal, data: dict, schema_name: str, confirm: bool = False,
+              file: Optional[str] = None, index: int = 0,
+              verbose: bool = False, debug: bool = False) -> None:
+    if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
+        if isinstance(file, str) and isinstance(index, int):
+            _print(f"ERROR: Item for POST has no identifying property: {file} (#{index + 1})")
+        else:
+            _print("ERROR: Item for POST has no identifying property.")
+        return
+    if portal.get_metadata(identifying_path, raise_exception=False):
+        _print(f"ERROR: Item for POST already exists: {identifying_path}")
+        return
+    if (confirm is True) and not yes_or_no(f"POST data for: {identifying_path} ?"):
+        return
+    if verbose:
+        _print(f"POST {schema_name} item: {identifying_path}")
+    try:
+        portal.post_metadata(schema_name, data)
+        if debug:
+            _print(f"DEBUG: POST {schema_name} item done: {identifying_path}")
+    except Exception as e:
+        _print(f"ERROR: Cannot POST {schema_name} item: {identifying_path}")
+        _print(get_error_message(e))
+        return
+
+
+def patch_data(portal: Portal, data: dict, schema_name: str, confirm: bool = False,
+               file: Optional[str] = None, index: int = 0,
+               verbose: bool = False, debug: bool = False) -> None:
+    if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
+        if isinstance(file, str) and isinstance(index, int):
+            _print(f"ERROR: Item for PATCH has no identifying property: {file} (#{index + 1})")
+        else:
+            _print("ERROR: Item for PATCH has no identifying property.")
+        return
+    if not portal.get_metadata(identifying_path, raise_exception=False):
+        _print(f"ERROR: Item for PATCH does not already exist: {identifying_path}")
+        return
+    if (confirm is True) and not yes_or_no(f"PATCH data for: {identifying_path} ?"):
+        return
+    if verbose:
+        _print(f"PATCH {schema_name} item: {identifying_path}")
+    try:
+        portal.patch_metadata(identifying_path, data)
+        if debug:
+            _print(f"DEBUG: PATCH {schema_name} item OK: {identifying_path}")
+    except Exception as e:
+        _print(f"ERROR: Cannot PATCH {schema_name} item: {identifying_path}")
+        _print(get_error_message(e))
+        return
+
+
+def upsert_data(portal: Portal, data: dict, schema_name: str, confirm: bool = False,
+                file: Optional[str] = None, index: int = 0,
+                verbose: bool = False, debug: bool = False) -> None:
+    if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)):
+        if isinstance(file, str) and isinstance(index, int):
+            _print(f"ERROR: Item for UPSERT has no identifying property: {file} (#{index + 1})")
+        else:
+            _print("ERROR: Item for UPSERT has no identifying property.")
+        return
+    exists = portal.get_metadata(identifying_path, raise_exception=False)
+    if (confirm is True) and not yes_or_no(f"{'PATCH' if exists else 'POST'} data for: {identifying_path} ?"):
+        return
+    if verbose:
+        _print(f"{'PATCH' if exists else 'POST'} {schema_name} item: {identifying_path}")
+    try:
+        portal.post_metadata(schema_name, data) if not exists else portal.patch_metadata(identifying_path, data)
+        if debug:
+            _print(f"DEBUG: UPSERT {schema_name} item OK: {identifying_path}")
+    except Exception as e:
+        _print(f"ERROR: Cannot UPSERT {schema_name} item: {identifying_path}")
+        _print(get_error_message(e))
+        return
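+
+# A sketch of the upsert semantics above: POST if the identified item does not yet exist,
+# PATCH if it does; e.g. (hypothetical file and environment names):
+#
+#   update-portal-object --upsert file_format.json --env my-env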
+
+
+def _create_portal(env: Optional[str] = None, app: Optional[str] = None,
+                   verbose: bool = False, debug: bool = False) -> Optional[Portal]:
+
+    env_from_environ = None
+    if not env and (app == APP_SMAHT):
+        if env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME):
+            env_from_environ = True
+    if not (portal := Portal(env, app=app) if env or app else None):
+        return None
+    if verbose:
+        if (env := portal.env) or (env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME)):
+            _print(f"Portal environment"
+                   f"{f' (from {_SMAHT_ENV_ENVIRON_NAME})' if env_from_environ else ''}: {portal.env}")
+        if portal.keys_file:
+            _print(f"Portal keys file: {portal.keys_file}")
+        if portal.key_id:
+            _print(f"Portal key prefix: {portal.key_id[0:2]}******")
+        if portal.server:
+            _print(f"Portal server: {portal.server}")
+    return portal
+
+
+def _read_json_from_file(file: str) -> Optional[dict]:
+    try:
+        if not os.path.exists(file):
+            return None
+        with io.open(file, "r") as f:
+            try:
+                return json.load(f)
+            except Exception:
+                _print(f"ERROR: Cannot load JSON from file: {file}")
+                return None
+    except Exception:
+        _print(f"ERROR: Cannot open file: {file}")
+        return None
+
+
+def _file_names_to_ordered_file_and_schema_names(portal: Portal,
+                                                 files: Union[List[str], str]) -> List[Tuple[str, Optional[str]]]:
+    results = []
+    if isinstance(files, str):
+        files = [files]
+    if not isinstance(files, list):
+        return results
+    for file in files:
+        if isinstance(file, str) and file:
+            results.append((file, _get_schema_name_from_schema_named_json_file_name(portal, file)))
+    ordered_results = []
+    for schema_name in _SCHEMA_ORDER:
+        schema_name = portal.schema_name(schema_name)
+        if result := next((item for item in results if item[1] == schema_name), None):
+            ordered_results.append(result)
+            results.remove(result)
+    ordered_results.extend(results) if results else None
+    return ordered_results
+
+
+def _get_schema_name_from_schema_named_json_file_name(portal: Portal, value: str) -> Optional[str]:
+    try:
+        if not value.endswith(".json"):
+            return None
+        _, schema_name = _get_schema(portal, os.path.basename(value[:-5]))
+        return schema_name
+    except Exception:
+        return None
+
+
+@lru_cache(maxsize=1)
+def _get_schemas(portal: Portal) -> Optional[dict]:
+    return portal.get_schemas()
+
+
+@lru_cache(maxsize=100)
+def _get_schema(portal: Portal, name: str) -> Tuple[Optional[dict], Optional[str]]:
+    if portal and name and (name := name.replace("_", "").replace("-", "").strip().lower()):
+        if schemas := _get_schemas(portal):
+            for schema_name in schemas:
+                if schema_name.replace("_", "").replace("-", "").strip().lower() == name.lower():
+                    return schemas[schema_name], schema_name
+    return None, None
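+
+# A sketch of the name matching above (hypothetical names): "file_format", "file-format"
+# and "FileFormat" all resolve to the same schema, since underscores, hyphens and case
+# are ignored.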
") parser.add_argument("--ini", type=str, required=False, default=None, help=f"Name of the application .ini file.") @@ -97,11 +102,9 @@ def main(): parser.add_argument("--all", action="store_true", required=False, default=False, help="Include all properties for schema usage.") parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.") + parser.add_argument("--inserts", action="store_true", required=False, default=False, + help="Format output for subsequent inserts.") parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.") - parser.add_argument("--post", type=str, required=False, default=None, - help="POST data of the main arg type with data from file specified with this option.") - parser.add_argument("--patch", type=str, required=False, default=None, - help="PATCH data of the main arg type with data from file specified with this option.") parser.add_argument("--database", action="store_true", required=False, default=False, help="Read from database output.") parser.add_argument("--bool", action="store_true", required=False, @@ -109,6 +112,7 @@ def main(): parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.") parser.add_argument("--copy", "-c", action="store_true", required=False, default=False, help="Copy object data to clipboard.") + parser.add_argument("--output", required=False, help="Output file.", type=str) parser.add_argument("--indent", required=False, default=False, help="Indent output.", type=int) parser.add_argument("--details", action="store_true", required=False, default=False, help="Detailed output.") parser.add_argument("--more-details", action="store_true", required=False, default=False, @@ -123,54 +127,57 @@ def main(): portal = _create_portal(ini=args.ini, env=args.env or os.environ.get("SMAHT_ENV"), server=args.server, app=args.app, verbose=args.verbose, debug=args.debug) - if args.uuid.lower() == "schemas" or args.uuid.lower() == "schema": + if not args.uuid: + _print("UUID or schema or path required.") + _exit(1) + + if args.output: + if os.path.exists(args.output): + if os.path.isdir(args.output): + _print(f"Specified output file already exists as a directory: {args.output}") + _exit(1) + elif os.path.isfile(args.output): + _print(f"Specified output file already exists: {args.output}") + if not yes_or_no(f"Do you want to overwrite this file?"): + _exit(0) + _output_file = io.open(args.output, "w") + + if args.uuid and ((args.uuid.lower() == "schemas") or (args.uuid.lower() == "schema")): _print_all_schema_names(portal=portal, details=args.details, more_details=args.more_details, all=args.all, tree=args.tree, raw=args.raw, raw_yaml=args.yaml) return - elif args.uuid.lower() == "info": # TODO: need word for what consortiums and submission centers are collectively + elif args.uuid and (args.uuid.lower() == "info"): if consortia := portal.get_metadata("/consortia?limit=1000"): - _print("Known Consortia:") + _print_output("Known Consortia:") consortia = sorted(consortia.get("@graph", []), key=lambda key: key.get("identifier")) for consortium in consortia: if ((consortium_name := consortium.get("identifier")) and (consortium_uuid := consortium.get("uuid"))): # noqa - _print(f"- {consortium_name}: {consortium_uuid}") + _print_output(f"- {consortium_name}: {consortium_uuid}") if submission_centers := portal.get_metadata("/submission-centers?limit=1000"): - _print("Known Submission Centers:") + _print_output("Known Submission 
Centers:") submission_centers = sorted(submission_centers.get("@graph", []), key=lambda key: key.get("identifier")) for submission_center in submission_centers: if ((submission_center_name := submission_center.get("identifier")) and (submission_center_uuid := submission_center.get("uuid"))): # noqa - _print(f"- {submission_center_name}: {submission_center_uuid}") + _print_output(f"- {submission_center_name}: {submission_center_uuid}") try: if file_formats := portal.get_metadata("/file-formats?limit=1000"): - _print("Known File Formats:") + _print_output("Known File Formats:") file_formats = sorted(file_formats.get("@graph", []), key=lambda key: key.get("identifier")) for file_format in file_formats: if ((file_format_name := file_format.get("identifier")) and (file_format_uuid := file_format.get("uuid"))): # noqa - _print(f"- {file_format_name}: {file_format_uuid}") + _print_output(f"- {file_format_name}: {file_format_uuid}") except Exception: - _print("Known File Formats: None") + _print_output("Known File Formats: None") return if _is_maybe_schema_name(args.uuid): args.schema = True if args.schema: - if args.post: - if post_data := _read_json_from_file(args.post): - if args.verbose: - _print(f"POSTing data from file ({args.post}) as type: {args.uuid}") - if isinstance(post_data, dict): - post_data = [post_data] - elif not isinstance(post_data, list): - _print(f"POST data neither list nor dictionary: {args.post}") - for item in post_data: - portal.post_metadata(args.uuid, item) - if args.verbose: - _print(f"Done POSTing data from file ({args.post}) as type: {args.uuid}") schema, schema_name = _get_schema(portal, args.uuid) if schema: if args.copy: @@ -178,49 +185,33 @@ def main(): if not args.raw: if parent_schema_name := _get_parent_schema_name(schema): if schema.get("isAbstract") is True: - _print(f"{schema_name} | parent: {parent_schema_name} | abstract") + _print_output(f"{schema_name} | parent: {parent_schema_name} | abstract") else: - _print(f"{schema_name} | parent: {parent_schema_name}") + _print_output(f"{schema_name} | parent: {parent_schema_name}") else: - _print(schema_name) + _print_output(schema_name) _print_schema(schema, details=args.details, more_details=args.details, all=args.all, raw=args.raw, raw_yaml=args.yaml) return - elif args.patch: - if patch_data := _read_json_from_file(args.patch): - if args.verbose: - _print(f"PATCHing data from file ({args.patch}) for object: {args.uuid}") - if isinstance(patch_data, dict): - patch_data = [patch_data] - elif not isinstance(patch_data, list): - _print(f"PATCH data neither list nor dictionary: {args.patch}") - for item in patch_data: - portal.patch_metadata(args.uuid, item) - if args.verbose: - _print(f"Done PATCHing data from file ({args.patch}) as type: {args.uuid}") - return - else: - _print(f"No PATCH data found in file: {args.patch}") - sys.exit(1) - data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, + data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, inserts=args.inserts, database=args.database, check=args.bool, verbose=args.verbose) if args.bool: if data: _print(f"{args.uuid}: found") - sys.exit(0) + _exit(0) else: _print(f"{args.uuid}: not found") - sys.exit(1) + _exit(1) if args.copy: pyperclip.copy(json.dumps(data, indent=4)) if args.yaml: - _print(yaml.dump(data)) + _print_output(yaml.dump(data)) else: if args.indent > 0: - _print(_format_json_with_indent(data, indent=args.indent)) + _print_output(_format_json_with_indent(data, indent=args.indent)) else: - 
 
 
 def _format_json_with_indent(value: dict, indent: int = 0) -> Optional[str]:
@@ -254,7 +245,7 @@ def _create_portal(ini: str, env: Optional[str] = None,
 
 
 def _get_portal_object(portal: Portal, uuid: str,
-                       raw: bool = False, database: bool = False,
+                       raw: bool = False, inserts: bool = False, database: bool = False,
                        check: bool = False, verbose: bool = False) -> dict:
     response = None
     try:
@@ -262,7 +253,7 @@ def _get_portal_object(portal: Portal, uuid: str,
             path = f"/{uuid}"
         else:
             path = uuid
-        response = portal.get(path, raw=raw, database=database)
+        response = portal.get(path, raw=raw or inserts, database=database)
     except Exception as e:
         if "404" in str(e) and "not found" in str(e).lower():
             _print(f"Portal object not found at {portal.server}: {uuid}")
@@ -278,7 +269,21 @@ def _get_portal_object(portal: Portal, uuid: str,
     if not response.json:
         _exit(f"Invalid JSON getting Portal object: {uuid}")
     response = response.json()
-    if raw:
+    if inserts:
+        # Format results as suitable for inserts (e.g. via update-portal-object).
+        response.pop("schema_version", None)
+        if ((isinstance(results := response.get("@graph"), list) and results) and
+            (isinstance(results_type := response.get("@type"), list) and results_type) and
+            (isinstance(results_type := results_type[0], str) and results_type.endswith("SearchResults")) and
+            (results_type := results_type[0:-len("SearchResults")])):  # noqa
+            for result in results:
+                result.pop("schema_version", None)
+            response = {f"{results_type}": results}
+        # Get the result as non-raw so we can get its type.
+        elif ((response_cooked := portal.get(path, database=database)) and
+              (isinstance(response_type := response_cooked.json().get("@type"), list) and response_type)):
+            response = {f"{response_type[0]}": [response]}
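+            # (Resulting shape sketch: a single object becomes, e.g., {"FileFormat": [{...}]},
+            # i.e. items keyed by type name -- the shape update-portal-object accepts as input.)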
+    elif raw:
         response.pop("schema_version", None)
     return response
 
 
@@ -292,7 +297,7 @@ def _get_schema(portal: Portal, name: str) -> Tuple[Optional[dict], Optional[str
     if portal and name and (name := name.replace("_", "").replace("-", "").strip().lower()):
         if schemas := _get_schemas(portal):
             for schema_name in schemas:
-                if schema_name.replace("_", "").replace("-", "").strip().lower() == name:
+                if schema_name.replace("_", "").replace("-", "").strip().lower() == name.lower():
                     return schemas[schema_name], schema_name
     return None, None
 
@@ -303,13 +308,37 @@ def _is_maybe_schema_name(value: str) -> bool:
     return False
 
 
+def _is_schema_name(portal: Portal, value: str) -> bool:
+    try:
+        return _get_schema(portal, value)[0] is not None
+    except Exception:
+        return False
+
+
+def _is_schema_named_json_file_name(portal: Portal, value: str) -> bool:
+    try:
+        return value.endswith(".json") and _is_schema_name(portal, os.path.basename(value[:-5]))
+    except Exception:
+        return False
+
+
+def _get_schema_name_from_schema_named_json_file_name(portal: Portal, value: str) -> Optional[str]:
+    try:
+        if not value.endswith(".json"):
+            return None
+        _, schema_name = _get_schema(portal, os.path.basename(value[:-5]))
+        return schema_name
+    except Exception:
+        return None
+
+
 def _print_schema(schema: dict, details: bool = False, more_details: bool = False, all: bool = False,
                   raw: bool = False, raw_yaml: bool = False) -> None:
     if raw:
         if raw_yaml:
-            _print(yaml.dump(schema))
+            _print_output(yaml.dump(schema))
         else:
-            _print(json.dumps(schema, indent=4))
+            _print_output(json.dumps(schema, indent=4))
         return
     _print_schema_info(schema, details=details, more_details=more_details, all=all)
@@ -322,37 +351,37 @@ def _print_schema_info(schema: dict, level: int = 0,
     identifying_properties = schema.get("identifyingProperties")
     if level == 0:
         if required_properties := schema.get("required"):
-            _print("- required properties:")
+            _print_output("- required properties:")
             for required_property in sorted(list(set(required_properties))):
                 if not all and required_property in _SCHEMAS_IGNORE_PROPERTIES:
                     continue
                 if property_type := (info := schema.get("properties", {}).get(required_property, {})).get("type"):
                     if property_type == "array" and (array_type := info.get("items", {}).get("type")):
-                        _print(f"  - {required_property}: {property_type} of {array_type}")
+                        _print_output(f"  - {required_property}: {property_type} of {array_type}")
                     else:
-                        _print(f"  - {required_property}: {property_type}")
+                        _print_output(f"  - {required_property}: {property_type}")
                 else:
-                    _print(f"  - {required_property}")
+                    _print_output(f"  - {required_property}")
             if isinstance(any_of := schema.get("anyOf"), list):
                 if ((any_of == [{"required": ["submission_centers"]}, {"required": ["consortia"]}]) or
                     (any_of == [{"required": ["consortia"]}, {"required": ["submission_centers"]}])):  # noqa
                     # Very very special case.
-                    _print(f"  - at least one of:")
-                    _print(f"    - consortia: array of string")
-                    _print(f"    - submission_centers: array of string")
+                    _print_output(f"  - at least one of:")
+                    _print_output(f"    - consortia: array of string")
+                    _print_output(f"    - submission_centers: array of string")
             required = required_properties
         if identifying_properties := schema.get("identifyingProperties"):
-            _print("- identifying properties:")
+            _print_output("- identifying properties:")
             for identifying_property in sorted(list(set(identifying_properties))):
                 if not all and identifying_property in _SCHEMAS_IGNORE_PROPERTIES:
                     continue
                 if property_type := (info := schema.get("properties", {}).get(identifying_property, {})).get("type"):
                     if property_type == "array" and (array_type := info.get("items", {}).get("type")):
-                        _print(f"  - {identifying_property}: {property_type} of {array_type}")
+                        _print_output(f"  - {identifying_property}: {property_type} of {array_type}")
                     else:
-                        _print(f"  - {identifying_property}: {property_type}")
+                        _print_output(f"  - {identifying_property}: {property_type}")
                 else:
-                    _print(f"  - {identifying_property}")
+                    _print_output(f"  - {identifying_property}")
     if properties := schema.get("properties"):
         reference_properties = []
         for property_name in properties:
@@ -362,16 +391,16 @@ def _print_schema_info(schema: dict, level: int = 0,
             if link_to := property.get("linkTo"):
                 reference_properties.append({"name": property_name, "ref": link_to})
         if reference_properties:
-            _print("- reference properties:")
+            _print_output("- reference properties:")
             for reference_property in sorted(reference_properties, key=lambda key: key["name"]):
-                _print(f"  - {reference_property['name']}: {reference_property['ref']}")
+                _print_output(f"  - {reference_property['name']}: {reference_property['ref']}")
         if schema.get("additionalProperties") is True:
-            _print(f"  - additional properties are allowed")
+            _print_output(f"  - additional properties are allowed")
     if not more_details:
         return
     if properties := (schema.get("properties") if level == 0 else schema):
         if level == 0:
-            _print("- properties:")
+            _print_output("- properties:")
         for property_name in sorted(properties):
             if not all and property_name in _SCHEMAS_IGNORE_PROPERTIES:
                 continue
property.get("calculatedProperty"): suffix += f" | calculated" - _print(f"{spaces}- {property_name}: {property_type}{suffix}") + _print_output(f"{spaces}- {property_name}: {property_type}{suffix}") _print_schema_info(object_properties, level=level + 1, details=details, more_details=more_details, all=all, required=property.get("required")) @@ -416,28 +445,28 @@ def _print_schema_info(schema: dict, level: int = 0, if property_type := property_items.get("type"): if property_type == "object": suffix = "" - _print(f"{spaces}- {property_name}: array of object{suffix}") + _print_output(f"{spaces}- {property_name}: array of object{suffix}") _print_schema_info(property_items.get("properties"), level=level + 1, details=details, more_details=more_details, all=all, required=property_items.get("required")) elif property_type == "array": # This (array-of-array) never happens to occur at this time (February 2024). - _print(f"{spaces}- {property_name}: array of array{suffix}") + _print_output(f"{spaces}- {property_name}: array of array{suffix}") else: - _print(f"{spaces}- {property_name}: array of {property_type}{suffix}") + _print_output(f"{spaces}- {property_name}: array of {property_type}{suffix}") else: - _print(f"{spaces}- {property_name}: array{suffix}") + _print_output(f"{spaces}- {property_name}: array{suffix}") else: - _print(f"{spaces}- {property_name}: array{suffix}") + _print_output(f"{spaces}- {property_name}: array{suffix}") if enumeration: nenums = 0 maxenums = 15 for enum in sorted(enumeration): if (nenums := nenums + 1) >= maxenums: if (remaining := len(enumeration) - nenums) > 0: - _print(f"{spaces} - [{remaining} more ...]") + _print_output(f"{spaces} - [{remaining} more ...]") break - _print(f"{spaces} - {enum}") + _print_output(f"{spaces} - {enum}") else: if isinstance(property_type, list): property_type = " or ".join(sorted(property_type)) @@ -479,18 +508,18 @@ def _print_schema_info(schema: dict, level: int = 0, suffix += f" | max length: {max_length}" if (min_length := property.get("minLength")) is not None: suffix += f" | min length: {min_length}" - _print(f"{spaces}- {property_name}: {property_type}{suffix}") + _print_output(f"{spaces}- {property_name}: {property_type}{suffix}") if enumeration: nenums = 0 maxenums = 15 for enum in sorted(enumeration): if (nenums := nenums + 1) >= maxenums: if (remaining := len(enumeration) - nenums) > 0: - _print(f"{spaces} - [{remaining} more ...]") + _print_output(f"{spaces} - [{remaining} more ...]") break - _print(f"{spaces} - {enum}") + _print_output(f"{spaces} - {enum}") else: - _print(f"{spaces}- {property_name}") + _print_output(f"{spaces}- {property_name}") def _print_all_schema_names(portal: Portal, @@ -501,9 +530,9 @@ def _print_all_schema_names(portal: Portal, if raw: if raw_yaml: - _print(yaml.dump(schemas)) + _print_output(yaml.dump(schemas)) else: - _print(json.dumps(schemas, indent=4)) + _print_output(json.dumps(schemas, indent=4)) return if tree: @@ -513,14 +542,14 @@ def _print_all_schema_names(portal: Portal, for schema_name in sorted(schemas.keys()): if parent_schema_name := _get_parent_schema_name(schemas[schema_name]): if schemas[schema_name].get("isAbstract") is True: - _print(f"{schema_name} | parent: {parent_schema_name} | abstract") + _print_output(f"{schema_name} | parent: {parent_schema_name} | abstract") else: - _print(f"{schema_name} | parent: {parent_schema_name}") + _print_output(f"{schema_name} | parent: {parent_schema_name}") else: if schemas[schema_name].get("isAbstract") is True: - _print(f"{schema_name} | 
abstract") + _print_output(f"{schema_name} | abstract") else: - _print(schema_name) + _print_output(schema_name) if details: _print_schema(schemas[schema_name], details=details, more_details=more_details, all=all) @@ -559,8 +588,7 @@ def name_of(name: str) -> str: # noqa def _print_tree(root_name: Optional[str], children_of: Callable, has_children: Optional[Callable] = None, - name_of: Optional[Callable] = None, - print: Callable = print) -> None: + name_of: Optional[Callable] = None) -> None: """ Recursively prints as a tree structure the given root name and any of its children (again, recursively) as specified by the given children_of callable; @@ -589,26 +617,26 @@ def tree_generator(name: str, prefix: str = ""): if has_children(path): extension = branch if pointer == tee else space yield from tree_generator(path, prefix=prefix+extension) - print(first + ((name_of(root_name) if callable(name_of) else root_name) or "root")) + _print_output(first + ((name_of(root_name) if callable(name_of) else root_name) or "root")) for line in tree_generator(root_name, prefix=" "): - print(line) + _print_output(line) def _read_json_from_file(file: str) -> Optional[dict]: if not os.path.exists(file): _print(f"Cannot find file: {file}") - sys.exit(1) + _exit(1) try: with io.open(file, "r") as f: try: return json.load(f) except Exception: _print(f"Cannot parse JSON in file: {file}") - sys.exit(1) + _exit(1) except Exception as e: - print(e) + _print(e) _print(f"Cannot open file: {file}") - sys.exit(1) + _exit(1) def _print(*args, **kwargs): @@ -617,10 +645,26 @@ def _print(*args, **kwargs): sys.stdout.flush() -def _exit(message: Optional[str] = None) -> None: - if message: +def _print_output(value: str): + global _output_file + if _output_file: + _output_file.write(value) + _output_file.write("\n") + else: + with uncaptured_output(): + PRINT(value) + sys.stdout.flush() + + +def _exit(message: Optional[Union[str, int]] = None, status: Optional[int] = None) -> None: + global _output_file + if isinstance(message, str): _print(f"ERROR: {message}") - sys.exit(1) + elif isinstance(message, int) and not isinstance(status, int): + status = message + if _output_file: + _output_file.close() + sys.exit(status if isinstance(status, int) else (0 if status is None else 1)) if __name__ == "__main__": diff --git a/dcicutils/snapshot_utils.py b/dcicutils/snapshot_utils.py index 253fc5858..4576bef4f 100644 --- a/dcicutils/snapshot_utils.py +++ b/dcicutils/snapshot_utils.py @@ -1,6 +1,7 @@ import datetime import logging +import dcicutils.hack_for_elasticsearch_numpy_usage # noqa from elasticsearch.exceptions import NotFoundError from .misc_utils import ( environ_bool, PRINT, camel_case_to_snake_case, full_object_name, diff --git a/docs/source/dcicutils.rst b/docs/source/dcicutils.rst index b33b8a2b5..653e4e306 100644 --- a/docs/source/dcicutils.rst +++ b/docs/source/dcicutils.rst @@ -225,6 +225,13 @@ glacier_utils :members: +hack_for_elasticsearch_numpy_usage +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
diff --git a/pyproject.toml b/pyproject.toml
index 204cf0a88..5f9b51c6f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcicutils"
-version = "8.13.0"
+version = "8.13.3"
 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
 authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
 license = "MIT"
@@ -96,6 +96,7 @@
 publish-to-pypi = "dcicutils.scripts.publish_to_pypi:main"
 show-contributors = "dcicutils.contribution_scripts:show_contributors_main"
 run-license-checker = "dcicutils.scripts.run_license_checker:main"
 view-portal-object = "dcicutils.scripts.view_portal_object:main"
+update-portal-object = "dcicutils.scripts.update_portal_object:main"
 
 [tool.pytest.ini_options]
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 000000000..0cd76e245
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+markers =
+    last: run these tests last
diff --git a/test/test_hack_for_elasticsearch_numpy_usage.py b/test/test_hack_for_elasticsearch_numpy_usage.py
new file mode 100644
index 000000000..a3f2ce835
--- /dev/null
+++ b/test/test_hack_for_elasticsearch_numpy_usage.py
@@ -0,0 +1,16 @@
+import pytest
+import subprocess
+import sys
+
+pytestmark = [pytest.mark.last]
+
+
+def test_hack_for_elasticsearch_numpy_usage():
+    try:
+        subprocess.run("pip install numpy==2.0.0".split())
+        for module in [module_name for module_name in sys.modules
+                       if module_name.startswith("elasticsearch") or module_name.startswith("dcicutils")]:
+            del sys.modules[module]
+        import dcicutils.ff_utils  # noqa
+    finally:
+        subprocess.run("pip uninstall --yes numpy".split())
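+
+# Note: this test installs numpy 2.0.0 into the live environment and afterwards uninstalls
+# numpy entirely (it does not restore any previously installed version), which assumes the
+# dcicutils test environment does not otherwise need numpy; presumably this is also why the
+# test is marked "last". It passes if re-importing dcicutils.ff_utils (which imports
+# elasticsearch) raises no numpy.float_ AttributeError.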