From 3956d5e9def4dee0f4de00a05f27d84dadc45bd0 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 1 Dec 2023 13:05:15 -0500 Subject: [PATCH 01/53] Fix in data_readers.Excel to strip sheet names. --- dcicutils/data_readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dcicutils/data_readers.py b/dcicutils/data_readers.py index 46a07a059..b6f82263d 100644 --- a/dcicutils/data_readers.py +++ b/dcicutils/data_readers.py @@ -139,7 +139,7 @@ def sheet_reader(self, sheet_name: str) -> ExcelSheetReader: def open(self) -> None: if self._workbook is None: self._workbook = openpyxl.load_workbook(self._file, data_only=True) - self.sheet_names = self._workbook.sheetnames or [] + self.sheet_names = [(sheet_name or "").strip() for sheet_name in (self._workbook.sheetnames or [])] def __del__(self) -> None: if (workbook := self._workbook) is not None: From 15af785f57d479d00a72b35329c9c9b49023d510 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 1 Dec 2023 13:06:10 -0500 Subject: [PATCH 02/53] Update version and CHANGELOG.rst --- CHANGELOG.rst | 5 +++++ pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 317a7391b..102315cda 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,11 @@ dcicutils Change Log ---------- +8.4.1 +===== +* Strip sheet name in data_readers.Excel. + + 8.4.0 ===== * More work related to SMaHT ingestion (bundle/sheet_utils, data_readers, etc). 
diff --git a/pyproject.toml b/pyproject.toml index 4ae62c250..5dc35daab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0" +version = "8.4.0.1b1" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 103ebdc57a449264e0823681c3a1cc544ff5139b Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 1 Dec 2023 14:46:35 -0500 Subject: [PATCH 03/53] added structured_data, from smaht-portal/ingestion --- dcicutils/structured_data.py | 526 +++++++++++++++++++++++++++++++++++ 1 file changed, 526 insertions(+) create mode 100644 dcicutils/structured_data.py diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py new file mode 100644 index 000000000..3c9d4a66f --- /dev/null +++ b/dcicutils/structured_data.py @@ -0,0 +1,526 @@ +import copy +from functools import lru_cache +import json +from jsonschema import Draft7Validator as SchemaValidator +import os +import re +import sys +from typing import Any, Callable, List, Optional, Tuple, Type, Union +from webtest.app import TestApp +from dcicutils.data_readers import CsvReader, Excel, RowReader +from dcicutils.ff_utils import get_metadata, get_schema +from dcicutils.misc_utils import (load_json_if, merge_objects, remove_empty_properties, right_trim, split_string, + to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp) +from dcicutils.zip_utils import unpack_gz_file_to_temporary_file, unpack_files + + +# Classes/functions to parse a CSV or Excel Spreadsheet into structured data, using a specialized +# syntax to allow structured object properties to be referenced by column specifiers. This syntax +# uses an (intuitive) dot notation to reference nested objects, and a (less intuitive) notation +# utilizing the "#" character to reference array elements. 
May also further coerce data types by +# consulting an optionally specified JSON schema. +# +# Alternate and semantically equivalent implementation of dcicutils.{sheet,bundle}_utils. +# Spare time exercise, with benefit of sheet_utils implementation experience. + +ACCEPTABLE_FILE_SUFFIXES = [".csv", ".tsv", ".json", ".xls", ".xlsx", ".gz", ".tar", ".tar.gz", ".tgz", ".zip"] +ARRAY_VALUE_DELIMITER_CHAR = "|" +ARRAY_VALUE_DELIMITER_ESCAPE_CHAR = "\\" +ARRAY_NAME_SUFFIX_CHAR = "#" +ARRAY_NAME_SUFFIX_REGEX = re.compile(rf"{ARRAY_NAME_SUFFIX_CHAR}\d+") +DOTTED_NAME_DELIMITER_CHAR = "." + +# Forward type references for type hints. +Portal = Type["Portal"] +PortalAny = Union[VirtualApp, TestApp, Portal] +Schema = Type["Schema"] +StructuredDataSet = Type["StructuredDataSet"] + + +class StructuredDataSet: + + def __init__(self, file: Optional[str] = None, portal: Optional[PortalAny] = None, + schemas: Optional[List[dict]] = None, data: Optional[List[dict]] = None, + order: Optional[List[str]] = None, prune: bool = True) -> None: + self.data = {} if not data else data + self._portal = Portal.create(portal, data=self.data, schemas=schemas) # If None then no schemas nor refs. 
+ self._order = order + self._prune = prune + self._issues = None + self._load_file(file) if file else None + + @staticmethod + def load(file: str, portal: Optional[PortalAny] = None, schemas: Optional[List[dict]] = None, + order: Optional[List[str]] = None, prune: bool = True) -> StructuredDataSet: + return StructuredDataSet(file=file, portal=portal, schemas=schemas, order=order, prune=prune) + + def validate(self) -> Optional[List[str]]: + issues = [] + for type_name in self.data: + if (schema := Schema.load_by_name(type_name, portal=self._portal)): + item_number = 0 + for data in self.data[type_name]: + item_number += 1 + if (validation_issues := schema.validate(data)) is not None: + issues.extend([f"{schema.name} [{item_number}]: {issue}" for issue in validation_issues]) + return issues + (self._issues or []) + + def _load_file(self, file: str) -> None: + # Returns a dictionary where each property is the name (i.e. the type) of the data, + # and the value is array of dictionaries for the data itself. Handle these kinds of files: + # 1. Single CSV of JSON file, where the (base) name of the file is the data type name. + # 2. Single Excel file containing one or more sheets, where each sheet + # represents (i.e. is named for, and contains data for) a different type. + # 3. Zip file (.zip or .tar.gz or .tgz or .tar), containing data files to load, where the + # base name of each contained file is the data type name; or any of above gzipped (.gz). 
+ if file.endswith(".gz") or file.endswith(".tgz"): + with unpack_gz_file_to_temporary_file(file) as file: + return self._load_normal_file(file) + return self._load_normal_file(file) + + def _load_normal_file(self, file: str) -> None: + if file.endswith(".csv") or file.endswith(".tsv"): + self._load_csv_file(file) + elif file.endswith(".xls") or file.endswith(".xlsx"): + self._load_excel_file(file) + elif file.endswith(".json"): + self._load_json_file(file) + elif file.endswith(".tar") or file.endswith(".zip"): + self._load_packed_file(file) + + def _load_packed_file(self, file: str) -> None: + for file in unpack_files(file, suffixes=ACCEPTABLE_FILE_SUFFIXES): + self._load_file(file) + + def _load_csv_file(self, file: str) -> None: + self._load_reader(reader := CsvReader(file), type_name=Schema.type_name(file)) + self._note_issues(reader.issues, os.path.basename(file)) + + def _load_excel_file(self, file: str) -> None: + excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl). + order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {} + for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)): + self._load_reader(reader := excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name)) + self._note_issues(reader.issues, f"{file}:{sheet_name}") + + def _load_json_file(self, file: str) -> None: + with open(file) as f: + self._add(Schema.type_name(file), json.load(f)) + + def _load_reader(self, reader: RowReader, type_name: str) -> None: + schema = None + noschema = False + structured_row_template = None + for row in reader: + if not structured_row_template: # Delay creation just so we don't create it if there are no rows. 
+ if not schema and not noschema and not (schema := Schema.load_by_name(type_name, portal=self._portal)): + noschema = True + structured_row_template = _StructuredRowTemplate(reader.header, schema) + structured_row = structured_row_template.create_row() + for column_name, value in row.items(): + structured_row_template.set_value(structured_row, column_name, value, reader.location) + if schema and (schema_name := schema.name): + type_name = schema_name + self._add(type_name, structured_row) + + def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None: + if self._prune: + remove_empty_properties(data) + if type_name in self.data: + self.data[type_name].extend([data] if isinstance(data, dict) else data) + else: + self.data[type_name] = [data] if isinstance(data, dict) else data + + def _note_issues(self, issues: Optional[List[str]], source: str) -> None: + if issues: + if not self._issues: + self._issues = [] + self._issues.append({source: issues}) + +class _StructuredRowTemplate: + + def __init__(self, column_names: List[str], schema: Optional[Schema] = None) -> None: + self._schema = schema + self._set_value_functions = {} + self._template = self._create_row_template(column_names) + + def create_row(self) -> dict: + return copy.deepcopy(self._template) + + def set_value(self, data: dict, column_name: str, value: str, loc: int = -1) -> None: + if (set_value_function := self._set_value_functions.get(column_name)): + src = (f"{f'{self._schema.name}.' if self._schema else ''}" + + f"{f'{column_name}' if column_name else ''}{f' [{loc}]' if loc else ''}") + set_value_function(data, value, src) + + def _create_row_template(self, column_names: List[str]) -> dict: # Surprisingly tricky code here. 
+ + def parse_array_components(column_name: str, value: Optional[Any], path: List[Union[str, int]]) -> Tuple[Optional[str], Optional[List[Any]]]: + array_name, array_indices = Schema.array_indices(column_name) + if not array_name: + return None, None + array = None + for array_index in array_indices[::-1]: # Reverse iteration from the last/inner-most index to first. + if not (array is None and value is None): + array_index = max(array_index, 0) + path.insert(0, array_index) + array_length = array_index + 1 + if array is None: + if value is None: + array = [None for _ in range(array_length)] + else: + array = [copy.deepcopy(value) for _ in range(array_length)] + else: + array = [copy.deepcopy(array) for _ in range(array_length)] + return array_name, array + + def parse_components(column_components: List[str], path: List[Union[str, int]]) -> dict: + value = parse_components(column_components[1:], path) if len(column_components) > 1 else None + array_name, array = parse_array_components(column_component := column_components[0], value, path) + path.insert(0, array_name or column_component) + return {array_name: array} if array_name else {column_component: value} + + def set_value_internal(data: Union[dict, list], value: Optional[Any], src: Optional[str], + path: List[Union[str, int]], mapv: Optional[Callable]) -> None: + + def set_value_backtrack_object(path_index: int, path_element: str) -> None: + nonlocal data, path, original_data + backtrack_data = original_data + for j in range(path_index - 1): + if not isinstance(path[j], str): + return + backtrack_data = backtrack_data[path[j]] + data = backtrack_data[path[path_index - 1]] = {path_element: None} + + original_data = data + json_value = None + if isinstance(path[-1], int) and (json_value := load_json_if(value, is_array=True)): + path = right_trim(path, remove=lambda value: isinstance(value, int)) + for i, p in enumerate(path[:-1]): + if isinstance(p, str) and (not isinstance(data, dict) or p not in data): + 
set_value_backtrack_object(i, p) + data = data[p] + if (p := path[-1]) == -1 and isinstance(value, str): + values = _split_array_string(value) + if mapv: + values = [mapv(value, src) for value in values] + merge_objects(data, values) + else: + if json_value or (json_value := load_json_if(value, is_array=True, is_object=True)): + data[p] = json_value + else: + if isinstance(p, str) and (not isinstance(data, dict) or p not in data): + set_value_backtrack_object(i + 1, p) + data[p] = mapv(value, src) if mapv else value + + def ensure_column_consistency(column_name: str) -> None: + column_components = _split_dotted_string(Schema.normalize_column_name(column_name)) + for existing_column_name in self._set_value_functions: + existing_column_components = _split_dotted_string(Schema.normalize_column_name(existing_column_name)) + if (Schema.unadorn_column_name(column_components[0]) != + Schema.unadorn_column_name(existing_column_components[0])): # noqa + break + for i in range(min(len(column_components), len(existing_column_components))): + if i >= len(column_components) or i >= len(existing_column_components): + break + if ((column_components[i] != existing_column_components[i]) and + (column_components[i].endswith(ARRAY_NAME_SUFFIX_CHAR) or + existing_column_components[i].endswith(ARRAY_NAME_SUFFIX_CHAR))): + raise Exception(f"Inconsistent columns: {column_components[i]} {existing_column_components[i]}") + + structured_row_template = {} + for column_name in column_names or []: + ensure_column_consistency(column_name) + rational_column_name = self._schema.rationalize_column_name(column_name) if self._schema else column_name + map_value_function = self._schema.get_map_value_function(rational_column_name) if self._schema else None + if (column_components := _split_dotted_string(rational_column_name)): + merge_objects(structured_row_template, parse_components(column_components, path := []), True) + self._set_value_functions[column_name] = (lambda data, value, src, path=path, 
mapv=map_value_function: + set_value_internal(data, value, src, path, mapv)) + return structured_row_template + + +class Schema: + + def __init__(self, schema_json: dict, portal: Optional[Portal] = None) -> None: + self.data = schema_json + self.name = Schema.type_name(schema_json.get("title", "")) if schema_json else "" + self._portal = portal # Needed only to resolve linkTo references. + self._map_value_functions = { + "boolean": self._map_function_boolean, + "enum": self._map_function_enum, + "integer": self._map_function_integer, + "number": self._map_function_number, + "string": self._map_function_string + } + self._typeinfo = self._create_typeinfo(schema_json) + + @staticmethod + def load_by_name(name: str, portal: Portal) -> Optional[dict]: + return Schema(portal.get_schema(Schema.type_name(name)), portal) if portal else None + + def validate(self, data: dict) -> Optional[List[str]]: + issues = [] + for issue in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data): + issues.append(issue.message) + return issues if issues else None + + def get_map_value_function(self, column_name: str) -> Optional[Any]: + return (self._get_typeinfo(column_name) or {}).get("map") + + def _get_typeinfo(self, column_name: str) -> Optional[dict]: + if isinstance(info := self._typeinfo.get(column_name), str): + info = self._typeinfo.get(info) + if not info and isinstance(info := self._typeinfo.get(self.unadorn_column_name(column_name)), str): + info = self._typeinfo.get(info) + return info + + def _map_function(self, typeinfo: dict) -> Optional[Callable]: + if isinstance(typeinfo, dict) and (typeinfo_type := typeinfo.get("type")) is not None: + if isinstance(typeinfo_type, list): + # The type specifier can actually be a list of acceptable types; for + # example smaht-portal/schemas/mixins.json/meta_workflow_input#.value; + # we will take the first one for which we have a mapping function. 
+ # TODO: Maybe more correct to get all map function and map to any for values. + for acceptable_type in typeinfo_type: + if (map_function := self._map_value_functions.get(acceptable_type)) is not None: + break + elif not isinstance(typeinfo_type, str): + return None # Invalid type specifier; ignore, + elif isinstance(typeinfo.get("enum"), list): + map_function = self._map_function_enum + elif isinstance(typeinfo.get("linkTo"), str): + map_function = self._map_function_ref + else: + map_function = self._map_value_functions.get(typeinfo_type) + return map_function(typeinfo) if map_function else None + return None + + def _map_function_boolean(self, typeinfo: dict) -> Callable: + def map_boolean(value: str, src: Optional[str]) -> Any: + return to_boolean(value, value) + return map_boolean + + def _map_function_enum(self, typeinfo: dict) -> Callable: + def map_enum(value: str, enum_specifiers: dict, src: Optional[str]) -> Any: + return to_enum(value, enum_specifiers) + return lambda value, src: map_enum(value, typeinfo.get("enum", []), src) + + def _map_function_integer(self, typeinfo: dict) -> Callable: + def map_integer(value: str, src: Optional[str]) -> Any: + return to_integer(value, value) + return map_integer + + def _map_function_number(self, typeinfo: dict) -> Callable: + def map_number(value: str, src: Optional[str]) -> Any: + return to_float(value, value) + return map_number + + def _map_function_string(self, typeinfo: dict) -> Callable: + def map_string(value: str, src: Optional[str]) -> str: + return value if value is not None else "" + return map_string + + def _map_function_ref(self, typeinfo: dict) -> Callable: + def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[str]) -> Any: + nonlocal self, typeinfo + exception = None + if not value: + if (column := typeinfo.get("column")) and column in self.data.get("required", []): + exception = f"No required reference (linkTo) value for: {link_to}" + elif portal and not 
portal.ref_exists(link_to, value): + exception = f"Cannot resolve reference (linkTo) for: {link_to}" + if exception: + raise Exception(exception + f"{f'/{value}' if value else ''}{f' from {src}' if src else ''}") + return value + return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src) + + def _create_typeinfo(self, schema_json: dict, parent_key: Optional[str] = None) -> dict: + """ + Given a JSON schema return a dictionary of all the property names it defines, but with + the names of any nested properties (i.e objects within objects) flattened into a single + property name in dot notation; and set the value of each of these flat property names + to the type of the terminal/leaf value of the (either) top-level or nested type. N.B. We + do NOT currently support array-of-arry or array-of-multiple-types. E.g. for this schema: + + { "properties": { + "abc": { + "type": "object", + "properties": { + "def": { "type": "string" }, + "ghi": { + "type": "object", + "properties": { + "mno": { "type": "number" } + } + } + } }, + "stu": { "type": "array", "items": { "type": "string" } }, + "vw": { + "type": "array", + "items": { + "type": "object", + "properties": { + "xyz": { "type": "integer" } + } } + } } } + + Then we will return this flat dictionary: + + { "abc.def": { "type": "string", "map": }, + "abc.ghi.mno": { "type": "number", "map": }, + "stu#": { "type": "string", "map": }, + "vw#.xyz": { "type": "integer", "map": } } + """ + result = {} + if (properties := schema_json.get("properties")) is None: + if parent_key: + if (schema_type := schema_json.get("type")) is None: + schema_type = "string" # Undefined array type; should not happen; just make it a string. 
+ if schema_type == "array": + parent_key += ARRAY_NAME_SUFFIX_CHAR + result[parent_key] = {"type": schema_type, "map": self._map_function(schema_json)} + if ARRAY_NAME_SUFFIX_CHAR in parent_key: + result[parent_key.replace(ARRAY_NAME_SUFFIX_CHAR, "")] = parent_key + return result + for property_key, property_value in properties.items(): + if not isinstance(property_value, dict) or not property_value: + continue # Should not happen; every property within properties should be an object; no harm; ignore. + key = property_key if parent_key is None else f"{parent_key}{DOTTED_NAME_DELIMITER_CHAR}{property_key}" + if ARRAY_NAME_SUFFIX_CHAR in property_key: + raise Exception(f"Property name with \"{ARRAY_NAME_SUFFIX_CHAR}\" in JSON schema NOT supported: {key}") + if (property_value_type := property_value.get("type")) == "object" and "properties" in property_value: + result.update(self._create_typeinfo(property_value, parent_key=key)) + continue + if property_value_type == "array": + while property_value_type == "array": # Handle array of array here even though we don't in general. 
+ if not isinstance((array_property_items := property_value.get("items")), dict): + if array_property_items is None or isinstance(array_property_items, list): + raise Exception(f"Array of undefined or multiple types in JSON schema NOT supported: {key}") + raise Exception(f"Invalid array type specifier in JSON schema: {key}") + key = key + ARRAY_NAME_SUFFIX_CHAR + property_value = array_property_items + property_value_type = property_value.get("type") + result.update(self._create_typeinfo(array_property_items, parent_key=key)) + continue + result[key] = {"type": property_value_type, "map": self._map_function({**property_value, "column": key})} + if ARRAY_NAME_SUFFIX_CHAR in key: + result[key.replace(ARRAY_NAME_SUFFIX_CHAR, "")] = key + return result + + def rationalize_column_name(self, column_name: str, schema_column_name: Optional[str] = None) -> str: + """ + Replaces any (dot-separated) components of the given column_name which have array indicators/suffixes + with the corresponding value from the (flattened) schema column names, but with any actual array + indices from the given column name component. For example, if the (flattened) schema column name + if "abc#.def##.ghi" and the given column name is "abc.def#1#2#.ghi" returns "abc#.def#1#2.ghi", + or if the schema column name is "abc###" and the given column name is "abc#0#" then "abc#0##". + This will "correct" specified columns name (with array indicators) according to the schema. 
+ """ + if not isinstance(schema_column_name := self._typeinfo.get(self.unadorn_column_name(column_name)), str): + return column_name + schema_column_components = _split_dotted_string(schema_column_name) + for i in range(len(column_components := _split_dotted_string(column_name))): + schema_array_name, schema_array_indices = Schema.array_indices(schema_column_components[i]) + if schema_array_indices: + if (array_indices := Schema.array_indices(column_components[i])[1]): + if len(schema_array_indices) > len(array_indices): + schema_array_indices = array_indices + [-1] * (len(schema_array_indices) - len(array_indices)) + else: + schema_array_indices = array_indices[:len(schema_array_indices)] + array_qualifiers = "".join([(("#" + str(i)) if i >= 0 else "#") for i in schema_array_indices]) + column_components[i] = schema_array_name + array_qualifiers + return DOTTED_NAME_DELIMITER_CHAR.join(column_components) + + @staticmethod + def normalize_column_name(column_name: str) -> str: + return Schema.unadorn_column_name(column_name, False) + + @staticmethod + def unadorn_column_name(column_name: str, full: bool = True) -> str: + """ + Given a string representing a flat column name, i.e possibly dot-separated name components, + and where each component possibly ends with an array suffix (i.e. pound sign - #) followed + by an optional integer, returns the unadorned column, without any array suffixes/specifiers. + """ + unadorned_column_name = DOTTED_NAME_DELIMITER_CHAR.join( + [ARRAY_NAME_SUFFIX_REGEX.sub(ARRAY_NAME_SUFFIX_CHAR, value) + for value in _split_dotted_string(column_name)]) + return unadorned_column_name.replace(ARRAY_NAME_SUFFIX_CHAR, "") if full else unadorned_column_name + + @staticmethod + def type_name(value: str) -> str: # File or other name. 
+ name = os.path.basename(value).replace(" ", "") if isinstance(value, str) else "" + return to_camel_case(name[0:dot] if (dot := name.rfind(".")) > 0 else name) + + @staticmethod + def array_indices(name: str) -> Tuple[Optional[str], Optional[List[int]]]: + indices = [] + while (array_indicator_position := name.rfind(ARRAY_NAME_SUFFIX_CHAR)) > 0: + array_index = name[array_indicator_position + 1:] if array_indicator_position < len(name) - 1 else -1 + if (array_index := to_integer(array_index)) is None: + break + name = name[0:array_indicator_position] + indices.insert(0, array_index) + return (name, indices) if indices else (None, None) + + +class Portal: + + def __init__(self, portal: PortalAny, data: Optional[dict] = None, schemas: Optional[List[dict]] = None) -> None: + self.vapp = portal.vapp if isinstance(portal, Portal) else portal + self._data = data # Data set being loaded (e.g. by StructuredDataSet). + self._schemas = schemas # Explicitly specified known schemas. + + @lru_cache(maxsize=256) + def get_schema(self, schema_name: str) -> Optional[dict]: + def get_schema_internal(schema_name: str) -> Optional[dict]: + return (next((schema for schema in self._schemas or [] + if Schema.type_name(schema.get("title")) == Schema.type_name(schema_name)), None) or + get_schema(schema_name, portal_vapp=self.vapp)) + try: + if (schema := get_schema_internal(schema_name)): + return schema + except Exception: # Try/force camel-case if all upper/lower-case. 
+ if schema_name == schema_name.upper(): + if (schema := get_schema_internal(schema_name.lower().title())): + return schema + elif schema_name == schema_name.lower(): + if (schema := get_schema_internal(schema_name.title())): + return schema + raise + + @lru_cache(maxsize=256) + def get_metadata(self, object_name: str) -> Optional[dict]: + try: + return get_metadata(object_name, vapp=self.vapp) + except Exception: + return None + + def ref_exists(self, type_name: str, value: str) -> bool: + if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)): + iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"} + for item in items: + if (ivalue := next((item[iproperty] for iproperty in iproperties if iproperty in item), None)): + if isinstance(ivalue, list) and value in ivalue or ivalue == value: + return True + return self.get_metadata(f"/{type_name}/{value}") is not None + + @staticmethod + def create(portal: Optional[PortalAny] = None, + data: Optional[dict] = None, schemas: Optional[List[dict]] = None) -> Optional[Portal]: + if isinstance(portal, Portal): + if data is not None: + portal._data = data + if schemas is not None: + portal._schemas = schemas + return portal + return Portal(portal, data=data, schemas=schemas) if portal else None + + +def _split_dotted_string(value: str): + return split_string(value, DOTTED_NAME_DELIMITER_CHAR) + + +def _split_array_string(value: str): + return split_string(value, ARRAY_VALUE_DELIMITER_CHAR, ARRAY_VALUE_DELIMITER_ESCAPE_CHAR) From 38dd68d266b55a0cbbf9d5939afa357ca519b24d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Dec 2023 11:10:32 -0500 Subject: [PATCH 04/53] Moved structured_data.py here from smaht-portal. 
--- dcicutils/structured_data.py | 244 +++++++++++++++++++++++++++++------ 1 file changed, 208 insertions(+), 36 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 3c9d4a66f..0f38dde03 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -3,15 +3,21 @@ import json from jsonschema import Draft7Validator as SchemaValidator import os +from pyramid.paster import get_app +from pyramid.router import Router import re +import requests +from requests.models import Response as RequestResponse import sys from typing import Any, Callable, List, Optional, Tuple, Type, Union -from webtest.app import TestApp +from webtest.app import TestApp, TestResponse +from dcicutils.common import OrchestratedApp, APP_CGAP, APP_FOURFRONT, APP_SMAHT, ORCHESTRATED_APPS +from dcicutils.creds_utils import CGAPKeyManager, FourfrontKeyManager, SMaHTKeyManager from dcicutils.data_readers import CsvReader, Excel, RowReader -from dcicutils.ff_utils import get_metadata, get_schema +from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata from dcicutils.misc_utils import (load_json_if, merge_objects, remove_empty_properties, right_trim, split_string, to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp) -from dcicutils.zip_utils import unpack_gz_file_to_temporary_file, unpack_files +from dcicutils.zip_utils import temporary_file, unpack_gz_file_to_temporary_file, unpack_files # Classes/functions to parse a CSV or Excel Spreadsheet into structured data, using a specialized @@ -32,25 +38,26 @@ # Forward type references for type hints. 
Portal = Type["Portal"] -PortalAny = Union[VirtualApp, TestApp, Portal] +PortalBase = Type["PortalBase"] Schema = Type["Schema"] StructuredDataSet = Type["StructuredDataSet"] class StructuredDataSet: - def __init__(self, file: Optional[str] = None, portal: Optional[PortalAny] = None, + def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None, schemas: Optional[List[dict]] = None, data: Optional[List[dict]] = None, order: Optional[List[str]] = None, prune: bool = True) -> None: - self.data = {} if not data else data - self._portal = Portal.create(portal, data=self.data, schemas=schemas) # If None then no schemas nor refs. + self.data = {} if not data else data # If portal is None then no schemas nor refs. + self._portal = Portal(portal, data=self.data, schemas=schemas) if portal else None self._order = order self._prune = prune self._issues = None self._load_file(file) if file else None @staticmethod - def load(file: str, portal: Optional[PortalAny] = None, schemas: Optional[List[dict]] = None, + def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None, + schemas: Optional[List[dict]] = None, order: Optional[List[str]] = None, prune: bool = True) -> StructuredDataSet: return StructuredDataSet(file=file, portal=portal, schemas=schemas, order=order, prune=prune) @@ -137,6 +144,7 @@ def _note_issues(self, issues: Optional[List[str]], source: str) -> None: self._issues = [] self._issues.append({source: issues}) + class _StructuredRowTemplate: def __init__(self, column_names: List[str], schema: Optional[Schema] = None) -> None: @@ -155,7 +163,8 @@ def set_value(self, data: dict, column_name: str, value: str, loc: int = -1) -> def _create_row_template(self, column_names: List[str]) -> dict: # Surprisingly tricky code here. 
- def parse_array_components(column_name: str, value: Optional[Any], path: List[Union[str, int]]) -> Tuple[Optional[str], Optional[List[Any]]]: + def parse_array_components(column_name: str, value: Optional[Any], + path: List[Union[str, int]]) -> Tuple[Optional[str], Optional[List[Any]]]: array_name, array_indices = Schema.array_indices(column_name) if not array_name: return None, None @@ -274,7 +283,7 @@ def _get_typeinfo(self, column_name: str) -> Optional[dict]: if not info and isinstance(info := self._typeinfo.get(self.unadorn_column_name(column_name)), str): info = self._typeinfo.get(info) return info - + def _map_function(self, typeinfo: dict) -> Optional[Callable]: if isinstance(typeinfo, dict) and (typeinfo_type := typeinfo.get("type")) is not None: if isinstance(typeinfo_type, list): @@ -465,38 +474,204 @@ def array_indices(name: str) -> Tuple[Optional[str], Optional[List[int]]]: return (name, indices) if indices else (None, None) -class Portal: +class PortalBase: + + def __init__(self, + arg: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None, + env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None, + key: Optional[Union[dict, tuple]] = None, + portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None) -> PortalBase: + if isinstance(arg, VirtualApp) and not portal: + portal = arg + elif isinstance(arg, TestApp) and not portal: + portal = arg + elif isinstance(arg, Router) and not portal: + portal = arg + elif isinstance(arg, Portal) and not portal: + portal = arg + elif isinstance(arg, str) and arg.endswith(".ini"): + portal = arg + elif isinstance(arg, str) and not env: + env = arg + elif (isinstance(arg, dict) or isinstance(arg, tuple)) and not key: + key = arg + self._vapp = None + self._key = None + self._key_pair = None + self._server = None + if isinstance(portal, Portal): + self._vapp = portal._vapp + self._key = portal._key + self._key_pair = portal._key_pair + self._server = 
portal._server + elif isinstance(portal, (VirtualApp, TestApp)): + self._vapp = portal + elif isinstance(portal, (Router, str)): + self._vapp = PortalBase._create_testapp(portal) + elif isinstance(key, dict): + self._key = key + self._key_pair = (key.get("key"), key.get("secret")) if key else None + elif isinstance(key, tuple) and len(key) >= 2: + self._key = {"key": key[0], "secret": key[1]} + self._key_pair = key + elif isinstance(env, str): + key_managers = {APP_CGAP: CGAPKeyManager, APP_FOURFRONT: FourfrontKeyManager, APP_SMAHT: SMaHTKeyManager} + if not (key_manager := key_managers.get(app)) or not (key_manager := key_manager()): + raise Exception(f"Invalid app name: {app} (valid: {', '.join(ORCHESTRATED_APPS)}).") + if isinstance(env, str): + self._key = key_manager.get_keydict_for_env(env) + self._server = self._key.get("server") if self._key else None + elif isinstance(server, str): + self._key = key_manager.get_keydict_for_server(server) + self._server = server + self._key_pair = key_manager.keydict_to_keypair(self._key) if self._key else None + + def get_metadata(self, object_id: str) -> Optional[dict]: + return get_metadata(obj_id=object_id, vapp=self._vapp, key=self._key) + + def patch_metadata(self, object_id: str, data: str) -> Optional[dict]: + if self._key: + return patch_metadata(obj_id=object_id, patch_item=data, key=self._key) + return self.patch(f"/{object_id}", data) + + def post_metadata(self, object_type: str, data: str) -> Optional[dict]: + if self._key: + return post_metadata(schema_name=object_type, post_item=data, key=self._key) + return self.post(f"/{object_type}", data) + + def get_schema(self, schema_name: str) -> Optional[dict]: + return get_schema(schema_name, portal_vapp=self._vapp, key=self._key) + + def get(self, uri: str, follow: bool = True, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]: + if isinstance(self._vapp, (VirtualApp, TestApp)): + response = self._vapp.get(self._uri(uri), **self._kwargs(**kwargs)) + if 
response and response.status_code in [301, 302, 303, 307, 308] and follow: + response = response.follow() + return response + return requests.get(self._uri(uri), allow_redirects=follow, **self._kwargs(**kwargs)) + + def patch(self, uri: str, data: Optional[dict] = None, + json: Optional[dict] = None, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]: + if isinstance(self._vapp, (VirtualApp, TestApp)): + return self._vapp.patch_json(self._uri(uri), json or data, **self._kwargs(**kwargs)) + return requests.patch(self._uri(uri), json=json or data, **self._kwargs(**kwargs)) + + def post(self, uri: str, data: Optional[dict] = None, json: Optional[dict] = None, + files: Optional[dict] = None, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]: + if isinstance(self._vapp, (VirtualApp, TestApp)): + if files: + return self._vapp.post(self._uri(uri), json or data, upload_files=files, **self._kwargs(**kwargs)) + else: + return self._vapp.post_json(self._uri(uri), json or data, upload_files=files, **self._kwargs(**kwargs)) + return requests.post(self._uri(uri), json=json or data, files=files, **self._kwargs(**kwargs)) + + def _uri(self, uri: str) -> str: + if not isinstance(uri, str) or not uri: + return "/" + if uri.lower().startswith("http://") or uri.lower().startswith("https://"): + return uri + uri = re.sub(r"/+", "/", uri) + return (self._server + ("/" if uri.startswith("/") else "") + uri) if self._server else uri + + def _kwargs(self, **kwargs) -> dict: + result_kwargs = {"headers": + kwargs.get("headers", {"Content-type": "application/json", "Accept": "application/json"})} + if self._key_pair: + result_kwargs["auth"] = self._key_pair + if isinstance(timeout := kwargs.get("timeout"), int): + result_kwargs["timeout"] = timeout + return result_kwargs + + @staticmethod + def create_for_testing(ini_file: Optional[str] = None) -> PortalBase: + if isinstance(ini_file, str): + return Portal(Portal._create_testapp(ini_file)) + minimal_ini_for_unit_testing = 
"[app:app]\nuse = egg:encoded\nsqlalchemy.url = postgresql://dummy\n" + with temporary_file(content=minimal_ini_for_unit_testing, suffix=".ini") as ini_file: + return Portal(Portal._create_testapp(ini_file)) + + @staticmethod + def create_for_testing_local(ini_file: Optional[str] = None) -> Portal: + if isinstance(ini_file, str): + return Portal(Portal._create_testapp(ini_file)) + minimal_ini_for_testing_local = "\n".join([ + "[app:app]\nuse = egg:encoded\nfile_upload_bucket = dummy", + "sqlalchemy.url = postgresql://postgres@localhost:5441/postgres?host=/tmp/snovault/pgdata", + "multiauth.groupfinder = encoded.authorization.smaht_groupfinder", + "multiauth.policies = auth0 session remoteuser accesskey", + "multiauth.policy.session.namespace = mailto", + "multiauth.policy.session.use = encoded.authentication.NamespacedAuthenticationPolicy", + "multiauth.policy.session.base = pyramid.authentication.SessionAuthenticationPolicy", + "multiauth.policy.remoteuser.namespace = remoteuser", + "multiauth.policy.remoteuser.use = encoded.authentication.NamespacedAuthenticationPolicy", + "multiauth.policy.remoteuser.base = pyramid.authentication.RemoteUserAuthenticationPolicy", + "multiauth.policy.accesskey.namespace = accesskey", + "multiauth.policy.accesskey.use = encoded.authentication.NamespacedAuthenticationPolicy", + "multiauth.policy.accesskey.base = encoded.authentication.BasicAuthAuthenticationPolicy", + "multiauth.policy.accesskey.check = encoded.authentication.basic_auth_check", + "multiauth.policy.auth0.use = encoded.authentication.NamespacedAuthenticationPolicy", + "multiauth.policy.auth0.namespace = auth0", + "multiauth.policy.auth0.base = encoded.authentication.Auth0AuthenticationPolicy" + ]) + with temporary_file(content=minimal_ini_for_testing_local, suffix=".ini") as minimal_ini_file: + return Portal(Portal._create_testapp(minimal_ini_file)) + + @staticmethod + def _create_testapp(value: Union[str, Router, TestApp] = "development.ini") -> TestApp: + """ + 
Creates and returns a TestApp; and also adds a get_with_follow method to it. + Refactored out of above loadxl code (2023-09) to consolidate at a single point, + and also for use by the generate_local_access_key and view_local_object scripts. + """ + if isinstance(value, TestApp): + return value + app = value if isinstance(value, Router) else get_app(value, "app") + return TestApp(app, {"HTTP_ACCEPT": "application/json", "REMOTE_USER": "TEST"}) + + +class Portal(PortalBase): - def __init__(self, portal: PortalAny, data: Optional[dict] = None, schemas: Optional[List[dict]] = None) -> None: - self.vapp = portal.vapp if isinstance(portal, Portal) else portal - self._data = data # Data set being loaded (e.g. by StructuredDataSet). - self._schemas = schemas # Explicitly specified known schemas. + def __init__(self, + arg: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None, + env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None, + key: Optional[Union[dict, tuple]] = None, + portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None, + data: Optional[dict] = None, schemas: Optional[List[dict]] = None) -> Optional[Portal]: + super(Portal, self).__init__(arg, env=env, app=app, server=server, key=key, portal=portal) + if isinstance(arg, Portal) and not portal: + portal = arg + if isinstance(portal, Portal): + self._schemas = schemas if schemas is not None else portal._schemas # Explicitly specified/known schemas. + self._data = data if data is not None else portal._data # Data set being loaded; e.g. by StructuredDataSet. 
+ else: + self._schemas = schemas + self._data = data + + @lru_cache(maxsize=256) + def get_metadata(self, object_name: str) -> Optional[dict]: + try: + return super(Portal, self).get_metadata(object_name) + except Exception: + return None @lru_cache(maxsize=256) def get_schema(self, schema_name: str) -> Optional[dict]: - def get_schema_internal(schema_name: str) -> Optional[dict]: + def get_schema_exact(schema_name: str) -> Optional[dict]: # noqa return (next((schema for schema in self._schemas or [] if Schema.type_name(schema.get("title")) == Schema.type_name(schema_name)), None) or - get_schema(schema_name, portal_vapp=self.vapp)) + super(Portal, self).get_schema(schema_name)) try: - if (schema := get_schema_internal(schema_name)): + if (schema := get_schema_exact(schema_name)): return schema except Exception: # Try/force camel-case if all upper/lower-case. if schema_name == schema_name.upper(): - if (schema := get_schema_internal(schema_name.lower().title())): + if (schema := get_schema_exact(schema_name.lower().title())): return schema elif schema_name == schema_name.lower(): - if (schema := get_schema_internal(schema_name.title())): + if (schema := get_schema_exact(schema_name.title())): return schema raise - @lru_cache(maxsize=256) - def get_metadata(self, object_name: str) -> Optional[dict]: - try: - return get_metadata(object_name, vapp=self.vapp) - except Exception: - return None - def ref_exists(self, type_name: str, value: str) -> bool: if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)): iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"} @@ -507,15 +682,12 @@ def ref_exists(self, type_name: str, value: str) -> bool: return self.get_metadata(f"/{type_name}/{value}") is not None @staticmethod - def create(portal: Optional[PortalAny] = None, - data: Optional[dict] = None, schemas: Optional[List[dict]] = None) -> Optional[Portal]: - if isinstance(portal, Portal): - if data is 
not None: - portal._data = data - if schemas is not None: - portal._schemas = schemas - return portal - return Portal(portal, data=data, schemas=schemas) if portal else None + def create_for_testing(ini_file: Optional[str] = None, schemas: Optional[List[dict]] = None) -> Portal: + return Portal(PortalBase.create_for_testing(ini_file), schemas=schemas) + + @staticmethod + def create_for_testing_local(ini_file: Optional[str] = None, schemas: Optional[List[dict]] = None) -> Portal: + return Portal(PortalBase.create_for_testing_local(ini_file), schemas=schemas) def _split_dotted_string(value: str): From c1becacba2b03df812c31a1a083adec6e1ac39ac Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Dec 2023 11:14:55 -0500 Subject: [PATCH 05/53] Moved structured_data.py here from smaht-portal. --- dcicutils/structured_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 0f38dde03..4d7cf8839 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -477,7 +477,7 @@ def array_indices(name: str) -> Tuple[Optional[str], Optional[List[int]]]: class PortalBase: def __init__(self, - arg: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None, + arg: Optional[Union[VirtualApp, TestApp, Router, Portal, dict, tuple, str]] = None, env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None, key: Optional[Union[dict, tuple]] = None, portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None) -> PortalBase: @@ -489,7 +489,7 @@ def __init__(self, portal = arg elif isinstance(arg, Portal) and not portal: portal = arg - elif isinstance(arg, str) and arg.endswith(".ini"): + elif isinstance(arg, str) and arg.endswith(".ini") and not portal: portal = arg elif isinstance(arg, str) and not env: env = arg @@ -632,7 +632,7 @@ def _create_testapp(value: Union[str, Router, TestApp] = "development.ini") -> T class 
Portal(PortalBase): def __init__(self, - arg: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None, + arg: Optional[Union[VirtualApp, TestApp, Router, Portal, dict, tuple, str]] = None, env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None, key: Optional[Union[dict, tuple]] = None, portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None, From 3f5f4de5d7a46513ea47bdd353b453fbd5967a5a Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Dec 2023 11:19:02 -0500 Subject: [PATCH 06/53] Moved structured_data.py here from smaht-portal. --- dcicutils/structured_data.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 4d7cf8839..3ab2b10bc 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -619,9 +619,8 @@ def create_for_testing_local(ini_file: Optional[str] = None) -> Portal: @staticmethod def _create_testapp(value: Union[str, Router, TestApp] = "development.ini") -> TestApp: """ - Creates and returns a TestApp; and also adds a get_with_follow method to it. - Refactored out of above loadxl code (2023-09) to consolidate at a single point, - and also for use by the generate_local_access_key and view_local_object scripts. + Creates and returns a TestApp. Refactored out of above loadxl code to consolidate at a + single point; also for use by the generate_local_access_key and view_local_object scripts. """ if isinstance(value, TestApp): return value From 7d4d29e96894f3c2afca876c6320cd8275639bcc Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Dec 2023 11:25:06 -0500 Subject: [PATCH 07/53] Moved structured_data.py here from smaht-portal. 
--- CHANGELOG.rst | 1 + pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 102315cda..8b1ffd0b7 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,7 @@ Change Log 8.4.1 ===== * Strip sheet name in data_readers.Excel. +* Moved structured_data.py from smaht-portal to here. 8.4.0 diff --git a/pyproject.toml b/pyproject.toml index 5dc35daab..ca8f1ae80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b1" # TODO: To become 8.4.1 +version = "8.4.0.1b2" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 213ac66db71bda80942e99effd0e63e48c5eb529 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Dec 2023 11:31:07 -0500 Subject: [PATCH 08/53] updated dcicutils.rst --- docs/source/dcicutils.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/source/dcicutils.rst b/docs/source/dcicutils.rst index f8b902536..906c09fbc 100644 --- a/docs/source/dcicutils.rst +++ b/docs/source/dcicutils.rst @@ -316,6 +316,13 @@ ssl_certificate_utils :members: +structured_data +^^^^^^^^^^ + +.. automodule:: dcicutils.structured_data + :members: + + task_utils ^^^^^^^^^^ From e1831e4cdd65683c3274db00ab9c84cf77d7f2d6 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Dec 2023 11:34:13 -0500 Subject: [PATCH 09/53] updated dcicutils.rst --- docs/source/dcicutils.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/dcicutils.rst b/docs/source/dcicutils.rst index 906c09fbc..24d41daf6 100644 --- a/docs/source/dcicutils.rst +++ b/docs/source/dcicutils.rst @@ -317,7 +317,7 @@ ssl_certificate_utils structured_data -^^^^^^^^^^ +^^^^^^^^^^^^^^^ .. 
automodule:: dcicutils.structured_data :members: From 6f70e180f253b6fe6b6788a8d24015c92dbfe118 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Dec 2023 12:01:24 -0500 Subject: [PATCH 10/53] Added pyramid to pyproject --- poetry.lock | 298 ++++++++++++++++++++++++++++++++++--------------- pyproject.toml | 3 +- 2 files changed, 210 insertions(+), 91 deletions(-) diff --git a/poetry.lock b/poetry.lock index 73ee5796f..9c93ec47a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,10 +1,9 @@ -# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "async-timeout" version = "4.0.2" description = "Timeout context manager for asyncio programs" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -16,7 +15,6 @@ files = [ name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -35,7 +33,6 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "aws-requests-auth" version = "0.4.3" description = "AWS signature version 4 signing process for the python requests module" -category = "main" optional = false python-versions = "*" files = [ @@ -50,7 +47,6 @@ requests = ">=0.14.0" name = "beautifulsoup4" version = "4.12.2" description = "Screen-scraping library" -category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -69,7 +65,6 @@ lxml = ["lxml"] name = "boto3" version = "1.28.67" description = "The AWS SDK for Python" -category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -89,7 +84,6 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] name = "boto3-stubs" version = "1.28.67" description = "Type annotations for boto3 1.28.67 generated with mypy-boto3-builder 7.19.0" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -468,7 
+462,6 @@ xray = ["mypy-boto3-xray (>=1.28.0,<1.29.0)"] name = "botocore" version = "1.31.67" description = "Low-level, data-driven core of boto 3." -category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -491,7 +484,6 @@ crt = ["awscrt (==0.16.26)"] name = "botocore-stubs" version = "1.31.67" description = "Type annotations and code completion for botocore" -category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -507,7 +499,6 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.9\""} name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -519,7 +510,6 @@ files = [ name = "cffi" version = "1.15.1" description = "Foreign Function Interface for Python calling C code." -category = "main" optional = false python-versions = "*" files = [ @@ -596,7 +586,6 @@ pycparser = "*" name = "chardet" version = "5.2.0" description = "Universal encoding detector for Python 3" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -608,7 +597,6 @@ files = [ name = "charset-normalizer" version = "3.2.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -693,7 +681,6 @@ files = [ name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
-category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -705,7 +692,6 @@ files = [ name = "coverage" version = "7.2.7" description = "Code coverage measurement for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -781,7 +767,6 @@ toml = ["tomli"] name = "cryptography" version = "41.0.3" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -827,7 +812,6 @@ test-randomorder = ["pytest-randomly"] name = "docker" version = "4.4.4" description = "A Python library for the Docker Engine API." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -849,7 +833,6 @@ tls = ["cryptography (>=1.3.4)", "idna (>=2.0.0)", "pyOpenSSL (>=17.5.0)"] name = "elasticsearch" version = "7.13.4" description = "Python client for Elasticsearch" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" files = [ @@ -871,7 +854,6 @@ requests = ["requests (>=2.4.0,<3.0.0)"] name = "et-xmlfile" version = "1.1.0" description = "An implementation of lxml.xmlfile for the standard library" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -883,7 +865,6 @@ files = [ name = "exceptiongroup" version = "1.1.2" description = "Backport of PEP 654 (exception groups)" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -898,7 +879,6 @@ test = ["pytest (>=6)"] name = "flake8" version = "5.0.4" description = "the modular source code checker: pep8 pyflakes and co" -category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -915,7 +895,6 @@ pyflakes = ">=2.5.0,<2.6.0" name = "flaky" version = "3.7.0" description = "Plugin for nose or pytest that automatically reruns flaky tests." 
-category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -927,7 +906,6 @@ files = [ name = "gitdb" version = "4.0.10" description = "Git Object Database" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -942,7 +920,6 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.32" description = "GitPython is a Python library used to interact with Git repositories" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -953,11 +930,25 @@ files = [ [package.dependencies] gitdb = ">=4.0.1,<5" +[[package]] +name = "hupper" +version = "1.12" +description = "Integrated process monitor for developing and reloading daemons." +optional = false +python-versions = ">=3.7" +files = [ + {file = "hupper-1.12-py3-none-any.whl", hash = "sha256:b8bc41bb75939e816f30f118026d0ba99544af4d6992583df3b4813765af27ef"}, + {file = "hupper-1.12.tar.gz", hash = "sha256:18b1653d9832c9f8e7d3401986c7e7af2ae6783616be0bc406bfe0b14134a5c6"}, +] + +[package.extras] +docs = ["Sphinx", "pylons-sphinx-themes", "setuptools", "watchdog"] +testing = ["mock", "pytest", "pytest-cov", "watchdog"] + [[package]] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -969,7 +960,6 @@ files = [ name = "importlib-resources" version = "6.1.0" description = "Read resources from Python packages" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -988,7 +978,6 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1000,7 +989,6 @@ files = [ name = "jmespath" version = "1.0.1" description = "JSON Matching Expressions" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ 
-1012,7 +1000,6 @@ files = [ name = "jsonc-parser" version = "1.1.5" description = "A lightweight, native tool for parsing .jsonc files" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1024,7 +1011,6 @@ files = [ name = "jsonschema" version = "4.19.1" description = "An implementation of JSON Schema validation for Python" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1048,7 +1034,6 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "jsonschema-specifications" version = "2023.7.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1064,7 +1049,6 @@ referencing = ">=0.28.0" name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1076,7 +1060,6 @@ files = [ name = "mirakuru" version = "2.5.1" description = "Process executor (not only) for tests." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1091,7 +1074,6 @@ psutil = {version = ">=4.0.0", markers = "sys_platform != \"cygwin\""} name = "openpyxl" version = "3.1.2" description = "A Python library to read/write Excel 2010 xlsx/xlsm files" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1106,7 +1088,6 @@ et-xmlfile = "*" name = "opensearch-py" version = "2.3.0" description = "Python client for OpenSearch" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" files = [ @@ -1131,7 +1112,6 @@ kerberos = ["requests-kerberos"] name = "packaging" version = "23.1" description = "Core utilities for Python packages" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1139,11 +1119,26 @@ files = [ {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] +[[package]] +name = "pastedeploy" +version = "3.1.0" +description = "Load, configure, and compose WSGI applications and servers" +optional = false +python-versions = ">=3.7" +files = [ + {file = "PasteDeploy-3.1.0-py3-none-any.whl", hash = "sha256:76388ad53a661448d436df28c798063108f70e994ddc749540d733cdbd1b38cf"}, + {file = "PasteDeploy-3.1.0.tar.gz", hash = "sha256:9ddbaf152f8095438a9fe81f82c78a6714b92ae8e066bed418b6a7ff6a095a95"}, +] + +[package.extras] +docs = ["Sphinx (>=1.7.5)", "pylons-sphinx-themes"] +paste = ["Paste"] +testing = ["Paste", "pytest", "pytest-cov"] + [[package]] name = "pip-licenses" version = "4.3.3" description = "Dump the software license list of Python packages installed with pip." -category = "dev" optional = false python-versions = "~=3.8" files = [ @@ -1161,7 +1156,6 @@ test = ["docutils", "mypy", "pytest-cov", "pytest-pycodestyle", "pytest-runner"] name = "pkgutil-resolve-name" version = "1.3.10" description = "Resolve a name to an object." 
-category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1169,11 +1163,43 @@ files = [ {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, ] +[[package]] +name = "plaster" +version = "1.1.2" +description = "A loader interface around multiple config file formats." +optional = false +python-versions = ">=3.7" +files = [ + {file = "plaster-1.1.2-py2.py3-none-any.whl", hash = "sha256:42992ab1f4865f1278e2ad740e8ad145683bb4022e03534265528f0c23c0df2d"}, + {file = "plaster-1.1.2.tar.gz", hash = "sha256:f8befc54bf8c1147c10ab40297ec84c2676fa2d4ea5d6f524d9436a80074ef98"}, +] + +[package.extras] +docs = ["Sphinx", "pylons-sphinx-themes"] +testing = ["pytest", "pytest-cov"] + +[[package]] +name = "plaster-pastedeploy" +version = "1.0.1" +description = "A loader implementing the PasteDeploy syntax to be used by plaster." +optional = false +python-versions = ">=3.7" +files = [ + {file = "plaster_pastedeploy-1.0.1-py2.py3-none-any.whl", hash = "sha256:ad3550cc744648969ed3b810f33c9344f515ee8d8a8cec18e8f2c4a643c2181f"}, + {file = "plaster_pastedeploy-1.0.1.tar.gz", hash = "sha256:be262e6d2e41a7264875daa2fe2850cbb0615728bcdc92828fdc72736e381412"}, +] + +[package.dependencies] +PasteDeploy = ">=2.0" +plaster = ">=0.5" + +[package.extras] +testing = ["pytest", "pytest-cov"] + [[package]] name = "pluggy" version = "1.2.0" description = "plugin and hook calling mechanisms for python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1189,7 +1215,6 @@ testing = ["pytest", "pytest-benchmark"] name = "port-for" version = "0.7.1" description = "Utility that helps with local TCP ports management. It can find an unused TCP localhost port and remember the association." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1201,7 +1226,6 @@ files = [ name = "prettytable" version = "3.9.0" description = "A simple Python library for easily displaying tabular data in a visually appealing ASCII table format" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1219,7 +1243,6 @@ tests = ["pytest", "pytest-cov", "pytest-lazy-fixture"] name = "psutil" version = "5.9.5" description = "Cross-platform lib for process and system monitoring in Python." -category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1246,7 +1269,6 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] name = "pycodestyle" version = "2.9.1" description = "Python style guide checker" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1258,7 +1280,6 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1270,7 +1291,6 @@ files = [ name = "pyflakes" version = "2.5.0" description = "passive checker of Python programs" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1282,7 +1302,6 @@ files = [ name = "pyjwt" version = "2.8.0" description = "JSON Web Token implementation in Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1300,7 +1319,6 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] name = "pyopenssl" version = "23.2.0" description = "Python wrapper module around the OpenSSL library" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1315,11 +1333,36 @@ cryptography = ">=38.0.0,<40.0.0 || >40.0.0,<40.0.1 || >40.0.1,<42" docs = ["sphinx (!=5.2.0,!=5.2.0.post0)", "sphinx-rtd-theme"] test = ["flaky", "pretend", "pytest (>=3.0.1)"] +[[package]] +name = "pyramid" +version = "1.10.4" +description = "The Pyramid 
Web Framework, a Pylons project" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" +files = [ + {file = "pyramid-1.10.4-py2.py3-none-any.whl", hash = "sha256:51bf64647345237c00d2fe558935e0e4938c156e29f17e203457fd8e1d757dc7"}, + {file = "pyramid-1.10.4.tar.gz", hash = "sha256:d80ccb8cfa550139b50801591d4ca8a5575334adb493c402fce2312f55d07d66"}, +] + +[package.dependencies] +hupper = ">=1.5" +plaster = "*" +plaster-pastedeploy = "*" +setuptools = "*" +translationstring = ">=0.4" +venusian = ">=1.0" +webob = ">=1.8.3" +"zope.deprecation" = ">=3.5.0" +"zope.interface" = ">=3.8.0" + +[package.extras] +docs = ["Sphinx (>=1.8.1)", "docutils", "pylons-sphinx-latesturl", "pylons-sphinx-themes (>=1.0.8)", "repoze.sphinx.autointerface", "sphinxcontrib-autoprogram"] +testing = ["coverage", "nose", "virtualenv", "webtest (>=1.3.1)", "zope.component (>=4.0)"] + [[package]] name = "pytest" version = "7.4.2" description = "pytest: simple powerful testing with Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1342,7 +1385,6 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-cov" version = "4.1.0" description = "Pytest plugin for measuring coverage." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1361,7 +1403,6 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtuale name = "pytest-mock" version = "3.11.1" description = "Thin-wrapper around the mock package for easier use with pytest" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1379,7 +1420,6 @@ dev = ["pre-commit", "pytest-asyncio", "tox"] name = "pytest-redis" version = "3.0.2" description = "Redis fixtures and fixture factories for Pytest." 
-category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1397,7 +1437,6 @@ redis = ">=3" name = "pytest-runner" version = "6.0.0" description = "Invoke py.test as distutils command with dependency resolution" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1413,7 +1452,6 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -1428,7 +1466,6 @@ six = ">=1.5" name = "pytz" version = "2023.3" description = "World timezone definitions, modern and historical" -category = "main" optional = false python-versions = "*" files = [ @@ -1440,7 +1477,6 @@ files = [ name = "pywin32" version = "227" description = "Python for Window Extensions" -category = "main" optional = false python-versions = "*" files = [ @@ -1462,7 +1498,6 @@ files = [ name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1471,7 +1506,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = 
"PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1479,15 +1513,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1504,7 +1531,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1512,7 +1538,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = 
"PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -1522,7 +1547,6 @@ files = [ name = "redis" version = "4.6.0" description = "Python client for Redis database and key-value store" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1541,7 +1565,6 @@ ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)" name = "referencing" version = "0.30.2" description = "JSON Referencing + Python" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1557,7 +1580,6 @@ rpds-py = ">=0.7.0" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1579,7 +1601,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "rfc3986" version = "1.5.0" description = "Validating URI References per RFC 3986" -category = "main" optional = false python-versions = "*" files = [ @@ -1594,7 +1615,6 @@ idna2008 = ["idna"] name = "rpds-py" version = "0.10.6" description = "Python bindings to Rust's persistent data structures (rpds)" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1703,7 +1723,6 @@ files = [ name = "s3transfer" version = "0.7.0" description = "An Amazon S3 Transfer Manager" -category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -1717,11 +1736,26 @@ botocore = ">=1.12.36,<2.0a.0" [package.extras] crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] +[[package]] +name = "setuptools" +version = "69.0.2" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-69.0.2-py3-none-any.whl", hash = "sha256:1e8fdff6797d3865f37397be788a4e3cba233608e9b509382a2777d25ebde7f2"}, + {file = "setuptools-69.0.2.tar.gz", hash = "sha256:735896e78a4742605974de002ac60562d286fa8051a7e2299445e8e8fbb01aa6"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", 
"tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + [[package]] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1733,7 +1767,6 @@ files = [ name = "smmap" version = "5.0.0" description = "A pure Python implementation of a sliding window memory map manager" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1745,7 +1778,6 @@ files = [ name = "soupsieve" version = "2.4.1" description = "A modern CSS selector implementation for Beautiful Soup." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1757,7 +1789,6 @@ files = [ name = "structlog" version = "19.2.0" description = "Structured Logging for Python" -category = "main" optional = false python-versions = "*" files = [ @@ -1778,7 +1809,6 @@ tests = ["coverage", "freezegun (>=0.2.8)", "pretend", "pytest (>=3.3.0)", "pyth name = "toml" version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" -category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1790,7 +1820,6 @@ files = [ name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1802,7 +1831,6 @@ files = [ name = "tqdm" version = "4.65.0" description = "Fast, Extensible Progress Meter" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1819,11 +1847,24 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "translationstring" +version = "1.4" +description = "Utility library for i18n relied on by various Repoze and 
Pyramid packages" +optional = false +python-versions = "*" +files = [ + {file = "translationstring-1.4-py2.py3-none-any.whl", hash = "sha256:5f4dc4d939573db851c8d840551e1a0fb27b946afe3b95aafc22577eed2d6262"}, + {file = "translationstring-1.4.tar.gz", hash = "sha256:bf947538d76e69ba12ab17283b10355a9ecfbc078e6123443f43f2107f6376f3"}, +] + +[package.extras] +docs = ["Sphinx (>=1.3.1)", "docutils", "pylons-sphinx-themes"] + [[package]] name = "types-awscrt" version = "0.19.3" description = "Type annotations and code completion for awscrt" -category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1835,7 +1876,6 @@ files = [ name = "types-s3transfer" version = "0.7.0" description = "Type annotations and code completion for s3transfer" -category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1847,7 +1887,6 @@ files = [ name = "typing-extensions" version = "4.7.1" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1859,7 +1898,6 @@ files = [ name = "urllib3" version = "1.26.16" description = "HTTP library with thread-safe connection pooling, file post, and more." 
-category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -1872,11 +1910,25 @@ brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +[[package]] +name = "venusian" +version = "3.1.0" +description = "A library for deferring decorator actions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "venusian-3.1.0-py3-none-any.whl", hash = "sha256:d1fb1e49927f42573f6c9b7c4fcf61c892af8fdcaa2314daa01d9a560b23488d"}, + {file = "venusian-3.1.0.tar.gz", hash = "sha256:eb72cdca6f3139a15dc80f9c95d3c10f8a54a0ba881eeef8e2ec5b42d3ee3a95"}, +] + +[package.extras] +docs = ["Sphinx (>=4.3.2)", "pylons-sphinx-themes", "repoze.sphinx.autointerface", "sphinx-copybutton"] +testing = ["coverage", "pytest", "pytest-cov"] + [[package]] name = "waitress" version = "2.1.2" description = "Waitress WSGI server" -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -1892,7 +1944,6 @@ testing = ["coverage (>=5.0)", "pytest", "pytest-cover"] name = "wcwidth" version = "0.2.8" description = "Measures the displayed width of unicode strings in a terminal" -category = "dev" optional = false python-versions = "*" files = [ @@ -1904,7 +1955,6 @@ files = [ name = "webob" version = "1.8.7" description = "WSGI request and response object" -category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*" files = [ @@ -1920,7 +1970,6 @@ testing = ["coverage", "pytest (>=3.1.0)", "pytest-cov", "pytest-xdist"] name = "websocket-client" version = "1.6.1" description = "WebSocket client for Python with low level API options" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1937,7 +1986,6 @@ test = ["websockets"] name = "webtest" version = "2.0.35" description = "Helper to 
test WSGI applications" -category = "main" optional = false python-versions = "*" files = [ @@ -1959,7 +2007,6 @@ tests = ["PasteDeploy", "WSGIProxy2", "coverage", "mock", "nose (<1.3.0)", "pyqu name = "zipp" version = "3.17.0" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1971,7 +2018,78 @@ files = [ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[[package]] +name = "zope-deprecation" +version = "5.0" +description = "Zope Deprecation Infrastructure" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "zope.deprecation-5.0-py3-none-any.whl", hash = "sha256:28c2ee983812efb4676d33c7a8c6ade0df191c1c6d652bbbfe6e2eeee067b2d4"}, + {file = "zope.deprecation-5.0.tar.gz", hash = "sha256:b7c32d3392036b2145c40b3103e7322db68662ab09b7267afe1532a9d93f640f"}, +] + +[package.dependencies] +setuptools = "*" + +[package.extras] +docs = ["Sphinx"] +test = ["zope.testrunner"] + +[[package]] +name = "zope-interface" +version = "6.1" +description = "Interfaces for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "zope.interface-6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:43b576c34ef0c1f5a4981163b551a8781896f2a37f71b8655fd20b5af0386abb"}, + {file = "zope.interface-6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:67be3ca75012c6e9b109860820a8b6c9a84bfb036fbd1076246b98e56951ca92"}, + {file = "zope.interface-6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b9bc671626281f6045ad61d93a60f52fd5e8209b1610972cf0ef1bbe6d808e3"}, + {file = 
"zope.interface-6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bbe81def9cf3e46f16ce01d9bfd8bea595e06505e51b7baf45115c77352675fd"}, + {file = "zope.interface-6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dc998f6de015723196a904045e5a2217f3590b62ea31990672e31fbc5370b41"}, + {file = "zope.interface-6.1-cp310-cp310-win_amd64.whl", hash = "sha256:239a4a08525c080ff833560171d23b249f7f4d17fcbf9316ef4159f44997616f"}, + {file = "zope.interface-6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9ffdaa5290422ac0f1688cb8adb1b94ca56cee3ad11f29f2ae301df8aecba7d1"}, + {file = "zope.interface-6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:34c15ca9248f2e095ef2e93af2d633358c5f048c49fbfddf5fdfc47d5e263736"}, + {file = "zope.interface-6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b012d023b4fb59183909b45d7f97fb493ef7a46d2838a5e716e3155081894605"}, + {file = "zope.interface-6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97806e9ca3651588c1baaebb8d0c5ee3db95430b612db354c199b57378312ee8"}, + {file = "zope.interface-6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fddbab55a2473f1d3b8833ec6b7ac31e8211b0aa608df5ab09ce07f3727326de"}, + {file = "zope.interface-6.1-cp311-cp311-win_amd64.whl", hash = "sha256:a0da79117952a9a41253696ed3e8b560a425197d4e41634a23b1507efe3273f1"}, + {file = "zope.interface-6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e8bb9c990ca9027b4214fa543fd4025818dc95f8b7abce79d61dc8a2112b561a"}, + {file = "zope.interface-6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b51b64432eed4c0744241e9ce5c70dcfecac866dff720e746d0a9c82f371dfa7"}, + {file = "zope.interface-6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:aa6fd016e9644406d0a61313e50348c706e911dca29736a3266fc9e28ec4ca6d"}, + {file = "zope.interface-6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c8cf55261e15590065039696607f6c9c1aeda700ceee40c70478552d323b3ff"}, + {file = "zope.interface-6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e30506bcb03de8983f78884807e4fd95d8db6e65b69257eea05d13d519b83ac0"}, + {file = "zope.interface-6.1-cp312-cp312-win_amd64.whl", hash = "sha256:e33e86fd65f369f10608b08729c8f1c92ec7e0e485964670b4d2633a4812d36b"}, + {file = "zope.interface-6.1-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:2f8d89721834524a813f37fa174bac074ec3d179858e4ad1b7efd4401f8ac45d"}, + {file = "zope.interface-6.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13b7d0f2a67eb83c385880489dbb80145e9d344427b4262c49fbf2581677c11c"}, + {file = "zope.interface-6.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef43ee91c193f827e49599e824385ec7c7f3cd152d74cb1dfe02cb135f264d83"}, + {file = "zope.interface-6.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e441e8b7d587af0414d25e8d05e27040d78581388eed4c54c30c0c91aad3a379"}, + {file = "zope.interface-6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:f89b28772fc2562ed9ad871c865f5320ef761a7fcc188a935e21fe8b31a38ca9"}, + {file = "zope.interface-6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:70d2cef1bf529bff41559be2de9d44d47b002f65e17f43c73ddefc92f32bf00f"}, + {file = "zope.interface-6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ad54ed57bdfa3254d23ae04a4b1ce405954969c1b0550cc2d1d2990e8b439de1"}, + {file = "zope.interface-6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef467d86d3cfde8b39ea1b35090208b0447caaabd38405420830f7fd85fbdd56"}, + {file = 
"zope.interface-6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6af47f10cfc54c2ba2d825220f180cc1e2d4914d783d6fc0cd93d43d7bc1c78b"}, + {file = "zope.interface-6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9559138690e1bd4ea6cd0954d22d1e9251e8025ce9ede5d0af0ceae4a401e43"}, + {file = "zope.interface-6.1-cp38-cp38-win_amd64.whl", hash = "sha256:964a7af27379ff4357dad1256d9f215047e70e93009e532d36dcb8909036033d"}, + {file = "zope.interface-6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:387545206c56b0315fbadb0431d5129c797f92dc59e276b3ce82db07ac1c6179"}, + {file = "zope.interface-6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57d0a8ce40ce440f96a2c77824ee94bf0d0925e6089df7366c2272ccefcb7941"}, + {file = "zope.interface-6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ebc4d34e7620c4f0da7bf162c81978fce0ea820e4fa1e8fc40ee763839805f3"}, + {file = "zope.interface-6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a804abc126b33824a44a7aa94f06cd211a18bbf31898ba04bd0924fbe9d282d"}, + {file = "zope.interface-6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f294a15f7723fc0d3b40701ca9b446133ec713eafc1cc6afa7b3d98666ee1ac"}, + {file = "zope.interface-6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a41f87bb93b8048fe866fa9e3d0c51e27fe55149035dcf5f43da4b56732c0a40"}, + {file = "zope.interface-6.1.tar.gz", hash = "sha256:2fdc7ccbd6eb6b7df5353012fbed6c3c5d04ceaca0038f75e601060e95345309"}, +] + +[package.dependencies] +setuptools = "*" + +[package.extras] +docs = ["Sphinx", "repoze.sphinx.autointerface", "sphinx-rtd-theme"] +test = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] +testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] + [metadata] lock-version = "2.0" python-versions 
= ">=3.8,<3.12" -content-hash = "3617e1e3d479d0955f9107113d56e04d5692e74d73d37beba083e53ce83f4795" +content-hash = "f809dee9b269816495aebe98b7299657df63d20446e140df3635955ca358c861" diff --git a/pyproject.toml b/pyproject.toml index ca8f1ae80..c8282bc44 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b2" # TODO: To become 8.4.1 +version = "8.4.0.1b3" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" @@ -53,6 +53,7 @@ openpyxl = "^3.1.2" opensearch-py = "^2.0.1" pyOpenSSL = "^23.1.1" PyJWT = "^2.6.0" +pyramid = "1.10.4" pytz = ">=2020.4" redis = "^4.5.1" PyYAML = "^6.0.1" From 4484aec857d7680f0e7b3d338b23ed7744504d86 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Dec 2023 12:18:36 -0500 Subject: [PATCH 11/53] Minor code cleanup. --- dcicutils/structured_data.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 3ab2b10bc..1e4405018 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -481,15 +481,8 @@ def __init__(self, env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None, key: Optional[Union[dict, tuple]] = None, portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None) -> PortalBase: - if isinstance(arg, VirtualApp) and not portal: - portal = arg - elif isinstance(arg, TestApp) and not portal: - portal = arg - elif isinstance(arg, Router) and not portal: - portal = arg - elif isinstance(arg, Portal) and not portal: - portal = arg - elif isinstance(arg, str) and arg.endswith(".ini") and not portal: + if (isinstance(arg, (VirtualApp, TestApp, Router, Portal)) or + isinstance(arg, str) and arg.endswith(".ini")) and not portal portal = arg elif isinstance(arg, str) and not env: env = arg From 
cc3bdec2faa1df78043e70707b564f83d611ddf0 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Dec 2023 12:19:47 -0500 Subject: [PATCH 12/53] Minor code cleanup. --- dcicutils/structured_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 1e4405018..50bed3b37 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -481,8 +481,8 @@ def __init__(self, env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None, key: Optional[Union[dict, tuple]] = None, portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None) -> PortalBase: - if (isinstance(arg, (VirtualApp, TestApp, Router, Portal)) or - isinstance(arg, str) and arg.endswith(".ini")) and not portal + if ((isinstance(arg, (VirtualApp, TestApp, Router, Portal)) or + isinstance(arg, str) and arg.endswith(".ini")) and not portal): portal = arg elif isinstance(arg, str) and not env: env = arg From 0284e4114b4d67405182a5c7f3c783c54a193658 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Dec 2023 16:03:17 -0500 Subject: [PATCH 13/53] Update dcictutil version which now has structured_data. 
--- dcicutils/structured_data.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 50bed3b37..4e174333c 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -9,7 +9,7 @@ import requests from requests.models import Response as RequestResponse import sys -from typing import Any, Callable, List, Optional, Tuple, Type, Union +from typing import Any, Callable, List, Optional, Set, Tuple, Type, Union from webtest.app import TestApp, TestResponse from dcicutils.common import OrchestratedApp, APP_CGAP, APP_FOURFRONT, APP_SMAHT, ORCHESTRATED_APPS from dcicutils.creds_utils import CGAPKeyManager, FourfrontKeyManager, SMaHTKeyManager @@ -53,6 +53,8 @@ def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp self._order = order self._prune = prune self._issues = None + self._refs_resolved = set() + self._refs_unresolved = set() self._load_file(file) if file else None @staticmethod @@ -72,6 +74,9 @@ def validate(self) -> Optional[List[str]]: issues.extend([f"{schema.name} [{item_number}]: {issue}" for issue in validation_issues]) return issues + (self._issues or []) + def refs(self) -> Tuple[List[str], List[str]]: + return (sorted(self._refs_resolved), sorted(self._refs_unresolved)) + def _load_file(self, file: str) -> None: # Returns a dictionary where each property is the name (i.e. the type) of the data, # and the value is array of dictionaries for the data itself. 
Handle these kinds of files: @@ -128,6 +133,8 @@ def _load_reader(self, reader: RowReader, type_name: str) -> None: structured_row_template.set_value(structured_row, column_name, value, reader.location) if schema and (schema_name := schema.name): type_name = schema_name + self._refs_resolved = self._refs_resolved | schema._refs_resolved + self._refs_unresolved = self._refs_unresolved | schema._refs_unresolved self._add(type_name, structured_row) def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None: @@ -262,6 +269,8 @@ def __init__(self, schema_json: dict, portal: Optional[Portal] = None) -> None: "number": self._map_function_number, "string": self._map_function_string } + self._refs_resolved = set() + self._refs_unresolved = set() self._typeinfo = self._create_typeinfo(schema_json) @staticmethod @@ -336,11 +345,14 @@ def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[st exception = None if not value: if (column := typeinfo.get("column")) and column in self.data.get("required", []): + self._refs_unresolved.add(f"/{link_to}/") exception = f"No required reference (linkTo) value for: {link_to}" elif portal and not portal.ref_exists(link_to, value): + self._refs_unresolved.add(f"/{link_to}/{value}") exception = f"Cannot resolve reference (linkTo) for: {link_to}" if exception: raise Exception(exception + f"{f'/{value}' if value else ''}{f' from {src}' if src else ''}") + self._refs_resolved.add(f"/{link_to}/{value}") return value return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src) From 69721091632733d94ded67cbf808087115ca22ec Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 4 Dec 2023 12:41:15 -0500 Subject: [PATCH 14/53] Handle propertly sheet names with trailing spaces for data_readers.Excel --- dcicutils/data_readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dcicutils/data_readers.py b/dcicutils/data_readers.py index b6f82263d..46a07a059 100644 --- 
a/dcicutils/data_readers.py +++ b/dcicutils/data_readers.py @@ -139,7 +139,7 @@ def sheet_reader(self, sheet_name: str) -> ExcelSheetReader: def open(self) -> None: if self._workbook is None: self._workbook = openpyxl.load_workbook(self._file, data_only=True) - self.sheet_names = [(sheet_name or "").strip() for sheet_name in (self._workbook.sheetnames or [])] + self.sheet_names = self._workbook.sheetnames or [] def __del__(self) -> None: if (workbook := self._workbook) is not None: From 2e20cf2681e1b7ae76dd7b292bf116c2f6a64e31 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 4 Dec 2023 18:15:44 -0500 Subject: [PATCH 15/53] Change to data_readers.Excel to respect (ignore) hidden sheets. --- dcicutils/data_readers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dcicutils/data_readers.py b/dcicutils/data_readers.py index 46a07a059..11e493d68 100644 --- a/dcicutils/data_readers.py +++ b/dcicutils/data_readers.py @@ -139,7 +139,8 @@ def sheet_reader(self, sheet_name: str) -> ExcelSheetReader: def open(self) -> None: if self._workbook is None: self._workbook = openpyxl.load_workbook(self._file, data_only=True) - self.sheet_names = self._workbook.sheetnames or [] + self.sheet_names = [sheet_name for sheet_name in self._workbook.sheetnames + if self._workbook[sheet_name].sheet_state != "hidden"] def __del__(self) -> None: if (workbook := self._workbook) is not None: From ddb174e1ecdfa3485eb128707d14d5d0209dcd1b Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 4 Dec 2023 23:39:32 -0500 Subject: [PATCH 16/53] Added get_schemas_super_type_map to PortalBase in structured_data to handle sub-class refs. 
--- dcicutils/structured_data.py | 52 +++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 4e174333c..f90caca3e 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -1,3 +1,4 @@ +from collections import deque import copy from functools import lru_cache import json @@ -9,7 +10,7 @@ import requests from requests.models import Response as RequestResponse import sys -from typing import Any, Callable, List, Optional, Set, Tuple, Type, Union +from typing import Any, Callable, List, Optional, Tuple, Type, Union from webtest.app import TestApp, TestResponse from dcicutils.common import OrchestratedApp, APP_CGAP, APP_FOURFRONT, APP_SMAHT, ORCHESTRATED_APPS from dcicutils.creds_utils import CGAPKeyManager, FourfrontKeyManager, SMaHTKeyManager @@ -544,9 +545,6 @@ def post_metadata(self, object_type: str, data: str) -> Optional[dict]: return post_metadata(schema_name=object_type, post_item=data, key=self._key) return self.post(f"/{object_type}", data) - def get_schema(self, schema_name: str) -> Optional[dict]: - return get_schema(schema_name, portal_vapp=self._vapp, key=self._key) - def get(self, uri: str, follow: bool = True, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]: if isinstance(self._vapp, (VirtualApp, TestApp)): response = self._vapp.get(self._uri(uri), **self._kwargs(**kwargs)) @@ -570,6 +568,40 @@ def post(self, uri: str, data: Optional[dict] = None, json: Optional[dict] = Non return self._vapp.post_json(self._uri(uri), json or data, upload_files=files, **self._kwargs(**kwargs)) return requests.post(self._uri(uri), json=json or data, files=files, **self._kwargs(**kwargs)) + def get_schema(self, schema_name: str) -> Optional[dict]: + return get_schema(schema_name, portal_vapp=self._vapp, key=self._key) + + @lru_cache(maxsize=1) + def get_schemas_super_type_map(self) -> dict: + """ + Returns the "super type map" for 
all of the known schemas. + This is a dictionary of all top-level types which have sub-types whose value is and + array of all of those sub-types (direct and all descendents), in breadth first order. + """ + def breadth_first(super_type_map: dict, super_type_name: str) -> dict: + result = [] + queue = deque(super_type_map.get(super_type_name, [])) + while queue: + result.append(sub_type_name := queue.popleft()) + if sub_type_name in super_type_map: + queue.extend(super_type_map[sub_type_name]) + return result + if not (schemas := self.get("/profiles/")) or not (schemas := schemas.json): + return {} + super_type_map = {} + for type_name in schemas: + if super_type_name := schemas[type_name].get("rdfs:subClassOf"): + super_type_name = super_type_name.replace("/profiles/", "").replace(".json", "") + if super_type_name != "Item": + if not super_type_map.get(super_type_name): + super_type_map[super_type_name] = [type_name] + elif type_name not in super_type_map[super_type_name]: + super_type_map[super_type_name].append(type_name) + super_type_map_flattened = {} + for super_type_name in super_type_map: + super_type_map_flattened[super_type_name] = breadth_first(super_type_map, super_type_name) + return super_type_map_flattened + def _uri(self, uri: str) -> str: if not isinstance(uri, str) or not uri: return "/" @@ -660,6 +692,8 @@ def get_metadata(self, object_name: str) -> Optional[dict]: @lru_cache(maxsize=256) def get_schema(self, schema_name: str) -> Optional[dict]: + # TODO: Now that we have get_schemas_super_type_map which gets all schemas, might as + # well use it and not hit the portal for each get_schema request (even though lru cached). 
def get_schema_exact(schema_name: str) -> Optional[dict]: # noqa return (next((schema for schema in self._schemas or [] if Schema.type_name(schema.get("title")) == Schema.type_name(schema_name)), None) or @@ -677,6 +711,16 @@ def get_schema_exact(schema_name: str) -> Optional[dict]: # noqa raise def ref_exists(self, type_name: str, value: str) -> bool: + if self._ref_exists_single(type_name, value): + return True + if (schemas_super_type_map := self.get_schemas_super_type_map()): + if (sub_type_names := schemas_super_type_map.get(type_name)): + for sub_type_name in sub_type_names: + if self._ref_exists_single(sub_type_name, value): + return True + return False + + def _ref_exists_single(self, type_name: str, value: str) -> bool: if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)): iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"} for item in items: From 08d59ea43fed9c5452550702e09c7b0840860412 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 5 Dec 2023 08:04:09 -0500 Subject: [PATCH 17/53] More work related to structured_data.PortalBase.get_schemas_super_type_map to handle sub-class refs. --- dcicutils/structured_data.py | 99 +++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 48 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index f90caca3e..b26c08d74 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -571,36 +571,8 @@ def post(self, uri: str, data: Optional[dict] = None, json: Optional[dict] = Non def get_schema(self, schema_name: str) -> Optional[dict]: return get_schema(schema_name, portal_vapp=self._vapp, key=self._key) - @lru_cache(maxsize=1) - def get_schemas_super_type_map(self) -> dict: - """ - Returns the "super type map" for all of the known schemas. 
- This is a dictionary of all top-level types which have sub-types whose value is and - array of all of those sub-types (direct and all descendents), in breadth first order. - """ - def breadth_first(super_type_map: dict, super_type_name: str) -> dict: - result = [] - queue = deque(super_type_map.get(super_type_name, [])) - while queue: - result.append(sub_type_name := queue.popleft()) - if sub_type_name in super_type_map: - queue.extend(super_type_map[sub_type_name]) - return result - if not (schemas := self.get("/profiles/")) or not (schemas := schemas.json): - return {} - super_type_map = {} - for type_name in schemas: - if super_type_name := schemas[type_name].get("rdfs:subClassOf"): - super_type_name = super_type_name.replace("/profiles/", "").replace(".json", "") - if super_type_name != "Item": - if not super_type_map.get(super_type_name): - super_type_map[super_type_name] = [type_name] - elif type_name not in super_type_map[super_type_name]: - super_type_map[super_type_name].append(type_name) - super_type_map_flattened = {} - for super_type_name in super_type_map: - super_type_map_flattened[super_type_name] = breadth_first(super_type_map, super_type_name) - return super_type_map_flattened + def get_schemas(self) -> dict: + return self.get("/profiles/").json def _uri(self, uri: str) -> str: if not isinstance(uri, str) or not uri: @@ -629,7 +601,7 @@ def create_for_testing(ini_file: Optional[str] = None) -> PortalBase: @staticmethod def create_for_testing_local(ini_file: Optional[str] = None) -> Portal: - if isinstance(ini_file, str): + if isinstance(ini_file, str) and ini_file: return Portal(Portal._create_testapp(ini_file)) minimal_ini_for_testing_local = "\n".join([ "[app:app]\nuse = egg:encoded\nfile_upload_bucket = dummy", @@ -692,27 +664,58 @@ def get_metadata(self, object_name: str) -> Optional[dict]: @lru_cache(maxsize=256) def get_schema(self, schema_name: str) -> Optional[dict]: - # TODO: Now that we have get_schemas_super_type_map which gets all 
schemas, might as - # well use it and not hit the portal for each get_schema request (even though lru cached). - def get_schema_exact(schema_name: str) -> Optional[dict]: # noqa - return (next((schema for schema in self._schemas or [] - if Schema.type_name(schema.get("title")) == Schema.type_name(schema_name)), None) or - super(Portal, self).get_schema(schema_name)) - try: - if (schema := get_schema_exact(schema_name)): - return schema - except Exception: # Try/force camel-case if all upper/lower-case. - if schema_name == schema_name.upper(): - if (schema := get_schema_exact(schema_name.lower().title())): - return schema - elif schema_name == schema_name.lower(): - if (schema := get_schema_exact(schema_name.title())): - return schema - raise + if (schema := self.get_schemas().get(schema_name)): + return schema + if schema_name == schema_name.upper() and (schema := self.get_schemas(schema_name.lower().title())): + return schema + if schema_name == schema_name.lower() and (schema := get_schema_exact(schema_name.title())): + return schema + + @lru_cache(maxsize=1) + def get_schemas(self) -> dict: + schemas = super(Portal, self).get_schemas() + if self._schemas: + schemas = copy.deepcopy(schemas) + for portal_schemas in self._schemas: + if portal_schema.get("title"): + schemas[portal_schema["title"]] = portal_schema + return schemas + + @lru_cache(maxsize=1) + def get_schemas_super_type_map(self) -> dict: + """ + Returns the "super type map" for all of the known schemas (via /profiles). + This is a dictionary of all types which have (one or more) sub-types whose value is + an array of all of those sub-types (direct and all descendents), in breadth first order. 
+ """ + def breadth_first(super_type_map: dict, super_type_name: str) -> dict: + result = [] + queue = deque(super_type_map.get(super_type_name, [])) + while queue: + result.append(sub_type_name := queue.popleft()) + if sub_type_name in super_type_map: + queue.extend(super_type_map[sub_type_name]) + return result + if not (schemas := self.get_schemas()): + return {} + super_type_map = {} + for type_name in schemas: + if super_type_name := schemas[type_name].get("rdfs:subClassOf"): + super_type_name = super_type_name.replace("/profiles/", "").replace(".json", "") + if super_type_name != "Item": + if not super_type_map.get(super_type_name): + super_type_map[super_type_name] = [type_name] + elif type_name not in super_type_map[super_type_name]: + super_type_map[super_type_name].append(type_name) + super_type_map_flattened = {} + for super_type_name in super_type_map: + super_type_map_flattened[super_type_name] = breadth_first(super_type_map, super_type_name) + return super_type_map_flattened def ref_exists(self, type_name: str, value: str) -> bool: if self._ref_exists_single(type_name, value): return True + # Check for the given ref in all sub-types of the given type. if (schemas_super_type_map := self.get_schemas_super_type_map()): if (sub_type_names := schemas_super_type_map.get(type_name)): for sub_type_name in sub_type_names: From d6d235469b390d14623765f4a34bdb79e3a9ff7f Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 5 Dec 2023 08:14:31 -0500 Subject: [PATCH 18/53] More work related to structured_data.PortalBase.get_schemas_super_type_map to handle sub-class refs. 
--- dcicutils/structured_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index b26c08d74..e70a758bc 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -676,9 +676,9 @@ def get_schemas(self) -> dict: schemas = super(Portal, self).get_schemas() if self._schemas: schemas = copy.deepcopy(schemas) - for portal_schemas in self._schemas: - if portal_schema.get("title"): - schemas[portal_schema["title"]] = portal_schema + for user_specified_schema in self._schemas: + if user_specified_schema.get("title"): + schemas[user_specified_schema["title"]] = user_specified_schema return schemas @lru_cache(maxsize=1) From 0d97817738e4ef7eac3642fddfc635842abe5d15 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 5 Dec 2023 08:21:18 -0500 Subject: [PATCH 19/53] More work related to structured_data.PortalBase.get_schemas_super_type_map to handle sub-class refs. --- dcicutils/structured_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index e70a758bc..f106af7f7 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -668,7 +668,7 @@ def get_schema(self, schema_name: str) -> Optional[dict]: return schema if schema_name == schema_name.upper() and (schema := self.get_schemas(schema_name.lower().title())): return schema - if schema_name == schema_name.lower() and (schema := get_schema_exact(schema_name.title())): + if schema_name == schema_name.lower() and (schema := self.get_schemas(schema_name.title())): return schema @lru_cache(maxsize=1) From 4f8b0079f14944371598b4d79953f87805cdc56c Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 5 Dec 2023 12:21:04 -0500 Subject: [PATCH 20/53] More work related to structured_data.PortalBase.get_schemas_super_type_map to handle sub-class refs. 
--- dcicutils/structured_data.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index f106af7f7..fd86b49c4 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -347,10 +347,10 @@ def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[st if not value: if (column := typeinfo.get("column")) and column in self.data.get("required", []): self._refs_unresolved.add(f"/{link_to}/") - exception = f"No required reference (linkTo) value for: {link_to}" + exception = f"No required reference (linkTo) value for: /{link_to}" elif portal and not portal.ref_exists(link_to, value): self._refs_unresolved.add(f"/{link_to}/{value}") - exception = f"Cannot resolve reference (linkTo) for: {link_to}" + exception = f"Cannot resolve reference (linkTo) for: /{link_to}" if exception: raise Exception(exception + f"{f'/{value}' if value else ''}{f' from {src}' if src else ''}") self._refs_resolved.add(f"/{link_to}/{value}") @@ -550,7 +550,7 @@ def get(self, uri: str, follow: bool = True, **kwargs) -> Optional[Union[Request response = self._vapp.get(self._uri(uri), **self._kwargs(**kwargs)) if response and response.status_code in [301, 302, 303, 307, 308] and follow: response = response.follow() - return response + return self._response(response) return requests.get(self._uri(uri), allow_redirects=follow, **self._kwargs(**kwargs)) def patch(self, uri: str, data: Optional[dict] = None, @@ -572,7 +572,7 @@ def get_schema(self, schema_name: str) -> Optional[dict]: return get_schema(schema_name, portal_vapp=self._vapp, key=self._key) def get_schemas(self) -> dict: - return self.get("/profiles/").json + return self.get("/profiles/").json() def _uri(self, uri: str) -> str: if not isinstance(uri, str) or not uri: @@ -591,6 +591,19 @@ def _kwargs(self, **kwargs) -> dict: result_kwargs["timeout"] = timeout return result_kwargs + def _response(self, 
response) -> Optional[RequestResponse]: + if response and isinstance(getattr(response.__class__, "json"), property): + class RequestResponseWrapper: # For consistency change json property to method. + def __init__(self, respnose, **kwargs): + super().__init__(**kwargs) + self._response = response + def __getattr__(self, attr): + return getattr(self._response, attr) + def json(self): + return self._response.json + response = RequestResponseWrapper(response) + return response + @staticmethod def create_for_testing(ini_file: Optional[str] = None) -> PortalBase: if isinstance(ini_file, str): @@ -664,11 +677,11 @@ def get_metadata(self, object_name: str) -> Optional[dict]: @lru_cache(maxsize=256) def get_schema(self, schema_name: str) -> Optional[dict]: - if (schema := self.get_schemas().get(schema_name)): + if (schemas := self.get_schemas()) and (schema := schemas.get(schema_name := Schema.type_name(schema_name))): return schema - if schema_name == schema_name.upper() and (schema := self.get_schemas(schema_name.lower().title())): + if schema_name == schema_name.upper() and (schema := schemas.get(schema_name.lower().title())): return schema - if schema_name == schema_name.lower() and (schema := self.get_schemas(schema_name.title())): + if schema_name == schema_name.lower() and (schema := schemas.get(schema_name.title())): return schema @lru_cache(maxsize=1) From 7c5b45b861c4f863ccec88b2d1dbdaf9487335f0 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 5 Dec 2023 12:22:04 -0500 Subject: [PATCH 21/53] More work related to structured_data.PortalBase.get_schemas_super_type_map to handle sub-class refs. --- dcicutils/structured_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index fd86b49c4..7931aeead 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -597,9 +597,9 @@ class RequestResponseWrapper: # For consistency change json property to method. 
def __init__(self, respnose, **kwargs): super().__init__(**kwargs) self._response = response - def __getattr__(self, attr): + def __getattr__(self, attr): # noqa return getattr(self._response, attr) - def json(self): + def json(self): # noqa return self._response.json response = RequestResponseWrapper(response) return response From ae931ecef65aa131f9817c02580d88af38373b95 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 5 Dec 2023 12:27:39 -0500 Subject: [PATCH 22/53] update to 8.4.0.1b4 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c8282bc44..72b757225 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b3" # TODO: To become 8.4.1 +version = "8.4.0.1b4" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 127153670176d866fc43bd63701a1da24d2261b4 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 5 Dec 2023 17:15:58 -0500 Subject: [PATCH 23/53] More structured_data refactoring to record exceptions rather than raise. --- dcicutils/data_readers.py | 31 ++++++--- dcicutils/misc_utils.py | 8 +++ dcicutils/structured_data.py | 122 ++++++++++++++++++++++------------- 3 files changed, 105 insertions(+), 56 deletions(-) diff --git a/dcicutils/data_readers.py b/dcicutils/data_readers.py index 11e493d68..9073080d6 100644 --- a/dcicutils/data_readers.py +++ b/dcicutils/data_readers.py @@ -2,27 +2,30 @@ import csv import openpyxl from typing import Any, Generator, Iterator, List, Optional, Type, Tuple, Union -from dcicutils.misc_utils import right_trim +from dcicutils.misc_utils import create_object, right_trim + +# Forward type references for type hints. 
+Excel = Type["Excel"] class RowReader(abc.ABC): def __init__(self): self.header = None - self.location = 0 + self.row_number = 0 self._warning_empty_headers = False self._warning_extra_values = [] # Line numbers. self.open() def __iter__(self) -> Iterator: for row in self.rows: - self.location += 1 + self.row_number += 1 if self.is_comment_row(row): continue if self.is_terminating_row(row): break if len(self.header) < len(row): # Row values beyond what there are headers for are ignored. - self._warning_extra_values.append(self.location) + self._warning_extra_values.append(self.row_number) yield {column: self.cell_value(value) for column, value in zip(self.header, row)} def _define_header(self, header: List[Optional[Any]]) -> None: @@ -49,13 +52,20 @@ def open(self) -> None: pass @property - def issues(self) -> Optional[List[str]]: + def file(self) -> Optional[str]: + return self._file if hasattr(self, "_file") else None + + @property + def issues(self) -> List[str]: issues = [] if self._warning_empty_headers: - issues.append("Empty header column encountered; ignoring it and all subsequent columns.") + issues.append({"src": create_object(file=self.file), + "warning": "Empty header column encountered; ignoring it and all subsequent columns."}) if self._warning_extra_values: - issues.extend([f"Extra column values on row [{row_number}]" for row_number in self._warning_extra_values]) - return issues if issues else None + for row_number in self._warning_extra_values: + issues.append({"src": create_object(file=self.file, row=row_number), + "warning": f"Extra row column values."}) + return issues class ListReader(RowReader): @@ -101,9 +111,10 @@ def __del__(self) -> None: class ExcelSheetReader(RowReader): - def __init__(self, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None: + def __init__(self, excel: Excel, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None: self.sheet_name = sheet_name or "Sheet1" self._workbook = workbook + 
self._file = excel._file self._rows = None super().__init__() @@ -134,7 +145,7 @@ def __init__(self, file: str, reader_class: Optional[Type] = None) -> None: self.open() def sheet_reader(self, sheet_name: str) -> ExcelSheetReader: - return self._reader_class(sheet_name=sheet_name, workbook=self._workbook) + return self._reader_class(self, sheet_name=sheet_name, workbook=self._workbook) def open(self) -> None: if self._workbook is None: diff --git a/dcicutils/misc_utils.py b/dcicutils/misc_utils.py index 0484d6992..752fc91e7 100644 --- a/dcicutils/misc_utils.py +++ b/dcicutils/misc_utils.py @@ -1501,6 +1501,14 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]], return list_or_tuple[:i + 1] +def create_object(**kwargs) -> dict: + result = {} + for name in kwargs: + if kwargs[name]: + result[name] = kwargs[name] + return result + + def is_c4_arn(arn: str) -> bool: """ Returns True iff the given (presumed) AWS ARN string value looks like it diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 7931aeead..c3e6541ba 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -16,8 +16,8 @@ from dcicutils.creds_utils import CGAPKeyManager, FourfrontKeyManager, SMaHTKeyManager from dcicutils.data_readers import CsvReader, Excel, RowReader from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata -from dcicutils.misc_utils import (load_json_if, merge_objects, remove_empty_properties, right_trim, split_string, - to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp) +from dcicutils.misc_utils import (create_object, load_json_if, merge_objects, remove_empty_properties, right_trim, + split_string, to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp) from dcicutils.zip_utils import temporary_file, unpack_gz_file_to_temporary_file, unpack_files @@ -53,9 +53,9 @@ def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp self._portal = 
Portal(portal, data=self.data, schemas=schemas) if portal else None self._order = order self._prune = prune - self._issues = None - self._refs_resolved = set() - self._refs_unresolved = set() + self._issues = {} + self._refs_resolved = [] + self._validated = False self._load_file(file) if file else None @staticmethod @@ -64,19 +64,39 @@ def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None, order: Optional[List[str]] = None, prune: bool = True) -> StructuredDataSet: return StructuredDataSet(file=file, portal=portal, schemas=schemas, order=order, prune=prune) - def validate(self) -> Optional[List[str]]: - issues = [] + def validate(self, force: bool = False) -> None: + if self._validated and not force: + return + self._validated = True for type_name in self.data: if (schema := Schema.load_by_name(type_name, portal=self._portal)): - item_number = 0 + row_number = 0 for data in self.data[type_name]: - item_number += 1 + row_number += 1 if (validation_issues := schema.validate(data)) is not None: - issues.extend([f"{schema.name} [{item_number}]: {issue}" for issue in validation_issues]) - return issues + (self._issues or []) + for validation_issue in validation_issues: + self._note_issue({"src": create_object(type=schema.name, row=row_number), + "error": validation_issue}, "validation") + + @property + def issues(self): + return self._issues + + @property + def issues_reader(self) -> List[dict]: + return self._issues.get("reader") or [] + + @property + def issues_linkto(self) -> List[dict]: + return self._issues.get("linkto") or [] + + @property + def issues_validation(self) -> List[dict]: + return self._issues.get("validation") or [] - def refs(self) -> Tuple[List[str], List[str]]: - return (sorted(self._refs_resolved), sorted(self._refs_unresolved)) + @property + def refs_resolved(self) -> List[str]: + return self._refs_resolved def _load_file(self, file: str) -> None: # Returns a dictionary where each property is the name (i.e. 
the type) of the data, @@ -107,14 +127,12 @@ def _load_packed_file(self, file: str) -> None: def _load_csv_file(self, file: str) -> None: self._load_reader(reader := CsvReader(file), type_name=Schema.type_name(file)) - self._note_issues(reader.issues, os.path.basename(file)) def _load_excel_file(self, file: str) -> None: excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl). order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {} for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)): self._load_reader(reader := excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name)) - self._note_issues(reader.issues, f"{file}:{sheet_name}") def _load_json_file(self, file: str) -> None: with open(file) as f: @@ -125,18 +143,20 @@ def _load_reader(self, reader: RowReader, type_name: str) -> None: noschema = False structured_row_template = None for row in reader: - if not structured_row_template: # Delay creation just so we don't create it if there are no rows. + if not structured_row_template: # Delay schema creation so we don't reference it if there are no rows. 
if not schema and not noschema and not (schema := Schema.load_by_name(type_name, portal=self._portal)): noschema = True + elif schema and (schema_name := schema.name): + type_name = schema_name structured_row_template = _StructuredRowTemplate(reader.header, schema) structured_row = structured_row_template.create_row() for column_name, value in row.items(): - structured_row_template.set_value(structured_row, column_name, value, reader.location) - if schema and (schema_name := schema.name): - type_name = schema_name - self._refs_resolved = self._refs_resolved | schema._refs_resolved - self._refs_unresolved = self._refs_unresolved | schema._refs_unresolved + structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number) self._add(type_name, structured_row) + self._note_issue(reader.issues, "reader") + if schema: + self._note_issue(schema._refs_unresolved, "linkto") + self._refs_resolved = schema._refs_resolved def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None: if self._prune: @@ -146,11 +166,13 @@ def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None: else: self.data[type_name] = [data] if isinstance(data, dict) else data - def _note_issues(self, issues: Optional[List[str]], source: str) -> None: - if issues: - if not self._issues: - self._issues = [] - self._issues.append({source: issues}) + def _note_issue(self, issue: Optional[Union[dict, List[dict]]], group: str) -> None: + if isinstance(issue, dict) and issue: + issue = [issue] + if isinstance(issue, list) and issue: + if not self._issues.get(group): + self._issues[group] = [] + self._issues[group].extend(issue) class _StructuredRowTemplate: @@ -163,10 +185,10 @@ def __init__(self, column_names: List[str], schema: Optional[Schema] = None) -> def create_row(self) -> dict: return copy.deepcopy(self._template) - def set_value(self, data: dict, column_name: str, value: str, loc: int = -1) -> None: + def set_value(self, data: dict, column_name: 
str, value: str, file: Optional[str], row_number: int = -1) -> None: if (set_value_function := self._set_value_functions.get(column_name)): - src = (f"{f'{self._schema.name}.' if self._schema else ''}" + - f"{f'{column_name}' if column_name else ''}{f' [{loc}]' if loc else ''}") + src = create_object(type=self._schema.name if self._schema else None, + column=column_name, file=file, row=row_number) set_value_function(data, value, src) def _create_row_template(self, column_names: List[str]) -> dict: # Surprisingly tricky code here. @@ -270,19 +292,27 @@ def __init__(self, schema_json: dict, portal: Optional[Portal] = None) -> None: "number": self._map_function_number, "string": self._map_function_string } - self._refs_resolved = set() - self._refs_unresolved = set() + self._refs_resolved = [] + self._refs_unresolved = [] self._typeinfo = self._create_typeinfo(schema_json) @staticmethod def load_by_name(name: str, portal: Portal) -> Optional[dict]: return Schema(portal.get_schema(Schema.type_name(name)), portal) if portal else None - def validate(self, data: dict) -> Optional[List[str]]: + def validate(self, data: dict) -> List[str]: issues = [] for issue in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data): issues.append(issue.message) - return issues if issues else None + return issues + + @property + def refs_unresolved(self) -> List[dict]: + return self._refs_unresolved + + @property + def refs_resolved(self) -> List[str]: + return self._refs_resolved def get_map_value_function(self, column_name: str) -> Optional[Any]: return (self._get_typeinfo(column_name) or {}).get("map") @@ -343,17 +373,16 @@ def map_string(value: str, src: Optional[str]) -> str: def _map_function_ref(self, typeinfo: dict) -> Callable: def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[str]) -> Any: nonlocal self, typeinfo - exception = None if not value: if (column := typeinfo.get("column")) and column in 
self.data.get("required", []): - self._refs_unresolved.add(f"/{link_to}/") - exception = f"No required reference (linkTo) value for: /{link_to}" - elif portal and not portal.ref_exists(link_to, value): - self._refs_unresolved.add(f"/{link_to}/{value}") - exception = f"Cannot resolve reference (linkTo) for: /{link_to}" - if exception: - raise Exception(exception + f"{f'/{value}' if value else ''}{f' from {src}' if src else ''}") - self._refs_resolved.add(f"/{link_to}/{value}") + self._refs_unresolved.append({"src": src, "ref": f"/{link_to}/"}) + elif portal: + if not (resolved := portal.ref_exists(link_to, value)): + self._refs_unresolved.append({"src": src, "ref": f"/{link_to}/{value}"}) + elif len(resolved) > 1: + self._refs_unresolved.append({"src": src, "ref": f"/{link_to}/{value}", "types": resolved}) + else: + self._refs_resolved.append(f"/{link_to}/{value}") return value return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src) @@ -725,16 +754,17 @@ def breadth_first(super_type_map: dict, super_type_name: str) -> dict: super_type_map_flattened[super_type_name] = breadth_first(super_type_map, super_type_name) return super_type_map_flattened - def ref_exists(self, type_name: str, value: str) -> bool: + def ref_exists(self, type_name: str, value: str) -> List[str]: + resolved = [] if self._ref_exists_single(type_name, value): - return True + resolved.append(type_name) # Check for the given ref in all sub-types of the given type. 
if (schemas_super_type_map := self.get_schemas_super_type_map()): if (sub_type_names := schemas_super_type_map.get(type_name)): for sub_type_name in sub_type_names: if self._ref_exists_single(sub_type_name, value): - return True - return False + resolved.append(type_name) + return resolved def _ref_exists_single(self, type_name: str, value: str) -> bool: if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)): From 3e916e3bd43cfb9fc318e8c087b9e25c725c162d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 6 Dec 2023 09:41:48 -0500 Subject: [PATCH 24/53] Cleaning up errors/warnings feedback in structured_data. --- dcicutils/data_readers.py | 14 +++--- dcicutils/structured_data.py | 87 +++++++++++++++++++----------------- 2 files changed, 54 insertions(+), 47 deletions(-) diff --git a/dcicutils/data_readers.py b/dcicutils/data_readers.py index 9073080d6..8fd6d9a10 100644 --- a/dcicutils/data_readers.py +++ b/dcicutils/data_readers.py @@ -56,16 +56,16 @@ def file(self) -> Optional[str]: return self._file if hasattr(self, "_file") else None @property - def issues(self) -> List[str]: - issues = [] + def warnings(self) -> List[str]: + warnings = [] if self._warning_empty_headers: - issues.append({"src": create_object(file=self.file), - "warning": "Empty header column encountered; ignoring it and all subsequent columns."}) + warnings.append({"src": create_object(file=self.file), + "warning": "Empty header column encountered; ignoring it and all subsequent columns."}) if self._warning_extra_values: for row_number in self._warning_extra_values: - issues.append({"src": create_object(file=self.file, row=row_number), - "warning": f"Extra row column values."}) - return issues + warnings.append({"src": create_object(file=self.file, row=row_number), + "warning": f"Extra row column values."}) + return warnings class ListReader(RowReader): diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 
c3e6541ba..2d54e6bcc 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -53,8 +53,9 @@ def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp self._portal = Portal(portal, data=self.data, schemas=schemas) if portal else None self._order = order self._prune = prune - self._issues = {} - self._refs_resolved = [] + self._warnings = {} + self._errors = {} + self._resolved_refs = [] self._validated = False self._load_file(file) if file else None @@ -73,30 +74,30 @@ def validate(self, force: bool = False) -> None: row_number = 0 for data in self.data[type_name]: row_number += 1 - if (validation_issues := schema.validate(data)) is not None: - for validation_issue in validation_issues: - self._note_issue({"src": create_object(type=schema.name, row=row_number), - "error": validation_issue}, "validation") + if (validation_errors := schema.validate(data)) is not None: + for validation_error in validation_errors: + self._note_error({"src": create_object(type=schema.name, row=row_number), + "error": validation_error}, "validation") @property - def issues(self): - return self._issues + def warnings(self): + return self._warnings @property - def issues_reader(self) -> List[dict]: - return self._issues.get("reader") or [] + def errors(self): + return self._errors @property - def issues_linkto(self) -> List[dict]: - return self._issues.get("linkto") or [] + def ref_errors(self) -> List[dict]: + return self._errors.get("ref") or [] @property - def issues_validation(self) -> List[dict]: - return self._issues.get("validation") or [] + def validation_errors(self) -> List[dict]: + return self._errors.get("validation") or [] @property - def refs_resolved(self) -> List[str]: - return self._refs_resolved + def resolved_refs(self) -> List[str]: + return self._resolved_refs def _load_file(self, file: str) -> None: # Returns a dictionary where each property is the name (i.e. 
the type) of the data, @@ -153,10 +154,10 @@ def _load_reader(self, reader: RowReader, type_name: str) -> None: for column_name, value in row.items(): structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number) self._add(type_name, structured_row) - self._note_issue(reader.issues, "reader") + self._note_warning(reader.warnings, "reader") if schema: - self._note_issue(schema._refs_unresolved, "linkto") - self._refs_resolved = schema._refs_resolved + self._note_error(schema._unresolved_refs, "ref") + self._resolved_refs = schema._resolved_refs def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None: if self._prune: @@ -166,13 +167,19 @@ def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None: else: self.data[type_name] = [data] if isinstance(data, dict) else data - def _note_issue(self, issue: Optional[Union[dict, List[dict]]], group: str) -> None: - if isinstance(issue, dict) and issue: - issue = [issue] - if isinstance(issue, list) and issue: - if not self._issues.get(group): - self._issues[group] = [] - self._issues[group].extend(issue) + def _note_warning(self, item: Optional[Union[dict, List[dict]]], group: str) -> None: + self._note_issue(self._warnings, item, group) + + def _note_error(self, item: Optional[Union[dict, List[dict]]], group: str) -> None: + self._note_issue(self._errors, item, group) + + def _note_issue(self, issues: dict, item: Optional[Union[dict, List[dict]]], group: str) -> None: + if isinstance(item, dict) and item: + item = [item] + if isinstance(item, list) and item: + if not issues.get(group): + issues[group] = [] + issues[group].extend(item) class _StructuredRowTemplate: @@ -292,8 +299,8 @@ def __init__(self, schema_json: dict, portal: Optional[Portal] = None) -> None: "number": self._map_function_number, "string": self._map_function_string } - self._refs_resolved = [] - self._refs_unresolved = [] + self._resolved_refs = [] + self._unresolved_refs = [] self._typeinfo = 
self._create_typeinfo(schema_json) @staticmethod @@ -301,18 +308,18 @@ def load_by_name(name: str, portal: Portal) -> Optional[dict]: return Schema(portal.get_schema(Schema.type_name(name)), portal) if portal else None def validate(self, data: dict) -> List[str]: - issues = [] - for issue in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data): - issues.append(issue.message) - return issues + errors = [] + for error in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data): + errors.append(error.message) + return errors @property - def refs_unresolved(self) -> List[dict]: - return self._refs_unresolved + def unresolved_refs(self) -> List[dict]: + return self._unresolved_refs @property - def refs_resolved(self) -> List[str]: - return self._refs_resolved + def resolved_refs(self) -> List[str]: + return self._resolved_refs def get_map_value_function(self, column_name: str) -> Optional[Any]: return (self._get_typeinfo(column_name) or {}).get("map") @@ -375,14 +382,14 @@ def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[st nonlocal self, typeinfo if not value: if (column := typeinfo.get("column")) and column in self.data.get("required", []): - self._refs_unresolved.append({"src": src, "ref": f"/{link_to}/"}) + self._unresolved_refs.append({"src": src, "ref": f"/{link_to}/"}) elif portal: if not (resolved := portal.ref_exists(link_to, value)): - self._refs_unresolved.append({"src": src, "ref": f"/{link_to}/{value}"}) + self._unresolved_refs.append({"src": src, "ref": f"/{link_to}/{value}"}) elif len(resolved) > 1: - self._refs_unresolved.append({"src": src, "ref": f"/{link_to}/{value}", "types": resolved}) + self._unresolved_refs.append({"src": src, "ref": f"/{link_to}/{value}", "types": resolved}) else: - self._refs_resolved.append(f"/{link_to}/{value}") + self._resolved_refs.append(f"/{link_to}/{value}") return value return lambda value, src: map_ref(value, 
typeinfo.get("linkTo"), self._portal, src) From 9a0fba9a2391f5516b20679e1c81a34f17da76d2 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 6 Dec 2023 10:22:47 -0500 Subject: [PATCH 25/53] Formatting support for errors/warnings in structured_data. --- dcicutils/structured_data.py | 60 ++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 2d54e6bcc..358df643e 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -80,11 +80,15 @@ def validate(self, force: bool = False) -> None: "error": validation_error}, "validation") @property - def warnings(self): + def warnings(self) -> dict: return self._warnings @property - def errors(self): + def reader_warnings(self) -> List[dict]: + return self._warnings.get("reader") or [] + + @property + def errors(self) -> dict: return self._errors @property @@ -99,6 +103,42 @@ def validation_errors(self) -> List[dict]: def resolved_refs(self) -> List[str]: return self._resolved_refs + @staticmethod + def format_issue(issue: dict, original_file: Optional[str] = None) -> str: + def src_string(issue: dict) -> str: + show_file = original_file and (original_file.endswith(".zip") or + original_file.endswith(".tgz") or original_file.endswith(".gz")) + issue_src = issue.get("src") + src_file = issue_src.get("file") if show_file else "" + src_type = issue_src.get("type") + src_column = issue_src.get("column") + src_row = issue_src.get("row", 0) + if src_file: + src = f"{os.path.basename(src_file)}{':' if src_type or src_column or src_row > 0 else ''}" + else: + src = "" + if src_type: + src += ("." if src else "") + src_type + if src_column: + src += ("." 
if src else "") + src_column + if src_row > 0: + src += (" " if src else "") + f"[{src_row}]" + if not src: + if issue.get("warning"): + src = "Warning" + elif issue.get("error"): + src = "Error" + else: + src = "Issue" + return src + issue_message = None + if issue: + if error := issue.get("error"): + issue_message = error + elif warning := issue.get("warning"): + issue_message = warning + return f"{src_string(issue)}: {issue_message}" if issue_message else "" + def _load_file(self, file: str) -> None: # Returns a dictionary where each property is the name (i.e. the type) of the data, # and the value is array of dictionaries for the data itself. Handle these kinds of files: @@ -127,13 +167,13 @@ def _load_packed_file(self, file: str) -> None: self._load_file(file) def _load_csv_file(self, file: str) -> None: - self._load_reader(reader := CsvReader(file), type_name=Schema.type_name(file)) + self._load_reader(CsvReader(file), type_name=Schema.type_name(file)) def _load_excel_file(self, file: str) -> None: excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl). 
order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {} for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)): - self._load_reader(reader := excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name)) + self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name)) def _load_json_file(self, file: str) -> None: with open(file) as f: @@ -299,7 +339,7 @@ def __init__(self, schema_json: dict, portal: Optional[Portal] = None) -> None: "number": self._map_function_number, "string": self._map_function_string } - self._resolved_refs = [] + self._resolved_refs = set() self._unresolved_refs = [] self._typeinfo = self._create_typeinfo(schema_json) @@ -319,7 +359,7 @@ def unresolved_refs(self) -> List[dict]: @property def resolved_refs(self) -> List[str]: - return self._resolved_refs + return list(self._resolved_refs) def get_map_value_function(self, column_name: str) -> Optional[Any]: return (self._get_typeinfo(column_name) or {}).get("map") @@ -382,14 +422,14 @@ def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[st nonlocal self, typeinfo if not value: if (column := typeinfo.get("column")) and column in self.data.get("required", []): - self._unresolved_refs.append({"src": src, "ref": f"/{link_to}/"}) + self._unresolved_refs.append({"src": src, "error": f"/{link_to}/"}) elif portal: if not (resolved := portal.ref_exists(link_to, value)): - self._unresolved_refs.append({"src": src, "ref": f"/{link_to}/{value}"}) + self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}"}) elif len(resolved) > 1: - self._unresolved_refs.append({"src": src, "ref": f"/{link_to}/{value}", "types": resolved}) + self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}", "types": resolved}) else: - self._resolved_refs.append(f"/{link_to}/{value}") + self._resolved_refs.add(f"/{link_to}/{value}") return 
value return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src) From bac366a11146a8f65edf2c616f5dd2d6fbfa53a8 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 6 Dec 2023 11:58:01 -0500 Subject: [PATCH 26/53] Formatting support for errors/warnings in structured_data. --- dcicutils/structured_data.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 358df643e..f7fbf5393 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -106,21 +106,25 @@ def resolved_refs(self) -> List[str]: @staticmethod def format_issue(issue: dict, original_file: Optional[str] = None) -> str: def src_string(issue: dict) -> str: + if not isinstance(issue, dict) or not isinstance(issue_src := issue.get("src"), dict): + return "" show_file = original_file and (original_file.endswith(".zip") or original_file.endswith(".tgz") or original_file.endswith(".gz")) - issue_src = issue.get("src") src_file = issue_src.get("file") if show_file else "" src_type = issue_src.get("type") src_column = issue_src.get("column") src_row = issue_src.get("row", 0) if src_file: - src = f"{os.path.basename(src_file)}{':' if src_type or src_column or src_row > 0 else ''}" + src = f"{os.path.basename(src_file)}" + sep = ":" else: src = "" + sep = "." if src_type: - src += ("." if src else "") + src_type + src += (sep if src else "") + src_type + sep = "." if src_column: - src += ("." 
if src else "") + src_column + src += (sep if src else "") + src_column if src_row > 0: src += (" " if src else "") + f"[{src_row}]" if not src: @@ -137,6 +141,8 @@ def src_string(issue: dict) -> str: issue_message = error elif warning := issue.get("warning"): issue_message = warning + elif issue.get("truncated"): + return f"Truncated result set | More: {issue.get('more')} | See: {issue.get('details')}" return f"{src_string(issue)}: {issue_message}" if issue_message else "" def _load_file(self, file: str) -> None: From 53df9b84851acce4b40b51c712ec6345bedb6f0e Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 6 Dec 2023 12:03:29 -0500 Subject: [PATCH 27/53] up version: 8.4.0.1b4 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 72b757225..b1f6764f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b4" # TODO: To become 8.4.1 +version = "8.4.0.1b5" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From a089a8077e798317f24477a3ba785c9a1b3b6442 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 6 Dec 2023 16:37:57 -0500 Subject: [PATCH 28/53] Support for File type schemas in structured_data. --- dcicutils/structured_data.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index f7fbf5393..af3caced2 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -36,6 +36,8 @@ ARRAY_NAME_SUFFIX_CHAR = "#" ARRAY_NAME_SUFFIX_REGEX = re.compile(rf"{ARRAY_NAME_SUFFIX_CHAR}\d+") DOTTED_NAME_DELIMITER_CHAR = "." +FILE_SCHEMA_NAME = "File" +FILE_SCHEMA_NAME_PROPERTY = "filename" # Forward type references for type hints. 
Portal = Type["Portal"] @@ -190,7 +192,7 @@ def _load_reader(self, reader: RowReader, type_name: str) -> None: noschema = False structured_row_template = None for row in reader: - if not structured_row_template: # Delay schema creation so we don't reference it if there are no rows. + if not structured_row_template: # Delay creation just so we don't reference schema if there are no rows. if not schema and not noschema and not (schema := Schema.load_by_name(type_name, portal=self._portal)): noschema = True elif schema and (schema_name := schema.name): @@ -367,6 +369,9 @@ def unresolved_refs(self) -> List[dict]: def resolved_refs(self) -> List[str]: return list(self._resolved_refs) + def is_file_type(self) -> bool: + return (self.name == FILE_SCHEMA_NAME) or (self._portal and self._portal.is_file_schema(self.name)) + def get_map_value_function(self, column_name: str) -> Optional[Any]: return (self._get_typeinfo(column_name) or {}).get("map") @@ -776,6 +781,12 @@ def get_schemas(self) -> dict: schemas[user_specified_schema["title"]] = user_specified_schema return schemas + def is_file_schema(self, schema_name: str) -> bool: + if super_type_map := self.get_schemas_super_type_map(): + if file_super_type := super_type_map.get(FILE_SCHEMA_NAME): + return Schema.type_name(schema_name) in file_super_type + return False + @lru_cache(maxsize=1) def get_schemas_super_type_map(self) -> dict: """ From cccace8d054b1d51f95ddbd367eaec0e386a8527 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 6 Dec 2023 17:46:01 -0500 Subject: [PATCH 29/53] update version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b1f6764f2..389578815 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b5" # TODO: To become 8.4.1 +version = "8.4.0.1b7" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN
resources" authors = ["4DN-DCIC Team "] license = "MIT" From bae0468be90d3d0b9ea44d621c48c580fc125d33 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 6 Dec 2023 17:58:37 -0500 Subject: [PATCH 30/53] lint fix --- dcicutils/structured_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index af3caced2..ac5927a8f 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -37,7 +37,6 @@ ARRAY_NAME_SUFFIX_REGEX = re.compile(rf"{ARRAY_NAME_SUFFIX_CHAR}\d+") DOTTED_NAME_DELIMITER_CHAR = "." FILE_SCHEMA_NAME = "File" -FILE_SCHEMA_NAME_PROPERTY = "filename" # Forward type references for type hints. Portal = Type["Portal"] From 01d34cf28685d26510b9070142a7462e13e40a01 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 7 Dec 2023 14:33:22 -0500 Subject: [PATCH 31/53] Remove structured_data.StructuredDataSet.format_issue. --- dcicutils/structured_data.py | 42 ------------------------------------ 1 file changed, 42 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index ac5927a8f..ccb3bc21f 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -104,48 +104,6 @@ def validation_errors(self) -> List[dict]: def resolved_refs(self) -> List[str]: return self._resolved_refs - @staticmethod - def format_issue(issue: dict, original_file: Optional[str] = None) -> str: - def src_string(issue: dict) -> str: - if not isinstance(issue, dict) or not isinstance(issue_src := issue.get("src"), dict): - return "" - show_file = original_file and (original_file.endswith(".zip") or - original_file.endswith(".tgz") or original_file.endswith(".gz")) - src_file = issue_src.get("file") if show_file else "" - src_type = issue_src.get("type") - src_column = issue_src.get("column") - src_row = issue_src.get("row", 0) - if src_file: - src = f"{os.path.basename(src_file)}" - sep = ":" - else: - src = "" - sep = "." 
- if src_type: - src += (sep if src else "") + src_type - sep = "." - if src_column: - src += (sep if src else "") + src_column - if src_row > 0: - src += (" " if src else "") + f"[{src_row}]" - if not src: - if issue.get("warning"): - src = "Warning" - elif issue.get("error"): - src = "Error" - else: - src = "Issue" - return src - issue_message = None - if issue: - if error := issue.get("error"): - issue_message = error - elif warning := issue.get("warning"): - issue_message = warning - elif issue.get("truncated"): - return f"Truncated result set | More: {issue.get('more')} | See: {issue.get('details')}" - return f"{src_string(issue)}: {issue_message}" if issue_message else "" - def _load_file(self, file: str) -> None: # Returns a dictionary where each property is the name (i.e. the type) of the data, # and the value is array of dictionaries for the data itself. Handle these kinds of files: From 03d76935a2d1b5096e95d50fe961b3fc42704ade Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 7 Dec 2023 14:33:33 -0500 Subject: [PATCH 32/53] Minor test fix --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 389578815..882435372 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b7" # TODO: To become 8.4.1 +version = "8.4.0.1b8" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 909083340168beae660455faff8aff09456ff2f6 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 7 Dec 2023 16:08:31 -0500 Subject: [PATCH 33/53] removed obsolete function --- dcicutils/structured_data.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index ccb3bc21f..ce7dea9ba 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -326,9 +326,6 @@ 
def unresolved_refs(self) -> List[dict]: def resolved_refs(self) -> List[str]: return list(self._resolved_refs) - def is_file_type(self) -> bool: - return (self.name == FILE_SCHEMA_NAME) or (self._portal and self._portal.is_file_schema(self.name)) - def get_map_value_function(self, column_name: str) -> Optional[Any]: return (self._get_typeinfo(column_name) or {}).get("map") From 98ce25f193b6db9298382868ad115c2e5a08e995 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 7 Dec 2023 17:59:05 -0500 Subject: [PATCH 34/53] Minor fix to Portal class in structured_data. --- dcicutils/structured_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index ce7dea9ba..63575bee6 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -558,6 +558,7 @@ def __init__(self, elif isinstance(key, dict): self._key = key self._key_pair = (key.get("key"), key.get("secret")) if key else None + self._server = key.get("server") elif isinstance(key, tuple) and len(key) >= 2: self._key = {"key": key[0], "secret": key[1]} self._key_pair = key From 27da607d9220c690399c8aa72f1d0a85f5878c5b Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 7 Dec 2023 18:05:42 -0500 Subject: [PATCH 35/53] version update --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 882435372..8cc28fb5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b8" # TODO: To become 8.4.1 +version = "8.4.0.1b9" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From fe38d1d6325c3c701629be7680c4929bbea287b7 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 9 Dec 2023 08:14:41 -0500 Subject: [PATCH 36/53] typo --- dcicutils/structured_data.py | 2 +- 1 file changed, 1 insertion(+),
1 deletion(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 63575bee6..670b6a175 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -636,7 +636,7 @@ def _kwargs(self, **kwargs) -> dict: def _response(self, response) -> Optional[RequestResponse]: if response and isinstance(getattr(response.__class__, "json"), property): class RequestResponseWrapper: # For consistency change json property to method. - def __init__(self, respnose, **kwargs): + def __init__(self, response, **kwargs): super().__init__(**kwargs) self._response = response def __getattr__(self, attr): # noqa From 04fe8acb5506dac224e021ce2085179d56d65bfc Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 9 Dec 2023 14:48:42 -0500 Subject: [PATCH 37/53] added structured_data.StructuredDataSet.upload_files --- dcicutils/structured_data.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 670b6a175..c7767e105 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -37,6 +37,7 @@ ARRAY_NAME_SUFFIX_REGEX = re.compile(rf"{ARRAY_NAME_SUFFIX_CHAR}\d+") DOTTED_NAME_DELIMITER_CHAR = "." FILE_SCHEMA_NAME = "File" +FILE_SCHEMA_NAME_PROPERTY = "filename" # Forward type references for type hints. Portal = Type["Portal"] @@ -104,6 +105,17 @@ def validation_errors(self) -> List[dict]: def resolved_refs(self) -> List[str]: return self._resolved_refs + @property + def upload_files(self) -> List[str]: + result = [] + if self._portal: + for type_name in self.data: + if self._portal.is_file_schema(type_name): + for item in self.data[type_name]: + if (file_name := item.get(FILE_SCHEMA_NAME_PROPERTY)): + result.append({"type": type_name, "file": file_name}) + return result + def _load_file(self, file: str) -> None: # Returns a dictionary where each property is the name (i.e. the type) of the data, # and the value is array of dictionaries for the data itself. 
Handle these kinds of files: From b6b4ee1bd6bc203fa320256c7855c43ef8eb4240 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 9 Dec 2023 14:49:00 -0500 Subject: [PATCH 38/53] added structured_data.StructuredDataSet.upload_files --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8cc28fb5a..58b5ed066 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b9" # TODO: To become 8.4.1 +version = "8.4.0.1b10" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From fe41fa94c2b9a1a70014d98d1dff61e1412fd9b7 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 9 Dec 2023 15:08:30 -0500 Subject: [PATCH 39/53] fleshed out structured_data.PortalBase more. --- dcicutils/structured_data.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index c7767e105..8d7d1ee03 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -547,6 +547,7 @@ def __init__(self, env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None, key: Optional[Union[dict, tuple]] = None, portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None) -> PortalBase: + import pdb ; pdb.set_trace() if ((isinstance(arg, (VirtualApp, TestApp, Router, Portal)) or isinstance(arg, str) and arg.endswith(".ini")) and not portal): portal = arg @@ -557,6 +558,8 @@ def __init__(self, self._vapp = None self._key = None self._key_pair = None + self._env = env + self._app = app self._server = None if isinstance(portal, Portal): self._vapp = portal._vapp @@ -585,6 +588,35 @@ def __init__(self, self._key = key_manager.get_keydict_for_server(server) self._server = server self._key_pair = 
key_manager.keydict_to_keypair(self._key) if self._key else None + self._key_file = key_manager.keys_file + + @property + def env(self): + return self._env + + @property + def app(self): + return self._app + + @property + def key(self): + return self._key + + @property + def key_pair(self): + return self._key_pair + + @property + def key_file(self): + return self._key_file + + @property + def server(self): + return self._server + + @property + def vapp(self): + return self._vapp def get_metadata(self, object_id: str) -> Optional[dict]: return get_metadata(obj_id=object_id, vapp=self._vapp, key=self._key) From f30b421e5013a186cf0ec4f312e93306e663b4f4 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 9 Dec 2023 15:08:49 -0500 Subject: [PATCH 40/53] removed pdb --- dcicutils/structured_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 8d7d1ee03..a4573f1c9 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -547,7 +547,6 @@ def __init__(self, env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None, key: Optional[Union[dict, tuple]] = None, portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None) -> PortalBase: - import pdb ; pdb.set_trace() if ((isinstance(arg, (VirtualApp, TestApp, Router, Portal)) or isinstance(arg, str) and arg.endswith(".ini")) and not portal): portal = arg From d83cf8f2cb68353c9062804ef941b3921fac3370 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 9 Dec 2023 15:11:44 -0500 Subject: [PATCH 41/53] typo --- dcicutils/structured_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index a4573f1c9..49a5896a4 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -557,6 +557,7 @@ def __init__(self, self._vapp = None self._key = None self._key_pair = None + self._key_file = None self._env = env 
self._app = app self._server = None From adf94e1db45c05bbb2204883c7ce4ff75f4342af Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 9 Dec 2023 15:18:51 -0500 Subject: [PATCH 42/53] minor code cleanup --- dcicutils/structured_data.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 49a5896a4..45678f0c1 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -543,11 +543,11 @@ def array_indices(name: str) -> Tuple[Optional[str], Optional[List[int]]]: class PortalBase: def __init__(self, - arg: Optional[Union[VirtualApp, TestApp, Router, Portal, dict, tuple, str]] = None, + arg: Optional[Union[VirtualApp, TestApp, Router, PortalBase, dict, tuple, str]] = None, env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None, key: Optional[Union[dict, tuple]] = None, - portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None) -> PortalBase: - if ((isinstance(arg, (VirtualApp, TestApp, Router, Portal)) or + portal: Optional[Union[VirtualApp, TestApp, Router, PortalBase, str]] = None) -> PortalBase: + if ((isinstance(arg, (VirtualApp, TestApp, Router, PortalBase)) or isinstance(arg, str) and arg.endswith(".ini")) and not portal): portal = arg elif isinstance(arg, str) and not env: @@ -561,7 +561,7 @@ def __init__(self, self._env = env self._app = app self._server = None - if isinstance(portal, Portal): + if isinstance(portal, PortalBase): self._vapp = portal._vapp self._key = portal._key self._key_pair = portal._key_pair @@ -693,15 +693,15 @@ def json(self): # noqa @staticmethod def create_for_testing(ini_file: Optional[str] = None) -> PortalBase: if isinstance(ini_file, str): - return Portal(Portal._create_testapp(ini_file)) + return PortalBase(PortalBase._create_testapp(ini_file)) minimal_ini_for_unit_testing = "[app:app]\nuse = egg:encoded\nsqlalchemy.url = postgresql://dummy\n" with 
temporary_file(content=minimal_ini_for_unit_testing, suffix=".ini") as ini_file: - return Portal(Portal._create_testapp(ini_file)) + return PortalBase(PortalBase._create_testapp(ini_file)) @staticmethod - def create_for_testing_local(ini_file: Optional[str] = None) -> Portal: + def create_for_testing_local(ini_file: Optional[str] = None) -> PortalBase: if isinstance(ini_file, str) and ini_file: - return Portal(Portal._create_testapp(ini_file)) + return PortalBase(PortalBase._create_testapp(ini_file)) minimal_ini_for_testing_local = "\n".join([ "[app:app]\nuse = egg:encoded\nfile_upload_bucket = dummy", "sqlalchemy.url = postgresql://postgres@localhost:5441/postgres?host=/tmp/snovault/pgdata", @@ -722,7 +722,7 @@ def create_for_testing_local(ini_file: Optional[str] = None) -> Portal: "multiauth.policy.auth0.base = encoded.authentication.Auth0AuthenticationPolicy" ]) with temporary_file(content=minimal_ini_for_testing_local, suffix=".ini") as minimal_ini_file: - return Portal(Portal._create_testapp(minimal_ini_file)) + return PortalBase(PortalBase._create_testapp(minimal_ini_file)) @staticmethod def _create_testapp(value: Union[str, Router, TestApp] = "development.ini") -> TestApp: From 2d0d55c9d781c8993b964eaa443cd5dc989bca80 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 9 Dec 2023 15:28:35 -0500 Subject: [PATCH 43/53] version 8.4.0.1b11 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 58b5ed066..3d4df104d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b10" # TODO: To become 8.4.1 +version = "8.4.0.1b11" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 48886a6dfaca371f184edc2d78ca7925ee2c4159 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 10 Dec 2023 14:07:42 -0500 Subject: [PATCH 
44/53] Refactored PortalBase from structured_data into portal_utils.Portal --- dcicutils/portal_utils.py | 267 +++++++++++++++++++++++++++++++++++ dcicutils/structured_data.py | 250 +------------------------------- 2 files changed, 274 insertions(+), 243 deletions(-) create mode 100644 dcicutils/portal_utils.py diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py new file mode 100644 index 000000000..551a0201c --- /dev/null +++ b/dcicutils/portal_utils.py @@ -0,0 +1,267 @@ +from collections import deque +from pyramid.paster import get_app +from pyramid.router import Router +import re +import requests +from requests.models import Response as RequestResponse +from typing import Optional, Type, Union +from webtest.app import TestApp, TestResponse +from dcicutils.common import OrchestratedApp, APP_CGAP, APP_FOURFRONT, APP_SMAHT, ORCHESTRATED_APPS +from dcicutils.creds_utils import CGAPKeyManager, FourfrontKeyManager, SMaHTKeyManager +from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata +from dcicutils.misc_utils import to_camel_case, VirtualApp +from dcicutils.zip_utils import temporary_file + +Portal = Type["Portal"] # Forward type reference for type hints. 
+FILE_SCHEMA_NAME = "File" + + +class Portal: + + def __init__(self, + arg: Optional[Union[VirtualApp, TestApp, Router, Portal, dict, tuple, str]] = None, + env: Optional[str] = None, app: Optional[OrchestratedApp] = None, server: Optional[str] = None, + key: Optional[Union[dict, tuple]] = None, + vapp: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None, + portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None) -> Portal: + if vapp and not portal: + portal = vapp + if ((isinstance(arg, (VirtualApp, TestApp, Router, Portal)) or + isinstance(arg, str) and arg.endswith(".ini")) and not portal): + portal = arg + elif isinstance(arg, str) and not env: + env = arg + elif (isinstance(arg, dict) or isinstance(arg, tuple)) and not key: + key = arg + if not app and env: + if env.startswith(APP_SMAHT): + app = APP_SMAHT + elif env.startswith(APP_CGAP): + app = APP_CGAP + elif env.startswith(APP_FOURFRONT): + app = APP_FOURFRONT + if isinstance(portal, Portal): + self._vapp = portal._vapp + self._env = portal._env + self._app = portal._app + self._server = portal._server + self._key = portal._key + self._key_pair = portal._key_pair + self._key_file = portal._key_file + return + self._vapp = None + self._env = env + self._app = app + self._server = server + self._key = None + self._key_pair = None + self._key_file = None + if isinstance(portal, (VirtualApp, TestApp)): + self._vapp = portal + elif isinstance(portal, (Router, str)): + self._vapp = Portal._create_testapp(portal) + elif isinstance(key, dict): + self._key = key + self._key_pair = (key.get("key"), key.get("secret")) if key else None + if key_server := key.get("server"): + self._server = key_server + elif isinstance(key, tuple) and len(key) >= 2: + self._key = {"key": key[0], "secret": key[1]} + self._key_pair = key + elif isinstance(env, str): + key_managers = {APP_CGAP: CGAPKeyManager, APP_FOURFRONT: FourfrontKeyManager, APP_SMAHT: SMaHTKeyManager} + if not (key_manager := 
key_managers.get(self._app)) or not (key_manager := key_manager()): + raise Exception(f"Invalid app name: {self._app} (valid: {', '.join(ORCHESTRATED_APPS)}).") + if isinstance(env, str): + self._key = key_manager.get_keydict_for_env(env) + if key_server := self._key.get("server"): + self._server = key_server + elif isinstance(self._server, str): + self._key = key_manager.get_keydict_for_server(self._server) + self._key_pair = key_manager.keydict_to_keypair(self._key) if self._key else None + self._key_file = key_manager.keys_file + + @property + def env(self): + return self._env + + @property + def app(self): + return self._app + + @property + def server(self): + return self._server + + @property + def key(self): + return self._key + + @property + def key_pair(self): + return self._key_pair + + @property + def key_file(self): + return self._key_file + + @property + def vapp(self): + return self._vapp + + def get_metadata(self, object_id: str) -> Optional[dict]: + return get_metadata(obj_id=object_id, vapp=self._vapp, key=self._key) + + def patch_metadata(self, object_id: str, data: str) -> Optional[dict]: + if self._key: + return patch_metadata(obj_id=object_id, patch_item=data, key=self._key) + return self.patch(f"/{object_id}", data) + + def post_metadata(self, object_type: str, data: str) -> Optional[dict]: + if self._key: + return post_metadata(schema_name=object_type, post_item=data, key=self._key) + return self.post(f"/{object_type}", data) + + def get(self, uri: str, follow: bool = True, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]: + if isinstance(self._vapp, (VirtualApp, TestApp)): + response = self._vapp.get(self._uri(uri), **self._kwargs(**kwargs)) + if response and response.status_code in [301, 302, 303, 307, 308] and follow: + response = response.follow() + return self._response(response) + return requests.get(self._uri(uri), allow_redirects=follow, **self._kwargs(**kwargs)) + + def patch(self, uri: str, data: Optional[dict] = None, + 
json: Optional[dict] = None, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]: + if isinstance(self._vapp, (VirtualApp, TestApp)): + return self._vapp.patch_json(self._uri(uri), json or data, **self._kwargs(**kwargs)) + return requests.patch(self._uri(uri), json=json or data, **self._kwargs(**kwargs)) + + def post(self, uri: str, data: Optional[dict] = None, json: Optional[dict] = None, + files: Optional[dict] = None, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]: + if isinstance(self._vapp, (VirtualApp, TestApp)): + if files: + return self._vapp.post(self._uri(uri), json or data, upload_files=files, **self._kwargs(**kwargs)) + else: + return self._vapp.post_json(self._uri(uri), json or data, upload_files=files, **self._kwargs(**kwargs)) + return requests.post(self._uri(uri), json=json or data, files=files, **self._kwargs(**kwargs)) + + def get_schema(self, schema_name: str) -> Optional[dict]: + return get_schema(self.schema_name(schema_name), portal_vapp=self._vapp, key=self._key) + + def get_schemas(self) -> dict: + return self.get("/profiles/").json() + + @staticmethod + def schema_name(name: str) -> str: + return to_camel_case(name) + + def is_file_schema(self, schema_name: str) -> bool: + if super_type_map := self.get_schemas_super_type_map(): + if file_super_type := super_type_map.get(FILE_SCHEMA_NAME): + return self.schema_name(schema_name) in file_super_type + return False + + def get_schemas_super_type_map(self) -> dict: + """ + Returns the "super type map" for all of the known schemas (via /profiles). + This is a dictionary of all types which have (one or more) sub-types whose value is + an array of all of those sub-types (direct and all descendents), in breadth first order. 
+ """ + def breadth_first(super_type_map: dict, super_type_name: str) -> dict: + result = [] + queue = deque(super_type_map.get(super_type_name, [])) + while queue: + result.append(sub_type_name := queue.popleft()) + if sub_type_name in super_type_map: + queue.extend(super_type_map[sub_type_name]) + return result + if not (schemas := self.get_schemas()): + return {} + super_type_map = {} + for type_name in schemas: + if super_type_name := schemas[type_name].get("rdfs:subClassOf"): + super_type_name = super_type_name.replace("/profiles/", "").replace(".json", "") + if super_type_name != "Item": + if not super_type_map.get(super_type_name): + super_type_map[super_type_name] = [type_name] + elif type_name not in super_type_map[super_type_name]: + super_type_map[super_type_name].append(type_name) + super_type_map_flattened = {} + for super_type_name in super_type_map: + super_type_map_flattened[super_type_name] = breadth_first(super_type_map, super_type_name) + return super_type_map_flattened + + def _uri(self, uri: str) -> str: + if not isinstance(uri, str) or not uri: + return "/" + if uri.lower().startswith("http://") or uri.lower().startswith("https://"): + return uri + uri = re.sub(r"/+", "/", uri) + return (self._server + ("/" if uri.startswith("/") else "") + uri) if self._server else uri + + def _kwargs(self, **kwargs) -> dict: + result_kwargs = {"headers": + kwargs.get("headers", {"Content-type": "application/json", "Accept": "application/json"})} + if self._key_pair: + result_kwargs["auth"] = self._key_pair + if isinstance(timeout := kwargs.get("timeout"), int): + result_kwargs["timeout"] = timeout + return result_kwargs + + def _response(self, response) -> Optional[RequestResponse]: + if response and isinstance(getattr(response.__class__, "json"), property): + class RequestResponseWrapper: # For consistency change json property to method. 
+ def __init__(self, response, **kwargs): + super().__init__(**kwargs) + self._response = response + def __getattr__(self, attr): # noqa + return getattr(self._response, attr) + def json(self): # noqa + return self._response.json + response = RequestResponseWrapper(response) + return response + + @staticmethod + def create_for_testing(ini_file: Optional[str] = None) -> Portal: + if isinstance(ini_file, str): + return Portal(Portal._create_testapp(ini_file)) + minimal_ini_for_unit_testing = "[app:app]\nuse = egg:encoded\nsqlalchemy.url = postgresql://dummy\n" + with temporary_file(content=minimal_ini_for_unit_testing, suffix=".ini") as ini_file: + return Portal(Portal._create_testapp(ini_file)) + + @staticmethod + def create_for_testing_local(ini_file: Optional[str] = None) -> Portal: + if isinstance(ini_file, str) and ini_file: + return Portal(Portal._create_testapp(ini_file)) + minimal_ini_for_testing_local = "\n".join([ + "[app:app]\nuse = egg:encoded\nfile_upload_bucket = dummy", + "sqlalchemy.url = postgresql://postgres@localhost:5441/postgres?host=/tmp/snovault/pgdata", + "multiauth.groupfinder = encoded.authorization.smaht_groupfinder", + "multiauth.policies = auth0 session remoteuser accesskey", + "multiauth.policy.session.namespace = mailto", + "multiauth.policy.session.use = encoded.authentication.NamespacedAuthenticationPolicy", + "multiauth.policy.session.base = pyramid.authentication.SessionAuthenticationPolicy", + "multiauth.policy.remoteuser.namespace = remoteuser", + "multiauth.policy.remoteuser.use = encoded.authentication.NamespacedAuthenticationPolicy", + "multiauth.policy.remoteuser.base = pyramid.authentication.RemoteUserAuthenticationPolicy", + "multiauth.policy.accesskey.namespace = accesskey", + "multiauth.policy.accesskey.use = encoded.authentication.NamespacedAuthenticationPolicy", + "multiauth.policy.accesskey.base = encoded.authentication.BasicAuthAuthenticationPolicy", + "multiauth.policy.accesskey.check = 
encoded.authentication.basic_auth_check", + "multiauth.policy.auth0.use = encoded.authentication.NamespacedAuthenticationPolicy", + "multiauth.policy.auth0.namespace = auth0", + "multiauth.policy.auth0.base = encoded.authentication.Auth0AuthenticationPolicy" + ]) + with temporary_file(content=minimal_ini_for_testing_local, suffix=".ini") as minimal_ini_file: + return Portal(Portal._create_testapp(minimal_ini_file)) + + @staticmethod + def _create_testapp(value: Union[str, Router, TestApp] = "development.ini") -> TestApp: + """ + Creates and returns a TestApp. Refactored out of above loadxl code to consolidate at a + single point; also for use by the generate_local_access_key and view_local_object scripts. + """ + if isinstance(value, TestApp): + return value + app = value if isinstance(value, Router) else get_app(value, "app") + return TestApp(app, {"HTTP_ACCEPT": "application/json", "REMOTE_USER": "TEST"}) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index 45678f0c1..b9d0a8286 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -1,24 +1,19 @@ -from collections import deque import copy from functools import lru_cache import json from jsonschema import Draft7Validator as SchemaValidator import os -from pyramid.paster import get_app from pyramid.router import Router import re -import requests -from requests.models import Response as RequestResponse import sys from typing import Any, Callable, List, Optional, Tuple, Type, Union -from webtest.app import TestApp, TestResponse -from dcicutils.common import OrchestratedApp, APP_CGAP, APP_FOURFRONT, APP_SMAHT, ORCHESTRATED_APPS -from dcicutils.creds_utils import CGAPKeyManager, FourfrontKeyManager, SMaHTKeyManager +from webtest.app import TestApp +from dcicutils.common import OrchestratedApp from dcicutils.data_readers import CsvReader, Excel, RowReader -from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata from dcicutils.misc_utils 
import (create_object, load_json_if, merge_objects, remove_empty_properties, right_trim, split_string, to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp) -from dcicutils.zip_utils import temporary_file, unpack_gz_file_to_temporary_file, unpack_files +from dcicutils.portal_utils import Portal as PortalBase +from dcicutils.zip_utils import unpack_gz_file_to_temporary_file, unpack_files # Classes/functions to parse a CSV or Excel Spreadsheet into structured data, using a specialized @@ -36,12 +31,10 @@ ARRAY_NAME_SUFFIX_CHAR = "#" ARRAY_NAME_SUFFIX_REGEX = re.compile(rf"{ARRAY_NAME_SUFFIX_CHAR}\d+") DOTTED_NAME_DELIMITER_CHAR = "." -FILE_SCHEMA_NAME = "File" FILE_SCHEMA_NAME_PROPERTY = "filename" # Forward type references for type hints. Portal = Type["Portal"] -PortalBase = Type["PortalBase"] Schema = Type["Schema"] StructuredDataSet = Type["StructuredDataSet"] @@ -526,7 +519,7 @@ def unadorn_column_name(column_name: str, full: bool = True) -> str: @staticmethod def type_name(value: str) -> str: # File or other name. 
name = os.path.basename(value).replace(" ", "") if isinstance(value, str) else "" - return to_camel_case(name[0:dot] if (dot := name.rfind(".")) > 0 else name) + return PortalBase.schema_name(name[0:dot] if (dot := name.rfind(".")) > 0 else name) @staticmethod def array_indices(name: str) -> Tuple[Optional[str], Optional[List[int]]]: @@ -540,207 +533,11 @@ def array_indices(name: str) -> Tuple[Optional[str], Optional[List[int]]]: return (name, indices) if indices else (None, None) -class PortalBase: - - def __init__(self, - arg: Optional[Union[VirtualApp, TestApp, Router, PortalBase, dict, tuple, str]] = None, - env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None, - key: Optional[Union[dict, tuple]] = None, - portal: Optional[Union[VirtualApp, TestApp, Router, PortalBase, str]] = None) -> PortalBase: - if ((isinstance(arg, (VirtualApp, TestApp, Router, PortalBase)) or - isinstance(arg, str) and arg.endswith(".ini")) and not portal): - portal = arg - elif isinstance(arg, str) and not env: - env = arg - elif (isinstance(arg, dict) or isinstance(arg, tuple)) and not key: - key = arg - self._vapp = None - self._key = None - self._key_pair = None - self._key_file = None - self._env = env - self._app = app - self._server = None - if isinstance(portal, PortalBase): - self._vapp = portal._vapp - self._key = portal._key - self._key_pair = portal._key_pair - self._server = portal._server - elif isinstance(portal, (VirtualApp, TestApp)): - self._vapp = portal - elif isinstance(portal, (Router, str)): - self._vapp = PortalBase._create_testapp(portal) - elif isinstance(key, dict): - self._key = key - self._key_pair = (key.get("key"), key.get("secret")) if key else None - self._server = key.get("server") - elif isinstance(key, tuple) and len(key) >= 2: - self._key = {"key": key[0], "secret": key[1]} - self._key_pair = key - elif isinstance(env, str): - key_managers = {APP_CGAP: CGAPKeyManager, APP_FOURFRONT: FourfrontKeyManager, APP_SMAHT: 
SMaHTKeyManager} - if not (key_manager := key_managers.get(app)) or not (key_manager := key_manager()): - raise Exception(f"Invalid app name: {app} (valid: {', '.join(ORCHESTRATED_APPS)}).") - if isinstance(env, str): - self._key = key_manager.get_keydict_for_env(env) - self._server = self._key.get("server") if self._key else None - elif isinstance(server, str): - self._key = key_manager.get_keydict_for_server(server) - self._server = server - self._key_pair = key_manager.keydict_to_keypair(self._key) if self._key else None - self._key_file = key_manager.keys_file - - @property - def env(self): - return self._env - - @property - def app(self): - return self._app - - @property - def key(self): - return self._key - - @property - def key_pair(self): - return self._key_pair - - @property - def key_file(self): - return self._key_file - - @property - def server(self): - return self._server - - @property - def vapp(self): - return self._vapp - - def get_metadata(self, object_id: str) -> Optional[dict]: - return get_metadata(obj_id=object_id, vapp=self._vapp, key=self._key) - - def patch_metadata(self, object_id: str, data: str) -> Optional[dict]: - if self._key: - return patch_metadata(obj_id=object_id, patch_item=data, key=self._key) - return self.patch(f"/{object_id}", data) - - def post_metadata(self, object_type: str, data: str) -> Optional[dict]: - if self._key: - return post_metadata(schema_name=object_type, post_item=data, key=self._key) - return self.post(f"/{object_type}", data) - - def get(self, uri: str, follow: bool = True, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]: - if isinstance(self._vapp, (VirtualApp, TestApp)): - response = self._vapp.get(self._uri(uri), **self._kwargs(**kwargs)) - if response and response.status_code in [301, 302, 303, 307, 308] and follow: - response = response.follow() - return self._response(response) - return requests.get(self._uri(uri), allow_redirects=follow, **self._kwargs(**kwargs)) - - def patch(self, uri: 
str, data: Optional[dict] = None, - json: Optional[dict] = None, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]: - if isinstance(self._vapp, (VirtualApp, TestApp)): - return self._vapp.patch_json(self._uri(uri), json or data, **self._kwargs(**kwargs)) - return requests.patch(self._uri(uri), json=json or data, **self._kwargs(**kwargs)) - - def post(self, uri: str, data: Optional[dict] = None, json: Optional[dict] = None, - files: Optional[dict] = None, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]: - if isinstance(self._vapp, (VirtualApp, TestApp)): - if files: - return self._vapp.post(self._uri(uri), json or data, upload_files=files, **self._kwargs(**kwargs)) - else: - return self._vapp.post_json(self._uri(uri), json or data, upload_files=files, **self._kwargs(**kwargs)) - return requests.post(self._uri(uri), json=json or data, files=files, **self._kwargs(**kwargs)) - - def get_schema(self, schema_name: str) -> Optional[dict]: - return get_schema(schema_name, portal_vapp=self._vapp, key=self._key) - - def get_schemas(self) -> dict: - return self.get("/profiles/").json() - - def _uri(self, uri: str) -> str: - if not isinstance(uri, str) or not uri: - return "/" - if uri.lower().startswith("http://") or uri.lower().startswith("https://"): - return uri - uri = re.sub(r"/+", "/", uri) - return (self._server + ("/" if uri.startswith("/") else "") + uri) if self._server else uri - - def _kwargs(self, **kwargs) -> dict: - result_kwargs = {"headers": - kwargs.get("headers", {"Content-type": "application/json", "Accept": "application/json"})} - if self._key_pair: - result_kwargs["auth"] = self._key_pair - if isinstance(timeout := kwargs.get("timeout"), int): - result_kwargs["timeout"] = timeout - return result_kwargs - - def _response(self, response) -> Optional[RequestResponse]: - if response and isinstance(getattr(response.__class__, "json"), property): - class RequestResponseWrapper: # For consistency change json property to method. 
- def __init__(self, response, **kwargs): - super().__init__(**kwargs) - self._response = response - def __getattr__(self, attr): # noqa - return getattr(self._response, attr) - def json(self): # noqa - return self._response.json - response = RequestResponseWrapper(response) - return response - - @staticmethod - def create_for_testing(ini_file: Optional[str] = None) -> PortalBase: - if isinstance(ini_file, str): - return PortalBase(PortalBase._create_testapp(ini_file)) - minimal_ini_for_unit_testing = "[app:app]\nuse = egg:encoded\nsqlalchemy.url = postgresql://dummy\n" - with temporary_file(content=minimal_ini_for_unit_testing, suffix=".ini") as ini_file: - return PortalBase(PortalBase._create_testapp(ini_file)) - - @staticmethod - def create_for_testing_local(ini_file: Optional[str] = None) -> PortalBase: - if isinstance(ini_file, str) and ini_file: - return PortalBase(PortalBase._create_testapp(ini_file)) - minimal_ini_for_testing_local = "\n".join([ - "[app:app]\nuse = egg:encoded\nfile_upload_bucket = dummy", - "sqlalchemy.url = postgresql://postgres@localhost:5441/postgres?host=/tmp/snovault/pgdata", - "multiauth.groupfinder = encoded.authorization.smaht_groupfinder", - "multiauth.policies = auth0 session remoteuser accesskey", - "multiauth.policy.session.namespace = mailto", - "multiauth.policy.session.use = encoded.authentication.NamespacedAuthenticationPolicy", - "multiauth.policy.session.base = pyramid.authentication.SessionAuthenticationPolicy", - "multiauth.policy.remoteuser.namespace = remoteuser", - "multiauth.policy.remoteuser.use = encoded.authentication.NamespacedAuthenticationPolicy", - "multiauth.policy.remoteuser.base = pyramid.authentication.RemoteUserAuthenticationPolicy", - "multiauth.policy.accesskey.namespace = accesskey", - "multiauth.policy.accesskey.use = encoded.authentication.NamespacedAuthenticationPolicy", - "multiauth.policy.accesskey.base = encoded.authentication.BasicAuthAuthenticationPolicy", - "multiauth.policy.accesskey.check = 
encoded.authentication.basic_auth_check", - "multiauth.policy.auth0.use = encoded.authentication.NamespacedAuthenticationPolicy", - "multiauth.policy.auth0.namespace = auth0", - "multiauth.policy.auth0.base = encoded.authentication.Auth0AuthenticationPolicy" - ]) - with temporary_file(content=minimal_ini_for_testing_local, suffix=".ini") as minimal_ini_file: - return PortalBase(PortalBase._create_testapp(minimal_ini_file)) - - @staticmethod - def _create_testapp(value: Union[str, Router, TestApp] = "development.ini") -> TestApp: - """ - Creates and returns a TestApp. Refactored out of above loadxl code to consolidate at a - single point; also for use by the generate_local_access_key and view_local_object scripts. - """ - if isinstance(value, TestApp): - return value - app = value if isinstance(value, Router) else get_app(value, "app") - return TestApp(app, {"HTTP_ACCEPT": "application/json", "REMOTE_USER": "TEST"}) - - class Portal(PortalBase): def __init__(self, arg: Optional[Union[VirtualApp, TestApp, Router, Portal, dict, tuple, str]] = None, - env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None, + env: Optional[str] = None, app: OrchestratedApp = None, server: Optional[str] = None, key: Optional[Union[dict, tuple]] = None, portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None, data: Optional[dict] = None, schemas: Optional[List[dict]] = None) -> Optional[Portal]: @@ -780,42 +577,9 @@ def get_schemas(self) -> dict: schemas[user_specified_schema["title"]] = user_specified_schema return schemas - def is_file_schema(self, schema_name: str) -> bool: - if super_type_map := self.get_schemas_super_type_map(): - if file_super_type := super_type_map.get(FILE_SCHEMA_NAME): - return Schema.type_name(schema_name) in file_super_type - return False - @lru_cache(maxsize=1) def get_schemas_super_type_map(self) -> dict: - """ - Returns the "super type map" for all of the known schemas (via /profiles). 
- This is a dictionary of all types which have (one or more) sub-types whose value is - an array of all of those sub-types (direct and all descendents), in breadth first order. - """ - def breadth_first(super_type_map: dict, super_type_name: str) -> dict: - result = [] - queue = deque(super_type_map.get(super_type_name, [])) - while queue: - result.append(sub_type_name := queue.popleft()) - if sub_type_name in super_type_map: - queue.extend(super_type_map[sub_type_name]) - return result - if not (schemas := self.get_schemas()): - return {} - super_type_map = {} - for type_name in schemas: - if super_type_name := schemas[type_name].get("rdfs:subClassOf"): - super_type_name = super_type_name.replace("/profiles/", "").replace(".json", "") - if super_type_name != "Item": - if not super_type_map.get(super_type_name): - super_type_map[super_type_name] = [type_name] - elif type_name not in super_type_map[super_type_name]: - super_type_map[super_type_name].append(type_name) - super_type_map_flattened = {} - for super_type_name in super_type_map: - super_type_map_flattened[super_type_name] = breadth_first(super_type_map, super_type_name) - return super_type_map_flattened + return super(Portal, self).get_schemas_super_type_map() def ref_exists(self, type_name: str, value: str) -> List[str]: resolved = [] From 030450ea9f183051def07d3f1f7e1812ada20076 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 10 Dec 2023 14:07:54 -0500 Subject: [PATCH 45/53] Refactored PortalBase from structured_data into portal_utils.Portal --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3d4df104d..4724edf86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b11" # TODO: To become 8.4.1 +version = "8.4.0.1b12" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team 
"] license = "MIT" From 9ce5ba4e44c945443826fce3dcab599ac12f077c Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 10 Dec 2023 14:08:11 -0500 Subject: [PATCH 46/53] Refactored PortalBase from structured_data into portal_utils.Portal --- dcicutils/structured_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index b9d0a8286..fab982998 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -11,7 +11,7 @@ from dcicutils.common import OrchestratedApp from dcicutils.data_readers import CsvReader, Excel, RowReader from dcicutils.misc_utils import (create_object, load_json_if, merge_objects, remove_empty_properties, right_trim, - split_string, to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp) + split_string, to_boolean, to_enum, to_float, to_integer, VirtualApp) from dcicutils.portal_utils import Portal as PortalBase from dcicutils.zip_utils import unpack_gz_file_to_temporary_file, unpack_files From a7ed50b73de81ce863027f1d4115eac5b9fa9a40 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 10 Dec 2023 14:13:03 -0500 Subject: [PATCH 47/53] Refactored PortalBase from structured_data into portal_utils.Portal --- dcicutils/structured_data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index fab982998..bd641e0ee 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -541,7 +541,7 @@ def __init__(self, key: Optional[Union[dict, tuple]] = None, portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None, data: Optional[dict] = None, schemas: Optional[List[dict]] = None) -> Optional[Portal]: - super(Portal, self).__init__(arg, env=env, app=app, server=server, key=key, portal=portal) + super().__init__(arg, env=env, app=app, server=server, key=key, portal=portal) if isinstance(arg, Portal) and not portal: portal = 
arg if isinstance(portal, Portal): @@ -554,7 +554,7 @@ def __init__(self, @lru_cache(maxsize=256) def get_metadata(self, object_name: str) -> Optional[dict]: try: - return super(Portal, self).get_metadata(object_name) + return super().get_metadata(object_name) except Exception: return None @@ -569,7 +569,7 @@ def get_schema(self, schema_name: str) -> Optional[dict]: @lru_cache(maxsize=1) def get_schemas(self) -> dict: - schemas = super(Portal, self).get_schemas() + schemas = super().get_schemas() if self._schemas: schemas = copy.deepcopy(schemas) for user_specified_schema in self._schemas: @@ -579,7 +579,7 @@ def get_schemas(self) -> dict: @lru_cache(maxsize=1) def get_schemas_super_type_map(self) -> dict: - return super(Portal, self).get_schemas_super_type_map() + return super().get_schemas_super_type_map() def ref_exists(self, type_name: str, value: str) -> List[str]: resolved = [] From 6442541e1ff9f5b5c36ae7918b53af92678d0fe8 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 10 Dec 2023 14:37:29 -0500 Subject: [PATCH 48/53] added portal_utils to dcicutils.rst --- docs/source/dcicutils.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/source/dcicutils.rst b/docs/source/dcicutils.rst index 24d41daf6..37da22490 100644 --- a/docs/source/dcicutils.rst +++ b/docs/source/dcicutils.rst @@ -246,6 +246,13 @@ opensearch_utils :members: +portal_utils +^^^^^^^^^^^^ + +.. 
automodule:: dcicutils.portal_utils + :members: + + project_utils ^^^^^^^^^^^^^ From 137807a5c1e33cb5d7486a78d190dce225165776 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 11 Dec 2023 12:58:24 -0500 Subject: [PATCH 49/53] minor code cleanup --- dcicutils/portal_utils.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index 551a0201c..f23574adc 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -59,7 +59,7 @@ def __init__(self, if isinstance(portal, (VirtualApp, TestApp)): self._vapp = portal elif isinstance(portal, (Router, str)): - self._vapp = Portal._create_testapp(portal) + self._vapp = Portal._create_vapp(portal) elif isinstance(key, dict): self._key = key self._key_pair = (key.get("key"), key.get("secret")) if key else None @@ -224,15 +224,15 @@ def json(self): # noqa @staticmethod def create_for_testing(ini_file: Optional[str] = None) -> Portal: if isinstance(ini_file, str): - return Portal(Portal._create_testapp(ini_file)) + return Portal(Portal._create_vapp(ini_file)) minimal_ini_for_unit_testing = "[app:app]\nuse = egg:encoded\nsqlalchemy.url = postgresql://dummy\n" with temporary_file(content=minimal_ini_for_unit_testing, suffix=".ini") as ini_file: - return Portal(Portal._create_testapp(ini_file)) + return Portal(Portal._create_vapp(ini_file)) @staticmethod def create_for_testing_local(ini_file: Optional[str] = None) -> Portal: if isinstance(ini_file, str) and ini_file: - return Portal(Portal._create_testapp(ini_file)) + return Portal(Portal._create_vapp(ini_file)) minimal_ini_for_testing_local = "\n".join([ "[app:app]\nuse = egg:encoded\nfile_upload_bucket = dummy", "sqlalchemy.url = postgresql://postgres@localhost:5441/postgres?host=/tmp/snovault/pgdata", @@ -253,15 +253,11 @@ def create_for_testing_local(ini_file: Optional[str] = None) -> Portal: "multiauth.policy.auth0.base = encoded.authentication.Auth0AuthenticationPolicy" 
]) with temporary_file(content=minimal_ini_for_testing_local, suffix=".ini") as minimal_ini_file: - return Portal(Portal._create_testapp(minimal_ini_file)) + return Portal(Portal._create_vapp(minimal_ini_file)) @staticmethod - def _create_testapp(value: Union[str, Router, TestApp] = "development.ini") -> TestApp: - """ - Creates and returns a TestApp. Refactored out of above loadxl code to consolidate at a - single point; also for use by the generate_local_access_key and view_local_object scripts. - """ + def _create_vapp(value: Union[str, Router, TestApp] = "development.ini", app_name: str = "app") -> TestApp: if isinstance(value, TestApp): return value - app = value if isinstance(value, Router) else get_app(value, "app") + app = value if isinstance(value, Router) else get_app(value, app_name) return TestApp(app, {"HTTP_ACCEPT": "application/json", "REMOTE_USER": "TEST"}) From 99394745a2ae4782fe89d0d94e67677c9a5f6feb Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 11 Dec 2023 12:59:41 -0500 Subject: [PATCH 50/53] CHANGELOG changes --- CHANGELOG.rst | 4 ++-- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 8b1ffd0b7..802801dca 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,8 +8,8 @@ Change Log 8.4.1 ===== -* Strip sheet name in data_readers.Excel. -* Moved structured_data.py from smaht-portal to here. +* Moved structured_data.py from smaht-portal to here; new portal_utils and data_readers modules. +* Strip sheet name in data_readers.Excel; respecte (ignore) hidden sheets. 
8.4.0 diff --git a/pyproject.toml b/pyproject.toml index 4724edf86..665e08356 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b12" # TODO: To become 8.4.1 +version = "8.4.0.1b13" # TODO: To become 8.4.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 367a51bd734270a969ea48fa37cfdfd0e4da93a9 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 11 Dec 2023 13:11:24 -0500 Subject: [PATCH 51/53] minor updates based on pr-294 --- dcicutils/data_readers.py | 6 +++--- dcicutils/misc_utils.py | 2 +- dcicutils/structured_data.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dcicutils/data_readers.py b/dcicutils/data_readers.py index 8fd6d9a10..f569857c5 100644 --- a/dcicutils/data_readers.py +++ b/dcicutils/data_readers.py @@ -2,7 +2,7 @@ import csv import openpyxl from typing import Any, Generator, Iterator, List, Optional, Type, Tuple, Union -from dcicutils.misc_utils import create_object, right_trim +from dcicutils.misc_utils import create_dict, right_trim # Forward type references for type hints. 
Excel = Type["Excel"] @@ -59,11 +59,11 @@ def file(self) -> Optional[str]: def warnings(self) -> List[str]: warnings = [] if self._warning_empty_headers: - warnings.append({"src": create_object(file=self.file), + warnings.append({"src": create_dict(file=self.file), "warning": "Empty header column encountered; ignoring it and all subsequent columns."}) if self._warning_extra_values: for row_number in self._warning_extra_values: - warnings.append({"src": create_object(file=self.file, row=row_number), + warnings.append({"src": create_dict(file=self.file, row=row_number), "warning": f"Extra row column values."}) return warnings diff --git a/dcicutils/misc_utils.py b/dcicutils/misc_utils.py index 752fc91e7..baecf97c6 100644 --- a/dcicutils/misc_utils.py +++ b/dcicutils/misc_utils.py @@ -1501,7 +1501,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]], return list_or_tuple[:i + 1] -def create_object(**kwargs) -> dict: +def create_dict(**kwargs) -> dict: result = {} for name in kwargs: if kwargs[name]: diff --git a/dcicutils/structured_data.py b/dcicutils/structured_data.py index bd641e0ee..de25ef114 100644 --- a/dcicutils/structured_data.py +++ b/dcicutils/structured_data.py @@ -10,7 +10,7 @@ from webtest.app import TestApp from dcicutils.common import OrchestratedApp from dcicutils.data_readers import CsvReader, Excel, RowReader -from dcicutils.misc_utils import (create_object, load_json_if, merge_objects, remove_empty_properties, right_trim, +from dcicutils.misc_utils import (create_dict, load_json_if, merge_objects, remove_empty_properties, right_trim, split_string, to_boolean, to_enum, to_float, to_integer, VirtualApp) from dcicutils.portal_utils import Portal as PortalBase from dcicutils.zip_utils import unpack_gz_file_to_temporary_file, unpack_files @@ -71,7 +71,7 @@ def validate(self, force: bool = False) -> None: row_number += 1 if (validation_errors := schema.validate(data)) is not None: for validation_error in validation_errors: - 
self._note_error({"src": create_object(type=schema.name, row=row_number), + self._note_error({"src": create_dict(type=schema.name, row=row_number), "error": validation_error}, "validation") @property @@ -204,8 +204,8 @@ def create_row(self) -> dict: def set_value(self, data: dict, column_name: str, value: str, file: Optional[str], row_number: int = -1) -> None: if (set_value_function := self._set_value_functions.get(column_name)): - src = create_object(type=self._schema.name if self._schema else None, - column=column_name, file=file, row=row_number) + src = create_dict(type=self._schema.name if self._schema else None, + column=column_name, file=file, row=row_number) set_value_function(data, value, src) def _create_row_template(self, column_names: List[str]) -> dict: # Surprisingly tricky code here. From ab05d1d672f44191a40eb01c097d943c5ac91812 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 11 Dec 2023 13:32:28 -0500 Subject: [PATCH 52/53] docstring for Portal --- dcicutils/portal_utils.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/dcicutils/portal_utils.py b/dcicutils/portal_utils.py index f23574adc..82a34f574 100644 --- a/dcicutils/portal_utils.py +++ b/dcicutils/portal_utils.py @@ -17,7 +17,22 @@ class Portal: - + """ + This is meant to be an uber wrapper for Portal access. It can be created in a variety of ways: + 1. From a (Portal) .ini file (e.g. development.ini) + 2. From a key dictionary, containing "key" and "secret" property values. + 3. From a key tuple, containing (in order) a key and secret values. + 4. From a keys file assumed to reside in ~/.{app}-keys.json where the given "app" value is either "smaht", "cgap", + or "fourfront"; and where this file is assumed to contain a dictionary with a key equal to the given "env" + value (e.g. 
smaht-localhost) and with a dictionary value containing "key" and "secret" property values; if + an "app" value is not specified but the given "env" value begins with one of the app values then that value + will be used, i.e. e.g. if env is "smaht-localhost" and app is unspecified than it is assumed to be "smaht". + 5. From a keys file as described above (#4) but rather than be identified by the given "env" value it + is looked up by the given "server" name and the "server" key dictionary value in the key file. + 6. From a given "vapp" value (which is assumed to be a TestApp or VirtualApp). + 7. From another Portal object. + 8. From a a pyramid Router object. + """ def __init__(self, arg: Optional[Union[VirtualApp, TestApp, Router, Portal, dict, tuple, str]] = None, env: Optional[str] = None, app: Optional[OrchestratedApp] = None, server: Optional[str] = None, From 3dd5c24f83869e009f4079e40aec2db38048710c Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 11 Dec 2023 15:31:42 -0500 Subject: [PATCH 53/53] Update version to 8.5.0; ready to merge PR-294 to master. --- CHANGELOG.rst | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 802801dca..7e042af56 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,7 +6,7 @@ dcicutils Change Log ---------- -8.4.1 +8.5.0 ===== * Moved structured_data.py from smaht-portal to here; new portal_utils and data_readers modules. * Strip sheet name in data_readers.Excel; respecte (ignore) hidden sheets. diff --git a/pyproject.toml b/pyproject.toml index 665e08356..9a9e81594 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.4.0.1b13" # TODO: To become 8.4.1 +version = "8.5.0" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT"