Skip to content

Commit

Permalink
Merge pull request #299 from 4dn-dcic/dmichaels-20240205
Browse files Browse the repository at this point in the history
Fixes related to submitr for date/time type and out-of-order references.
  • Loading branch information
dmichaels-harvard authored Feb 15, 2024
2 parents 174d225 + a46f772 commit f6d8ae4
Show file tree
Hide file tree
Showing 14 changed files with 553 additions and 60 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ Change Log
----------


8.8.0
=====
* Changes to structured_data to support date/time types.
* Changes to structured_data to support internal references in any order.
* New datetime_utils module and tests; first created for date/time support in structured_data.
* Added view-portal-object script for general troubleshooting.
* Change to data_reader to ignore sheet names enclosed in parentheses.


8.7.2
=====

Expand Down
70 changes: 70 additions & 0 deletions dcicutils/captured_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from collections import namedtuple
from contextlib import contextmanager
import io
import sys
from typing import Optional

# The streams that were installed as sys.stdout/sys.stderr when this module was imported;
# these are what "uncaptured" output is written to.
_real_stdout = sys.stdout
_real_stderr = sys.stderr


@contextmanager
def captured_output(capture: bool = True):
    """
    Context manager to capture any/all output to stdout or stderr, and not actually output it to stdout
    or stderr. Yields an object with a get_captured_output() method to get the output captured thus far,
    an uncaptured_print() method to actually print the given output to stdout even though output to
    stdout is being captured, and an uncaptured_input() method to prompt for (and return) input in the
    same uncaptured fashion. Can be useful, for example, in creating command-line scripts which invoke
    code which outputs a lot of info, warning, error, etc to stdout or stderr, and we want to suppress
    that output; but with the yielded uncaptured_print() method output specific to the script can actually
    be output (to stdout); and/or can also optionally output any/all captured output, e.g. for debugging
    or troubleshooting purposes. To disable this capture, without having to restructure your code WRT
    the usage of the with-clause with this context manager, pass False as an argument to this context
    manager; in that case get_captured_output() returns None.
    """

    original_stdout = _real_stdout
    original_stderr = _real_stderr
    captured_buffer = io.StringIO()

    def set_original_output() -> None:
        sys.stdout = original_stdout
        sys.stderr = original_stderr

    def set_captured_output() -> None:
        # Both streams share one buffer, so captured stdout and stderr are interleaved in write order.
        if capture:
            sys.stdout = captured_buffer
            sys.stderr = captured_buffer

    def uncaptured_print(*args, **kwargs) -> None:
        set_original_output()
        try:
            print(*args, **kwargs)
        finally:
            # Re-enable capturing even if print() raised, otherwise all
            # subsequent output would silently escape the capture.
            set_captured_output()

    def uncaptured_input(message: str) -> str:
        set_original_output()
        try:
            return input(message)
        finally:
            # Same as above; input() can raise, e.g. EOFError or KeyboardInterrupt.
            set_captured_output()

    def get_captured_output() -> Optional[str]:
        return captured_buffer.getvalue() if capture else None

    try:
        set_captured_output()
        Result = namedtuple("Result", ["get_captured_output", "uncaptured_print", "uncaptured_input"])
        yield Result(get_captured_output, uncaptured_print, uncaptured_input)
    finally:
        set_original_output()


@contextmanager
def uncaptured_output():
    """
    Context manager which, for the duration of the with-block, points sys.stdout and sys.stderr
    at the streams recorded in _real_stdout and _real_stderr. Whatever streams were installed
    on entry are put back on exit, including when the with-block raises.
    """
    saved_streams = (sys.stdout, sys.stderr)
    sys.stdout, sys.stderr = _real_stdout, _real_stderr
    try:
        yield
    finally:
        sys.stdout, sys.stderr = saved_streams
13 changes: 11 additions & 2 deletions dcicutils/data_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,10 @@ def open(self) -> None:

class Excel:

def __init__(self, file: str, reader_class: Optional[Type] = None) -> None:
def __init__(self, file: str, reader_class: Optional[Type] = None, include_hidden_sheets: bool = False) -> None:
self._file = file
self._workbook = None
self._include_hidden_sheets = include_hidden_sheets
self.sheet_names = None
if isinstance(reader_class, Type) and issubclass(reader_class, ExcelSheetReader):
self._reader_class = reader_class
Expand All @@ -169,7 +170,15 @@ def open(self) -> None:
if self._workbook is None:
self._workbook = openpyxl.load_workbook(self._file, data_only=True)
self.sheet_names = [sheet_name for sheet_name in self._workbook.sheetnames
if self._workbook[sheet_name].sheet_state != "hidden"]
if not self.is_hidden_sheet(self._workbook[sheet_name])]

def is_hidden_sheet(self, sheet: openpyxl.worksheet.worksheet.Worksheet) -> bool:
"""
Returns True if the given worksheet is to be treated as hidden, otherwise False. A sheet is
treated as hidden if its sheet_state is "hidden", or if its title both starts with "(" and
ends with ")". The open() method uses this to decide which sheet names to leave out of sheet_names.
"""
# NOTE(review): indentation is not visible in this view; presumably both checks below are nested
# under this test, so that include_hidden_sheets=True disables them - confirm against the real file.
if not self._include_hidden_sheets:
if sheet.sheet_state == "hidden":
return True
# A title wrapped in parentheses, e.g. "(notes)", marks a sheet to be ignored.
if sheet.title.startswith("(") and sheet.title.endswith(")"):
return True
return False

def __del__(self) -> None:
if (workbook := self._workbook) is not None:
Expand Down
98 changes: 98 additions & 0 deletions dcicutils/datetime_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from dcicutils.misc_utils import normalize_spaces
from datetime import datetime, timedelta, timezone
from typing import Optional, Tuple


def parse_datetime_string(value: str) -> Optional[datetime]:
    """
    Parses the given string into a timezone-aware datetime object and returns it, or if ill-formatted
    then returns None. The given string is assumed to be in the format "YYYY-MM-DD hh:mm:ss" (a "T" may
    be used in place of the space) with an optional timezone suffix in format "+hh:mm", "-hh:mm", "+hh"
    or "-hh". Also allowed is just a date of the format "YYYY-MM-DD" in which case a time of "00:00:00"
    is assumed. If no (usable) timezone is specified then the local timezone is assumed.
    """
    if not isinstance(value, str) or not (value := normalize_spaces(value)):
        return None
    # Offset from UTC in minutes; None means no usable timezone suffix was found. A numeric sentinel
    # (such as -1) must not be used for this, because negative offsets like "-05:00" are legitimate.
    tz_offset_minutes = None
    if value.rfind("T") > 0:
        value = value.replace("T", " ")
    if (space := value.find(" ")) > 0 and (value_suffix := value[space + 1:]):
        plus = value_suffix.rfind("+")
        minus = value_suffix.rfind("-")
        if plus > 0 or minus > 0:
            # A "+" takes precedence over a "-" if (oddly) both are present in the time portion.
            sign_index = plus if plus > 0 else minus
            value = normalize_spaces(value[:space] + " " + value_suffix[:sign_index])
            if value_tz := normalize_spaces(value_suffix[sign_index + 1:]):
                tz_parts = value_tz.split(":")
                tz_hours_text = tz_parts[0].strip()
                # Anything other than exactly "hh:mm" is treated as hours only.
                tz_minutes_text = tz_parts[1].strip() if len(tz_parts) == 2 else "0"
                # isdecimal() rather than isdigit(): the latter accepts characters
                # (e.g. superscript digits) which int() refuses to convert.
                if tz_hours_text.isdecimal() and tz_minutes_text.isdecimal():
                    tz_offset_minutes = int(tz_hours_text) * 60 + int(tz_minutes_text)
                    if plus <= 0:
                        # Negate the whole offset so that e.g. "-05:30" means 5h30m behind UTC.
                        tz_offset_minutes = -tz_offset_minutes
    else:
        value = value + " 00:00:00"
    if tz_offset_minutes is None:
        # The helper returns (hours, minutes) whose sum, hours * 60 + minutes, is the local offset.
        tz_hours, tz_minutes = get_local_timezone_hours_minutes()
        tz_offset_minutes = tz_hours * 60 + tz_minutes
    try:
        dt = datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
        # timezone() raises ValueError for offsets of 24 hours or more; treated as ill-formatted.
        return dt.replace(tzinfo=timezone(timedelta(minutes=tz_offset_minutes)))
    except Exception:
        return None


def parse_date_string(value: str) -> Optional[datetime]:
    """
    Returns a datetime object for the date in the given string, or None if the given value is
    not a string, is empty/blank, or is not a well-formed date. The expected format is "YYYY-MM-DD";
    if that is followed by a space or a "T" then that separator and everything after it is ignored.
    """
    if not isinstance(value, str):
        return None
    value = normalize_spaces(value)
    if not value:
        return None
    # Cut at the first space if there is one past the start, otherwise at the first "T".
    separator = value.find(" ")
    if separator <= 0:
        separator = value.find("T")
    if separator > 0:
        value = value[:separator]
    try:
        return datetime.strptime(value, "%Y-%m-%d")
    except Exception:
        return None


def normalize_datetime_string(value: str) -> Optional[str]:
    """
    Returns the ISO-8601 formatted string (e.g. "2024-02-08T10:37:51-05:00") for the date/time in
    the given string, as parsed by parse_datetime_string, or None if that function cannot parse it.
    The accepted input is "YYYY-MM-DD hh:mm:ss" with an optional timezone suffix such as "+hh:mm"
    or "+hh", or just "YYYY-MM-DD" for which a time of "00:00:00" is assumed.
    """
    parsed = parse_datetime_string(value)
    if not parsed:
        return None
    return parsed.isoformat()


def normalize_date_string(value: str) -> Optional[str]:
    """
    Returns the date in the given string re-formatted as "YYYY-MM-DD" (e.g. "2024-02-08"), or None
    if parse_date_string cannot parse it. The given string is expected to be of the form "YYYY-MM-DD",
    optionally followed by a space or a "T" and anything else, which trailing portion is ignored.
    """
    parsed = parse_date_string(value)
    if not parsed:
        return None
    return parsed.strftime("%Y-%m-%d")


def get_local_timezone_string() -> str:
    """
    Returns current/local timezone offset from UTC in format like: "-05:00".
    """
    tz_hours, tz_minutes = get_local_timezone_hours_minutes()
    # The helper floor-divides the hours and returns non-negative minutes, e.g. (-4, 30) for an offset
    # of -03:30; so recombine into total minutes before formatting, rather than formatting the two
    # parts independently (which would wrongly yield "-04:30" for that example).
    total_minutes = tz_hours * 60 + tz_minutes
    sign = "-" if total_minutes < 0 else "+"
    hours, minutes = divmod(abs(total_minutes), 60)
    return f"{sign}{hours:02d}:{minutes:02d}"


def get_local_timezone_hours_minutes() -> Tuple[int, int]:
    """
    Returns the offset from UTC of the current/local timezone as an (hours, minutes) tuple of integers.
    The hours are floor-divided and the minutes are never negative, so the offset in minutes is
    hours * 60 + minutes; e.g. an offset of -03:30 is returned as (-4, 30).
    """
    local_now = datetime.now(timezone.utc).astimezone()
    offset_minutes = local_now.utcoffset().total_seconds() / 60
    whole_hours, leftover_minutes = divmod(offset_minutes, 60)
    return int(whole_hours), int(abs(leftover_minutes))
8 changes: 8 additions & 0 deletions dcicutils/misc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2540,6 +2540,14 @@ def pad_to(target_size: int, data: list, *, padding=None):
return data


def normalize_spaces(value: str) -> str:
    """
    Returns a copy of the given string in which every run of one or more whitespace
    characters is replaced by a single space, and from which any leading and trailing
    whitespace is then removed.
    """
    collapsed = re.sub(r"\s+", " ", value)
    return collapsed.strip()


class JsonLinesReader:

def __init__(self, fp, padded=False, padding=None):
Expand Down
12 changes: 7 additions & 5 deletions dcicutils/portal_object_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ class PortalObject:

_PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL

def __init__(self, portal_object: dict, portal: Portal = None, type: Optional[str] = None) -> None:
self._data = portal_object
self._portal = portal
def __init__(self, data: dict, portal: Portal = None,
schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None:
self._data = data if isinstance(data, dict) else {}
self._portal = portal if isinstance(portal, Portal) else None
self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
self._type = type if isinstance(type, str) and type else None

@property
Expand All @@ -29,7 +31,7 @@ def portal(self) -> Optional[Portal]:
@property
@lru_cache(maxsize=1)
def type(self) -> Optional[str]:
return self._type or Portal.get_schema_type(self._data)
return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else None)

@property
@lru_cache(maxsize=1)
Expand All @@ -44,7 +46,7 @@ def uuid(self) -> Optional[str]:
@property
@lru_cache(maxsize=1)
def schema(self) -> Optional[dict]:
return self._portal.get_schema(self.type) if self._portal else None
return self._schema if self._schema else (self._portal.get_schema(self.type) if self._portal else None)

def copy(self) -> PortalObject:
return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type)
Expand Down
87 changes: 51 additions & 36 deletions dcicutils/portal_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ def init_from_key(key: dict, server: Optional[str], unspecified: Optional[list]
isinstance(secret := key.get("secret"), str) and secret): # noqa
self._key = {"key": key_id, "secret": secret}
if (isinstance(server, str) and server) or (isinstance(server := key.get("server"), str) and server):
if server := normalize_server(server):
if server := Portal._normalize_server(server):
if isinstance(key_server := key.get("server"), str) and key_server:
if normalize_server(key_server) != server:
if Portal._normalize_server(key_server) != server:
raise Exception(f"Portal server inconsistency: {server} vs {key_server}")
self._key["server"] = self._server = server
if not self._key:
Expand All @@ -104,49 +104,21 @@ def init_from_key_pair(key_pair: tuple, server: Optional[str], unspecified: Opti

def init_from_keys_file(keys_file: str, env: Optional[str], server: Optional[str],
unspecified: Optional[list] = []) -> None:
try:
with io.open(keys_file := os.path.expanduser(keys_file)) as f:
keys = json.load(f)
except Exception:
raise Exception(f"Portal initialization error; cannot open keys-file: {keys_file}")
if isinstance(env, str) and env and isinstance(key := keys.get(env), dict):
init_from_key(key, server)
self._keys_file = keys_file
self._env = env
elif (isinstance(server, str) and (server := normalize_server(server)) and
(key := [keys[k] for k in keys if normalize_server(keys[k].get("server")) == server])):
init_from_key(key[0], server)
self._keys_file = keys_file
elif not env and len(keys) == 1 and (env := next(iter(keys))) and isinstance(key := keys[env], dict):
key, env = Portal._lookup_in_keys_file(keys_file, env, server, raise_exception=True)
if key:
init_from_key(key, server)
self._keys_file = keys_file
self._env = env
else:
raise Exception(f"Portal initialization error;"
f" {env or server or None} not found in keys-file: {keys_file}")

def init_from_env_server_app(env: str, server: str, app: Optional[str],
unspecified: Optional[list] = None) -> None:
if keys_file := self._default_keys_file(app, env):
if keys_file := Portal._default_keys_file(app, env, server):
init_from_keys_file(keys_file, env, server, unspecified=unspecified)
else:
init(unspecified=unspecified)
self._env = env
self._server = server

def normalize_server(server: str) -> Optional[str]:
prefix = ""
if (lowercase_server := server.lower()).startswith("http://"):
prefix = "http://"
elif lowercase_server.startswith("https://"):
prefix = "https://"
if prefix:
if (server := re.sub(r"/+", "/", server[len(prefix):])).startswith("/"):
server = server[1:]
if len(server) > 1 and server.endswith("/"):
server = server[:-1]
return prefix + server if server else None

if (valid_app := app) and not (valid_app := Portal._valid_app(app)):
raise Exception(f"Portal initialization error; invalid app: {app}")
self._app = valid_app
Expand All @@ -166,7 +138,7 @@ def normalize_server(server: str) -> Optional[str]:
init_from_env_server_app(arg, server, app, unspecified=[env])
elif (isinstance(env, str) and env) or (isinstance(server, str) and server):
init_from_env_server_app(env, server, app, unspecified=[arg])
elif not arg and (keys_file := self._default_keys_file(app=self._app or Portal.DEFAULT_APP, env=env)):
elif not arg and (keys_file := Portal._default_keys_file(self._app or Portal.DEFAULT_APP, env, server)):
# If no initial arg then look for default app keys file.
init_from_keys_file(keys_file, env, server)
elif raise_exception:
Expand Down Expand Up @@ -411,14 +383,57 @@ def _kwargs(self, **kwargs) -> dict:
result_kwargs["timeout"] = timeout
return result_kwargs

def _default_keys_file(self, app: Optional[str], env: Optional[str] = None) -> Optional[str]:
@staticmethod
def _default_keys_file(app: Optional[str], env: Optional[str], server: Optional[str]) -> Optional[str]:
def infer_app_from_env(env: str) -> Optional[str]: # noqa
if isinstance(env, str) and (lowercase_env := env.lower()):
if app := [app for app in ORCHESTRATED_APPS if lowercase_env.startswith(app.lower())]:
return self._valid_app(app[0])
return Portal._valid_app(app[0])
if (app := Portal._valid_app(app)) or (app := infer_app_from_env(env)):
keys_file = os.path.expanduser(os.path.join(Portal.KEYS_FILE_DIRECTORY, f".{app.lower()}-keys.json"))
return keys_file if os.path.exists(keys_file) else None
if not app:
for app in ORCHESTRATED_APPS:
if keys_file := Portal._default_keys_file(app, env, server):
if Portal._lookup_in_keys_file(keys_file, env, server)[0]:
return keys_file

@staticmethod
def _lookup_in_keys_file(keys_file: str, env: Optional[str], server: Optional[str],
                         raise_exception: bool = False) -> Tuple[Optional[dict], Optional[str]]:
    """
    Looks up a key in the given (JSON) keys file and returns a tuple of the key (dictionary) found
    and its environment name. The lookup is tried in this order: by the given env name; then by the
    given server, compared in normalized form with the "server" property of each entry; and lastly,
    if no env was given and the file contains exactly one entry, that entry is used. Note that for
    a match by server the env returned is the env which was passed in, not the name of the entry.
    If the file cannot be read/parsed, or if nothing is found, then returns (None, None), or raises
    an Exception if raise_exception is True.
    """
    try:
        with io.open(keys_file := os.path.expanduser(keys_file)) as f:
            keys = json.load(f)
    except Exception:
        if raise_exception:
            raise Exception(f"Portal initialization error; cannot open keys-file: {keys_file}")
        return None, None
    if not isinstance(keys, dict):
        # Valid JSON but not an object (e.g. a list); there is nothing to look up by name,
        # so treat it like an empty keys file rather than failing with an AttributeError.
        keys = {}
    if isinstance(env, str) and env and isinstance(key := keys.get(env), dict):
        return key, env
    if isinstance(server, str) and (server := Portal._normalize_server(server)):
        for entry in keys.values():
            # Skip entries which are not dictionaries or which have no (string) server,
            # rather than failing on them; such entries simply cannot match a server.
            entry_server = entry.get("server") if isinstance(entry, dict) else None
            if isinstance(entry_server, str) and Portal._normalize_server(entry_server) == server:
                return entry, env
    if not env and len(keys) == 1 and (env := next(iter(keys))) and isinstance(key := keys[env], dict):
        return key, env
    if raise_exception:
        raise Exception(f"Portal initialization error;"
                        f" {env or server or None} not found in keys-file: {keys_file}")
    return None, None

@staticmethod
def _normalize_server(server: str) -> Optional[str]:
"""
Returns a normalized form of the given server URL, or None if nothing is left of it.
An "http://" or "https://" prefix is recognized case-insensitively and emitted in lowercase;
in the remainder after such a prefix, runs of "/" are collapsed to a single "/" and one
leading "/" is dropped. A trailing "/" is dropped from a value longer than one character.
"""
# NOTE(review): server.lower() raises AttributeError for a non-string; _lookup_in_keys_file
# passes keys[k].get("server") here, which is None for an entry without a server - confirm/guard.
prefix = ""
if (lowercase_server := server.lower()).startswith("http://"):
prefix = "http://"
elif lowercase_server.startswith("https://"):
prefix = "https://"
if prefix:
# Only the part after the prefix is rewritten; its original letter case is kept.
if (server := re.sub(r"/+", "/", server[len(prefix):])).startswith("/"):
server = server[1:]
# NOTE(review): indentation is not visible in this view, so it is unclear whether the next two
# statements (and the return) also apply to values without an http(s) prefix - confirm.
if len(server) > 1 and server.endswith("/"):
server = server[:-1]
# Parsed as: (prefix + server) if server else None.
return prefix + server if server else None

@staticmethod
def _valid_app(app: Optional[str]) -> Optional[str]:
Expand Down
Loading

0 comments on commit f6d8ae4

Please sign in to comment.