Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes related to submitr for date/time type and out-of-order references. #299

Merged
merged 21 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ Change Log
----------


8.8.0
=====
* Changes to structured_data to support date/time types.
* Changes to structured_data to support internal references in any order.
* New datetime_utils module and tests; first created for date/time support in structured_data.
* Added view-portal-object script for general troubleshooting.
* Change to data_reader to ignore sheet names enclosed in parentheses.


8.7.2
=====

Expand Down
70 changes: 70 additions & 0 deletions dcicutils/captured_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from collections import namedtuple
from contextlib import contextmanager
import io
import sys
from typing import Optional

# The streams which were installed as sys.stdout/sys.stderr when this module was imported;
# these are what the "uncaptured" helpers below write to.
_real_stdout = sys.stdout
_real_stderr = sys.stderr


@contextmanager
def captured_output(capture: bool = True):
    """
    Context manager to capture any/all output to stdout or stderr, and not actually output it to stdout
    or stderr. Yields an object with these functions:

    - get_captured_output(): returns the output captured thus far (or None if capture is False).
    - uncaptured_print(*args, **kwargs): like print(), but actually prints to the real stdout
      (the one in place when this module was imported), even though stdout is being captured.
    - uncaptured_input(message): like input(), but with the real stdout/stderr in place.

    Can be useful, for example, in creating command-line scripts which invoke code which outputs a lot
    of info, warning, error, etc to stdout or stderr, and we want to suppress that output; but with the
    yielded uncaptured_print() function output specific to the script can actually be output; and/or can
    also optionally output any/all captured output, e.g. for debugging or troubleshooting purposes.
    To disable this capture, without having to restructure your code WRT the usage of the with-clause
    with this context manager, pass False as an argument to this context manager.

    On exit sys.stdout and sys.stderr are set back to whatever they were on entry, so that this
    context manager can be nested inside other output redirection without undoing it.
    """

    buffer = io.StringIO()
    # Remember what was installed on entry; this (rather than the import-time streams) is what
    # gets restored on exit, otherwise any enclosing redirection would be silently cancelled.
    entry_stdout, entry_stderr = sys.stdout, sys.stderr

    @contextmanager
    def real_streams():
        # Temporarily switch to the real streams, and always switch back afterwards,
        # even if the wrapped print/input call raises.
        active_stdout, active_stderr = sys.stdout, sys.stderr
        sys.stdout, sys.stderr = _real_stdout, _real_stderr
        try:
            yield
        finally:
            sys.stdout, sys.stderr = active_stdout, active_stderr

    def uncaptured_print(*args, **kwargs) -> None:
        with real_streams():
            print(*args, **kwargs)

    def uncaptured_input(message: str) -> str:
        with real_streams():
            return input(message)

    def get_captured_output() -> Optional[str]:
        return buffer.getvalue() if capture else None

    Result = namedtuple("Result", ["get_captured_output", "uncaptured_print", "uncaptured_input"])
    if capture:
        # Both streams share one buffer, so the captured text keeps the order it was written in.
        sys.stdout = buffer
        sys.stderr = buffer
    try:
        yield Result(get_captured_output, uncaptured_print, uncaptured_input)
    finally:
        sys.stdout, sys.stderr = entry_stdout, entry_stderr


@contextmanager
def uncaptured_output():
    """
    Context manager which, for the duration of its with-block, points sys.stdout and sys.stderr
    at the streams recorded in _real_stdout and _real_stderr, and on exit puts back whatever
    streams were installed when the with-block was entered.
    """
    saved_streams = (sys.stdout, sys.stderr)
    sys.stdout, sys.stderr = _real_stdout, _real_stderr
    try:
        yield
    finally:
        sys.stdout, sys.stderr = saved_streams
13 changes: 11 additions & 2 deletions dcicutils/data_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,10 @@ def open(self) -> None:

class Excel:

def __init__(self, file: str, reader_class: Optional[Type] = None) -> None:
def __init__(self, file: str, reader_class: Optional[Type] = None, include_hidden_sheets: bool = False) -> None:
self._file = file
self._workbook = None
self._include_hidden_sheets = include_hidden_sheets
self.sheet_names = None
if isinstance(reader_class, Type) and issubclass(reader_class, ExcelSheetReader):
self._reader_class = reader_class
Expand All @@ -169,7 +170,15 @@ def open(self) -> None:
if self._workbook is None:
self._workbook = openpyxl.load_workbook(self._file, data_only=True)
self.sheet_names = [sheet_name for sheet_name in self._workbook.sheetnames
if self._workbook[sheet_name].sheet_state != "hidden"]
if not self.is_hidden_sheet(self._workbook[sheet_name])]

def is_hidden_sheet(self, sheet: openpyxl.worksheet.worksheet.Worksheet) -> bool:
    """
    Returns True if the given worksheet is to be left out of the sheet names, otherwise False.
    When this object was created with include_hidden_sheets set, no sheet is treated as hidden;
    otherwise a sheet is hidden if its sheet_state is "hidden" or if its title is enclosed
    in parentheses.
    """
    # NOTE(review): assumes the parenthesized-title check is also governed by the
    # include_hidden_sheets flag - confirm against the original indentation.
    if self._include_hidden_sheets:
        return False
    if sheet.sheet_state == "hidden":
        return True
    title = sheet.title
    return title.startswith("(") and title.endswith(")")

def __del__(self) -> None:
if (workbook := self._workbook) is not None:
Expand Down
98 changes: 98 additions & 0 deletions dcicutils/datetime_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from dcicutils.misc_utils import normalize_spaces
from datetime import datetime, timedelta, timezone
from typing import Optional, Tuple


def parse_datetime_string(value: str) -> Optional[datetime]:
    """
    Parses the given string into a timezone-aware datetime object and returns it, or if ill-formatted
    then returns None. The given string is assumed to be in the format "YYYY-MM-DD hh:mm:ss" (a "T" may
    be used in place of the space) and with an optional timezone suffix in format "+hh:mm", "-hh:mm",
    "+hh" or "-hh". Also allowed is just a date of the format "YYYY-MM-DD" in which case a time
    of "00:00:00" is assumed. If no (usable) timezone is specified then the local timezone is assumed.
    """
    if not isinstance(value, str) or not (value := normalize_spaces(value)):
        return None
    # Signed UTC offset in minutes; None (rather than a negative number) means "not specified",
    # so that negative offsets like "-05:00" are not mistaken for a missing timezone.
    tz_offset_minutes = None
    if value.rfind("T") > 0:
        value = value.replace("T", " ")
    if (space := value.find(" ")) > 0 and (time_part := value[space + 1:]):
        # The sign is looked for only in the portion after the date,
        # so that the dashes within the date itself are never taken for one.
        if (sign_index := time_part.rfind("+")) > 0:
            sign = 1
        elif (sign_index := time_part.rfind("-")) > 0:
            sign = -1
        else:
            sign = 0
        if sign:
            value = normalize_spaces(value[:space] + " " + time_part[:sign_index])
            if tz_text := normalize_spaces(time_part[sign_index + 1:]):
                tz_fields = tz_text.split(":")
                tz_hours = tz_fields[0].strip()
                tz_minutes = tz_fields[1].strip() if len(tz_fields) == 2 else "0"
                # isdecimal (not isdigit) so that only characters which int() accepts get through.
                if tz_hours.isdecimal() and tz_minutes.isdecimal():
                    # Apply the sign to the whole offset, so "-05:30" is minus (5 hours and 30 minutes).
                    tz_offset_minutes = sign * (int(tz_hours) * 60 + int(tz_minutes))
    else:
        value = value + " 00:00:00"
    if tz_offset_minutes is None:
        local_hours, local_minutes = get_local_timezone_hours_minutes()
        tz_offset_minutes = local_hours * 60 + local_minutes
    try:
        dt = datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
        return dt.replace(tzinfo=timezone(timedelta(minutes=tz_offset_minutes)))
    except (ValueError, OverflowError):
        # Malformed date/time text, or a timezone offset outside of what timedelta/timezone allow.
        return None


def parse_date_string(value: str) -> Optional[datetime]:
    """
    Parses the given string into a datetime object representing only a date and returns it,
    or returns None if the string is ill-formatted (or not a string at all). The string is
    expected to be in the format "YYYY-MM-DD"; anything following a space or a "T" after
    the date portion is ignored.
    """
    if not isinstance(value, str):
        return None
    value = normalize_spaces(value)
    if not value:
        return None
    # Cut at the first space if there is one, otherwise at the first "T".
    cut = value.find(" ")
    if cut <= 0:
        cut = value.find("T")
    date_text = value[:cut] if cut > 0 else value
    try:
        return datetime.strptime(date_text, "%Y-%m-%d")
    except Exception:
        return None


def normalize_datetime_string(value: str) -> Optional[str]:
    """
    Parses the given string with parse_datetime_string and returns the resulting datetime as a
    string in ISO-8601 format, e.g. "2024-02-08T10:37:51-05:00"; returns None if the given
    string could not be parsed. See parse_datetime_string for the accepted input formats.
    """
    parsed = parse_datetime_string(value)
    if not parsed:
        return None
    return parsed.isoformat()


def normalize_date_string(value: str) -> Optional[str]:
    """
    Parses the given string with parse_date_string and returns the resulting date as a string
    in the format "YYYY-MM-DD", e.g. "2024-02-08"; returns None if the given string could not
    be parsed. See parse_date_string for the accepted input formats.
    """
    parsed = parse_date_string(value)
    if not parsed:
        return None
    return parsed.strftime("%Y-%m-%d")


def get_local_timezone_string() -> str:
    """
    Returns the UTC offset of the current/local timezone in a format like: "-05:00".
    """
    # Work from the signed total number of minutes and format its magnitude; formatting a
    # floor-divided hours/minutes pair would render an offset like -03:30 as "-04:30".
    offset_minutes = int(datetime.now(timezone.utc).astimezone().utcoffset().total_seconds() // 60)
    sign = "-" if offset_minutes < 0 else "+"
    hours, minutes = divmod(abs(offset_minutes), 60)
    return f"{sign}{hours:02d}:{minutes:02d}"


def get_local_timezone_hours_minutes() -> Tuple[int, int]:
    """
    Returns a tuple with the integer hours and minutes of the UTC offset for the current/local
    timezone. The hours are floor-divided and the minutes are the non-negative remainder, so an
    offset of -03:30 comes back as (-4, 30), i.e. hours and minutes are meant to be added together.
    """
    local_now = datetime.now(timezone.utc).astimezone()
    offset_in_minutes = local_now.utcoffset().total_seconds() / 60
    whole_hours, leftover_minutes = divmod(offset_in_minutes, 60)
    return int(whole_hours), int(abs(leftover_minutes))
8 changes: 8 additions & 0 deletions dcicutils/misc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2540,6 +2540,14 @@ def pad_to(target_size: int, data: list, *, padding=None):
return data


def normalize_spaces(value: str) -> str:
    """
    Returns the given string with every run of one or more whitespace characters
    replaced by a single space, and with leading and trailing whitespace removed.
    """
    whitespace_run = re.compile(r"\s+")
    collapsed = whitespace_run.sub(" ", value)
    return collapsed.strip()


class JsonLinesReader:

def __init__(self, fp, padded=False, padding=None):
Expand Down
12 changes: 7 additions & 5 deletions dcicutils/portal_object_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ class PortalObject:

_PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL

def __init__(self, portal_object: dict, portal: Portal = None, type: Optional[str] = None) -> None:
self._data = portal_object
self._portal = portal
def __init__(self, data: dict, portal: Portal = None,
schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None:
self._data = data if isinstance(data, dict) else {}
self._portal = portal if isinstance(portal, Portal) else None
self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
self._type = type if isinstance(type, str) and type else None

@property
Expand All @@ -29,7 +31,7 @@ def portal(self) -> Optional[Portal]:
@property
@lru_cache(maxsize=1)
def type(self) -> Optional[str]:
return self._type or Portal.get_schema_type(self._data)
return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else None)

@property
@lru_cache(maxsize=1)
Expand All @@ -44,7 +46,7 @@ def uuid(self) -> Optional[str]:
@property
@lru_cache(maxsize=1)
def schema(self) -> Optional[dict]:
return self._portal.get_schema(self.type) if self._portal else None
return self._schema if self._schema else (self._portal.get_schema(self.type) if self._portal else None)

def copy(self) -> PortalObject:
    """
    Returns a new PortalObject wrapping a deep copy of this object's data, and sharing
    this object's portal, explicitly supplied schema (if any), and type.
    """
    # Pass the explicitly supplied schema along too; otherwise the copy of an object which
    # was created with a schema but without a portal would end up with no schema at all.
    return PortalObject(deepcopy(self.data), portal=self.portal, schema=self._schema, type=self.type)
Expand Down
87 changes: 51 additions & 36 deletions dcicutils/portal_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ def init_from_key(key: dict, server: Optional[str], unspecified: Optional[list]
isinstance(secret := key.get("secret"), str) and secret): # noqa
self._key = {"key": key_id, "secret": secret}
if (isinstance(server, str) and server) or (isinstance(server := key.get("server"), str) and server):
if server := normalize_server(server):
if server := Portal._normalize_server(server):
if isinstance(key_server := key.get("server"), str) and key_server:
if normalize_server(key_server) != server:
if Portal._normalize_server(key_server) != server:
raise Exception(f"Portal server inconsistency: {server} vs {key_server}")
self._key["server"] = self._server = server
if not self._key:
Expand All @@ -104,49 +104,21 @@ def init_from_key_pair(key_pair: tuple, server: Optional[str], unspecified: Opti

def init_from_keys_file(keys_file: str, env: Optional[str], server: Optional[str],
unspecified: Optional[list] = []) -> None:
try:
with io.open(keys_file := os.path.expanduser(keys_file)) as f:
keys = json.load(f)
except Exception:
raise Exception(f"Portal initialization error; cannot open keys-file: {keys_file}")
if isinstance(env, str) and env and isinstance(key := keys.get(env), dict):
init_from_key(key, server)
self._keys_file = keys_file
self._env = env
elif (isinstance(server, str) and (server := normalize_server(server)) and
(key := [keys[k] for k in keys if normalize_server(keys[k].get("server")) == server])):
init_from_key(key[0], server)
self._keys_file = keys_file
elif not env and len(keys) == 1 and (env := next(iter(keys))) and isinstance(key := keys[env], dict):
key, env = Portal._lookup_in_keys_file(keys_file, env, server, raise_exception=True)
if key:
init_from_key(key, server)
self._keys_file = keys_file
self._env = env
else:
raise Exception(f"Portal initialization error;"
f" {env or server or None} not found in keys-file: {keys_file}")

def init_from_env_server_app(env: str, server: str, app: Optional[str],
unspecified: Optional[list] = None) -> None:
if keys_file := self._default_keys_file(app, env):
if keys_file := Portal._default_keys_file(app, env, server):
init_from_keys_file(keys_file, env, server, unspecified=unspecified)
else:
init(unspecified=unspecified)
self._env = env
self._server = server

def normalize_server(server: str) -> Optional[str]:
prefix = ""
if (lowercase_server := server.lower()).startswith("http://"):
prefix = "http://"
elif lowercase_server.startswith("https://"):
prefix = "https://"
if prefix:
if (server := re.sub(r"/+", "/", server[len(prefix):])).startswith("/"):
server = server[1:]
if len(server) > 1 and server.endswith("/"):
server = server[:-1]
return prefix + server if server else None

if (valid_app := app) and not (valid_app := Portal._valid_app(app)):
raise Exception(f"Portal initialization error; invalid app: {app}")
self._app = valid_app
Expand All @@ -166,7 +138,7 @@ def normalize_server(server: str) -> Optional[str]:
init_from_env_server_app(arg, server, app, unspecified=[env])
elif (isinstance(env, str) and env) or (isinstance(server, str) and server):
init_from_env_server_app(env, server, app, unspecified=[arg])
elif not arg and (keys_file := self._default_keys_file(app=self._app or Portal.DEFAULT_APP, env=env)):
elif not arg and (keys_file := Portal._default_keys_file(self._app or Portal.DEFAULT_APP, env, server)):
# If no initial arg then look for default app keys file.
init_from_keys_file(keys_file, env, server)
elif raise_exception:
Expand Down Expand Up @@ -411,14 +383,57 @@ def _kwargs(self, **kwargs) -> dict:
result_kwargs["timeout"] = timeout
return result_kwargs

def _default_keys_file(self, app: Optional[str], env: Optional[str] = None) -> Optional[str]:
@staticmethod
def _default_keys_file(app: Optional[str], env: Optional[str], server: Optional[str]) -> Optional[str]:
def infer_app_from_env(env: str) -> Optional[str]: # noqa
if isinstance(env, str) and (lowercase_env := env.lower()):
if app := [app for app in ORCHESTRATED_APPS if lowercase_env.startswith(app.lower())]:
return self._valid_app(app[0])
return Portal._valid_app(app[0])
if (app := Portal._valid_app(app)) or (app := infer_app_from_env(env)):
keys_file = os.path.expanduser(os.path.join(Portal.KEYS_FILE_DIRECTORY, f".{app.lower()}-keys.json"))
return keys_file if os.path.exists(keys_file) else None
if not app:
for app in ORCHESTRATED_APPS:
if keys_file := Portal._default_keys_file(app, env, server):
if Portal._lookup_in_keys_file(keys_file, env, server)[0]:
return keys_file

@staticmethod
def _lookup_in_keys_file(keys_file: str, env: Optional[str], server: Optional[str],
                         raise_exception: bool = False) -> Tuple[Optional[dict], Optional[str]]:
    """
    Looks up a key entry in the given (JSON) keys file and returns a tuple of the entry
    found and an environment name, or (None, None) if the file cannot be read or no entry
    is found. The lookup is tried in this order:

    1. By the given env name; returns that entry and the given env.
    2. By the given server, compared after normalization against the "server" value of
       each entry; returns the first matching entry and the given env (as passed in).
    3. If no env was given and the file contains exactly one entry; returns that entry
       and its name.

    If raise_exception is True then an Exception is raised, rather than (None, None)
    returned, when the file cannot be read or when nothing is found.
    """
    try:
        with io.open(keys_file := os.path.expanduser(keys_file)) as f:
            keys = json.load(f)
    except Exception:
        if raise_exception:
            raise Exception(f"Portal initialization error; cannot open keys-file: {keys_file}")
        return None, None
    if not isinstance(keys, dict):
        # Valid JSON which is not an object cannot hold any key entries; treat it as empty
        # so that this ends up as an ordinary "not found" rather than an AttributeError.
        keys = {}
    if isinstance(env, str) and env and isinstance(key := keys.get(env), dict):
        return key, env
    if isinstance(server, str) and (server := Portal._normalize_server(server)):
        for entry in keys.values():
            # Skip entries which are malformed or have no server, rather than failing on them.
            if not isinstance(entry, dict) or not isinstance(entry_server := entry.get("server"), str):
                continue
            if Portal._normalize_server(entry_server) == server:
                return entry, env
    if not env and len(keys) == 1 and (env := next(iter(keys))) and isinstance(key := keys[env], dict):
        return key, env
    if raise_exception:
        raise Exception(f"Portal initialization error;"
                        f" {env or server or None} not found in keys-file: {keys_file}")
    return None, None

@staticmethod
def _normalize_server(server: str) -> Optional[str]:
    """
    Returns a normalized form of the given server URL, or None if the given value is not a
    string or nothing is left after normalization. For a value starting (case-insensitively)
    with "http://" or "https://" the scheme is lower-cased, runs of slashes after the scheme
    are collapsed to a single slash, and a leading/trailing slash is dropped. A non-empty
    string without such a scheme is returned unchanged.
    """
    if not isinstance(server, str):
        # E.g. a keys file entry with no "server" value; nothing to normalize.
        return None
    prefix = ""
    lowercase_server = server.lower()
    for scheme in ("http://", "https://"):
        if lowercase_server.startswith(scheme):
            prefix = scheme
            break
    if prefix:
        # Once slashes are collapsed there is at most one at either end to remove.
        server = re.sub(r"/+", "/", server[len(prefix):]).strip("/")
    return prefix + server if server else None

@staticmethod
def _valid_app(app: Optional[str]) -> Optional[str]:
Expand Down
Loading
Loading