Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v0.9.2 release #194

Merged
merged 8 commits into from
Jun 3, 2024
2 changes: 1 addition & 1 deletion .github/workflows/black.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: psf/black@20.8b1
- uses: psf/black@stable
with:
options: "--check --diff --color --verbose --line-length 99"

6 changes: 6 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

## [0.9.2] - 2024-06-24
### Changed
- Users can override the pipeline name via parameter or config file; otherwise pipestat looks at the output_schema, then falls back on the default name as a last resort.
- Allow pipestat to proceed without creating a results file backend if "{record_identifier}" is used in the file path; this helps address [Looper #471](https://github.com/pepkit/looper/issues/471)
- Reduce overall verbosity when creating backends

## [0.9.1] - 2024-04-24
### Fixed
- Pipestat summarize html report columns now show stats only [#148](https://github.com/pepkit/pipestat/issues/148).
Expand Down
2 changes: 1 addition & 1 deletion pipestat/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.9.1"
__version__ = "0.9.2"
2 changes: 1 addition & 1 deletion pipestat/backends/db_backend/dbbackend.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __init__(
"""

super().__init__(pipeline_type)
_LOGGER.warning(f"Initializing DBBackend for pipeline '{pipeline_name}'")
_LOGGER.debug(f"Initializing DBBackend for pipeline '{pipeline_name}'")
self.pipeline_name = pipeline_name
self.pipeline_type = pipeline_type or "sample"
self.record_identifier = record_identifier
Expand Down
25 changes: 15 additions & 10 deletions pipestat/backends/file_backend/filebackend.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __init__(

"""
super().__init__(pipeline_type)
_LOGGER.warning("Initialize FileBackend")
_LOGGER.debug("Initialize FileBackend")

self.results_file_path = results_file_path
self.pipeline_name = pipeline_name
Expand All @@ -66,21 +66,26 @@ def __init__(
self.result_formatter = result_formatter
self.multi_pipelines = multi_pipelines

self.determine_results_file(self.results_file_path)
self.determine_results_file()

def determine_results_file(self, results_file_path: str) -> None:
def determine_results_file(self) -> None:
"""Initialize or load results_file from given path
:param str results_file_path: YAML file to report into, if file is
used as the object back-end
"""
if not os.path.exists(self.results_file_path):
_LOGGER.debug(
f"Results file doesn't yet exist. Initializing: {self.results_file_path}"
)
self._init_results_file()

if "{record_identifier}" in self.results_file_path:
# In the special case where the user wants to use {record_identifier} in file path
pass
else:
_LOGGER.debug(f"Loading results file: {self.results_file_path}")
self._load_results_file()
if not os.path.exists(self.results_file_path):
_LOGGER.debug(
f"Results file doesn't yet exist. Initializing: {self.results_file_path}"
)
self._init_results_file()
else:
_LOGGER.debug(f"Loading results file: {self.results_file_path}")
self._load_results_file()

def check_record_exists(
self,
Expand Down
36 changes: 26 additions & 10 deletions pipestat/pipestat.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def __init__(
flag_file_dir: Optional[str] = None,
show_db_logs: bool = False,
pipeline_type: Optional[str] = None,
pipeline_name: Optional[str] = DEFAULT_PIPELINE_NAME,
pipeline_name: Optional[str] = None,
result_formatter: staticmethod = default_formatter,
multi_pipelines: bool = False,
output_dir: Optional[str] = None,
Expand Down Expand Up @@ -193,10 +193,14 @@ def __init__(
"record_identifier", env_var=ENV_VARS["record_identifier"], override=record_identifier
)

# TODO this is a workaround for Looper ~ https://github.com/pepkit/looper/issues/492, sharing pipeline names
# In the future, we should get the pipeline name only from the output schema.
self.cfg[PIPELINE_NAME] = (
self.cfg[SCHEMA_KEY].pipeline_name
pipeline_name
or self.cfg[CONFIG_KEY].get(PIPELINE_NAME)
or self.cfg[SCHEMA_KEY].pipeline_name
if self.cfg[SCHEMA_KEY] is not None
else pipeline_name
else DEFAULT_PIPELINE_NAME
)

self.cfg[PROJECT_NAME] = self.cfg[CONFIG_KEY].priority_get(
Expand Down Expand Up @@ -225,7 +229,12 @@ def __init__(
),
self.cfg["config_path"],
)
make_subdirectories(self.cfg[FILE_KEY])

if "{record_identifier}" in str(self.cfg[FILE_KEY]):
# In the special case where the user wants to use {record_identifier} in file path
pass
else:
make_subdirectories(self.cfg[FILE_KEY])

self.cfg[RESULT_FORMATTER] = result_formatter

Expand Down Expand Up @@ -326,12 +335,19 @@ def resolve_results_file_path(self, results_file_path):
# Save for later when assessing if there may be multiple result files
if results_file_path:
assert isinstance(results_file_path, str), TypeError("Path is expected to be a str")
if not self.record_identifier and "{record_identifier}" in results_file_path:
raise NotImplementedError(
f"Must provide record identifier during PipestatManager creation for this results_file_path: {results_file_path}"
)
self.cfg["unresolved_result_path"] = results_file_path
return results_file_path.format(record_identifier=self.record_identifier)
if self.record_identifier:
try:
self.cfg["unresolved_result_path"] = results_file_path
results_file_path = results_file_path.format(
record_identifier=self.record_identifier
)
return results_file_path
except AttributeError:
self.cfg["unresolved_result_path"] = results_file_path
return results_file_path
else:
self.cfg["unresolved_result_path"] = results_file_path
return results_file_path
return results_file_path

def initialize_filebackend(self, record_identifier, results_file_path, flag_file_dir):
Expand Down
4 changes: 2 additions & 2 deletions tests/test_pipestat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2245,8 +2245,8 @@ def test_multi_results_not_implemented(
backend_data = {"results_file_path": results_file_path}
args.update(backend_data)

with pytest.raises(NotImplementedError):
psm = SamplePipestatManager(**args)
# with pytest.raises(NotImplementedError):
psm = SamplePipestatManager(**args)

@pytest.mark.parametrize("backend", ["file"])
def test_multi_results_basic(
Expand Down
Loading