From c1a1a721438ca5eb7d448991b0c452878f47b116 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 20 May 2024 13:22:26 -0400 Subject: [PATCH 1/8] change gha to black stable --- .github/workflows/black.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 15435ea3..342c8430 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -8,7 +8,7 @@ jobs: steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 - - uses: psf/black@20.8b1 + - uses: psf/black@stable with: options: "--check --diff --color --verbose --line-length 99" From adecb3d04b6755ba486afb857c2eb7a0514c236f Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 23 May 2024 16:38:22 -0400 Subject: [PATCH 2/8] ensure that user supplied pipeline_name takes precedence --- pipestat/pipestat.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 8a689781..2b750357 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -134,7 +134,7 @@ def __init__( flag_file_dir: Optional[str] = None, show_db_logs: bool = False, pipeline_type: Optional[str] = None, - pipeline_name: Optional[str] = DEFAULT_PIPELINE_NAME, + pipeline_name: Optional[str] = None, result_formatter: staticmethod = default_formatter, multi_pipelines: bool = False, output_dir: Optional[str] = None, @@ -199,6 +199,12 @@ def __init__( else pipeline_name ) + self.cfg[PIPELINE_NAME] = ( + pipeline_name or self.cfg[SCHEMA_KEY].pipeline_name + if self.cfg[SCHEMA_KEY] is not None + else DEFAULT_PIPELINE_NAME + ) + self.cfg[PROJECT_NAME] = self.cfg[CONFIG_KEY].priority_get( "project_name", env_var=ENV_VARS["project_name"], override=project_name ) From adced7551a4f00c4eac19fb9e2b81eab5b00b4bb Mon Sep 17 00:00:00 2001 From: Donald Campbell 
<125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 23 May 2024 17:08:53 -0400 Subject: [PATCH 3/8] add other ways to set pipeline_name --- pipestat/pipestat.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 2b750357..9fc55fe6 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -194,13 +194,9 @@ def __init__( ) self.cfg[PIPELINE_NAME] = ( - self.cfg[SCHEMA_KEY].pipeline_name - if self.cfg[SCHEMA_KEY] is not None - else pipeline_name - ) - - self.cfg[PIPELINE_NAME] = ( - pipeline_name or self.cfg[SCHEMA_KEY].pipeline_name + pipeline_name + or self.cfg[CONFIG_KEY].get(PIPELINE_NAME) + or self.cfg[SCHEMA_KEY].pipeline_name if self.cfg[SCHEMA_KEY] is not None else DEFAULT_PIPELINE_NAME ) From b0883d8d3ec686c3af461cfb7ad3e83d6fb02c2c Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 24 May 2024 14:41:52 -0400 Subject: [PATCH 4/8] add comment about grabbing the pipelinename from configuration file --- pipestat/pipestat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 9fc55fe6..cdc5b264 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -193,6 +193,8 @@ def __init__( "record_identifier", env_var=ENV_VARS["record_identifier"], override=record_identifier ) + # TODO this is a work around for Looper ~ https://github.com/pepkit/looper/issues/492, sharing pipeline names + # In the future, we should get pipeline name only from output schema. 
self.cfg[PIPELINE_NAME] = ( pipeline_name or self.cfg[CONFIG_KEY].get(PIPELINE_NAME) From 936bb10928feaeffe344a32754c7fed8c8c2fa49 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 24 May 2024 14:46:15 -0400 Subject: [PATCH 5/8] update version and changelog 0.9.2a1 --- docs/changelog.md | 4 ++++ pipestat/_version.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index efd78aa9..7a9ad11f 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.9.2] - 2024 -xx-xx +### Changed +- User can override pipeline name via parameter or config file, otherwise look at output_schema, then fall back on default as last resort. + ## [0.9.1] - 2024-04-24 ### Fixed - Pipestat summarize html report columns now show stats only [#148](https://github.com/pepkit/pipestat/issues/148). 
diff --git a/pipestat/_version.py b/pipestat/_version.py index d69d16e9..c9800617 100644 --- a/pipestat/_version.py +++ b/pipestat/_version.py @@ -1 +1 @@ -__version__ = "0.9.1" +__version__ = "0.9.2a1" From b86e1127e1750a5f27302ac461c5caf7157f09b8 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:26:34 -0400 Subject: [PATCH 6/8] change from logger.warning to logger.debug to reduce pipestat verbosity in terminal --- pipestat/backends/db_backend/dbbackend.py | 2 +- pipestat/backends/file_backend/filebackend.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pipestat/backends/db_backend/dbbackend.py b/pipestat/backends/db_backend/dbbackend.py index 1ccd1130..797f5b2e 100644 --- a/pipestat/backends/db_backend/dbbackend.py +++ b/pipestat/backends/db_backend/dbbackend.py @@ -51,7 +51,7 @@ def __init__( """ super().__init__(pipeline_type) - _LOGGER.warning(f"Initializing DBBackend for pipeline '{pipeline_name}'") + _LOGGER.debug(f"Initializing DBBackend for pipeline '{pipeline_name}'") self.pipeline_name = pipeline_name self.pipeline_type = pipeline_type or "sample" self.record_identifier = record_identifier diff --git a/pipestat/backends/file_backend/filebackend.py b/pipestat/backends/file_backend/filebackend.py index 62ba6601..278fc5a6 100644 --- a/pipestat/backends/file_backend/filebackend.py +++ b/pipestat/backends/file_backend/filebackend.py @@ -54,7 +54,7 @@ def __init__( """ super().__init__(pipeline_type) - _LOGGER.warning("Initialize FileBackend") + _LOGGER.debug("Initialize FileBackend") self.results_file_path = results_file_path self.pipeline_name = pipeline_name From 743b26e01231eecd75bebb56f2588b7cf0122999 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:33:54 -0400 Subject: [PATCH 7/8] enable carve out for initializing pipestat with "{record_identifier" --- 
pipestat/backends/file_backend/filebackend.py | 23 +++++++++------- pipestat/pipestat.py | 26 ++++++++++++++----- tests/test_pipestat.py | 4 +-- 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/pipestat/backends/file_backend/filebackend.py b/pipestat/backends/file_backend/filebackend.py index 278fc5a6..8a22de50 100644 --- a/pipestat/backends/file_backend/filebackend.py +++ b/pipestat/backends/file_backend/filebackend.py @@ -66,21 +66,26 @@ def __init__( self.result_formatter = result_formatter self.multi_pipelines = multi_pipelines - self.determine_results_file(self.results_file_path) + self.determine_results_file() - def determine_results_file(self, results_file_path: str) -> None: + def determine_results_file(self) -> None: """Initialize or load results_file from given path :param str results_file_path: YAML file to report into, if file is used as the object back-end """ - if not os.path.exists(self.results_file_path): - _LOGGER.debug( - f"Results file doesn't yet exist. Initializing: {self.results_file_path}" - ) - self._init_results_file() + + if "{record_identifier}" in self.results_file_path: + # In the special case where the user wants to use {record_identifier} in file path + pass else: - _LOGGER.debug(f"Loading results file: {self.results_file_path}") - self._load_results_file() + if not os.path.exists(self.results_file_path): + _LOGGER.debug( + f"Results file doesn't yet exist. 
Initializing: {self.results_file_path}" + ) + self._init_results_file() + else: + _LOGGER.debug(f"Loading results file: {self.results_file_path}") + self._load_results_file() def check_record_exists( self, diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index cdc5b264..a0dce5ba 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -229,7 +229,12 @@ def __init__( ), self.cfg["config_path"], ) - make_subdirectories(self.cfg[FILE_KEY]) + + if "{record_identifier}" in str(self.cfg[FILE_KEY]): + # In the special case where the user wants to use {record_identifier} in file path + pass + else: + make_subdirectories(self.cfg[FILE_KEY]) self.cfg[RESULT_FORMATTER] = result_formatter @@ -330,12 +335,19 @@ def resolve_results_file_path(self, results_file_path): # Save for later when assessing if there may be multiple result files if results_file_path: assert isinstance(results_file_path, str), TypeError("Path is expected to be a str") - if not self.record_identifier and "{record_identifier}" in results_file_path: - raise NotImplementedError( - f"Must provide record identifier during PipestatManager creation for this results_file_path: {results_file_path}" - ) - self.cfg["unresolved_result_path"] = results_file_path - return results_file_path.format(record_identifier=self.record_identifier) + if self.record_identifier: + try: + self.cfg["unresolved_result_path"] = results_file_path + results_file_path = results_file_path.format( + record_identifier=self.record_identifier + ) + return results_file_path + except AttributeError: + self.cfg["unresolved_result_path"] = results_file_path + return results_file_path + else: + self.cfg["unresolved_result_path"] = results_file_path + return results_file_path return results_file_path def initialize_filebackend(self, record_identifier, results_file_path, flag_file_dir): diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index dd211663..0deb25dc 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ 
-2245,8 +2245,8 @@ def test_multi_results_not_implemented( backend_data = {"results_file_path": results_file_path} args.update(backend_data) - with pytest.raises(NotImplementedError): - psm = SamplePipestatManager(**args) + # with pytest.raises(NotImplementedError): + psm = SamplePipestatManager(**args) @pytest.mark.parametrize("backend", ["file"]) def test_multi_results_basic( From 35fcb5307d42df9e6135925f3efd39c63c4728f4 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:39:34 -0400 Subject: [PATCH 8/8] update changelog and version for upcoming v0.9.2 --- docs/changelog.md | 4 +++- pipestat/_version.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 7a9ad11f..48eb68ff 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,9 +2,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.9.2] - 2024 -xx-xx +## [0.9.2] - 2024-06-24 ### Changed - User can override pipeline name via parameter or config file, otherwise look at output_schema, then fall back on default as last resort. +- Allow pipestat to proceed without creating a results file backend IF using "{record_identifier}" in the file path, helps address [Looper #471](https://github.com/pepkit/looper/issues/471) +- Reduce overall verbosity when creating backends ## [0.9.1] - 2024-04-24 ### Fixed diff --git a/pipestat/_version.py b/pipestat/_version.py index c9800617..a2fecb45 100644 --- a/pipestat/_version.py +++ b/pipestat/_version.py @@ -1 +1 @@ -__version__ = "0.9.2a1" +__version__ = "0.9.2"