Merge pull request #194 from pepkit/dev

v0.9.2 release
pepkit · Jun 3, 2024 · 698d024 · 698d024
2 parents 341201f + 35fcb53
commit 698d024
Show file tree

Hide file tree

Showing 7 changed files with 52 additions and 25 deletions.
diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
@@ -8,7 +8,7 @@ jobs:
     steps:
       - uses: actions/checkout@v2
       - uses: actions/setup-python@v2
-      - uses: psf/black@20.8b1
+      - uses: psf/black@stable
         with:
           options: "--check --diff --color --verbose --line-length 99"
 
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -2,6 +2,12 @@
 
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.
 
+## [0.9.2] - 2024-06-24
+### Changed
+- Users can override the pipeline name via parameter or config file; otherwise pipestat looks at the output_schema, then falls back on the default as a last resort.
+- Allow pipestat to proceed without creating a results file backend IF using "{record_identifier}" in the file path, helps address [Looper #471](https://github.com/pepkit/looper/issues/471)
+- Reduce overall verbosity when creating backends
+
 ## [0.9.1] - 2024-04-24
 ### Fixed
 - Pipestat summarize html report columns now show stats only  [#148](https://github.com/pepkit/pipestat/issues/148).

diff --git a/pipestat/_version.py b/pipestat/_version.py
@@ -1 +1 @@
-__version__ = "0.9.1"
+__version__ = "0.9.2"
diff --git a/pipestat/backends/db_backend/dbbackend.py b/pipestat/backends/db_backend/dbbackend.py
@@ -51,7 +51,7 @@ def __init__(
         """
 
         super().__init__(pipeline_type)
-        _LOGGER.warning(f"Initializing DBBackend for pipeline '{pipeline_name}'")
+        _LOGGER.debug(f"Initializing DBBackend for pipeline '{pipeline_name}'")
         self.pipeline_name = pipeline_name
         self.pipeline_type = pipeline_type or "sample"
         self.record_identifier = record_identifier

diff --git a/pipestat/backends/file_backend/filebackend.py b/pipestat/backends/file_backend/filebackend.py
@@ -54,7 +54,7 @@ def __init__(
 
         """
         super().__init__(pipeline_type)
-        _LOGGER.warning("Initialize FileBackend")
+        _LOGGER.debug("Initialize FileBackend")
 
         self.results_file_path = results_file_path
         self.pipeline_name = pipeline_name
@@ -66,21 +66,26 @@ def __init__(
         self.result_formatter = result_formatter
         self.multi_pipelines = multi_pipelines
 
-        self.determine_results_file(self.results_file_path)
+        self.determine_results_file()
 
-    def determine_results_file(self, results_file_path: str) -> None:
+    def determine_results_file(self) -> None:
         """Initialize or load results_file from given path
         :param str results_file_path: YAML file to report into, if file is
         used as the object back-end
         """
-        if not os.path.exists(self.results_file_path):
-            _LOGGER.debug(
-                f"Results file doesn't yet exist. Initializing: {self.results_file_path}"
-            )
-            self._init_results_file()
+
+        if "{record_identifier}" in self.results_file_path:
+            # In the special case where the user wants to use {record_identifier} in file path
+            pass
         else:
-            _LOGGER.debug(f"Loading results file: {self.results_file_path}")
-            self._load_results_file()
+            if not os.path.exists(self.results_file_path):
+                _LOGGER.debug(
+                    f"Results file doesn't yet exist. Initializing: {self.results_file_path}"
+                )
+                self._init_results_file()
+            else:
+                _LOGGER.debug(f"Loading results file: {self.results_file_path}")
+                self._load_results_file()
 
     def check_record_exists(
         self,

diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py
@@ -134,7 +134,7 @@ def __init__(
         flag_file_dir: Optional[str] = None,
         show_db_logs: bool = False,
         pipeline_type: Optional[str] = None,
-        pipeline_name: Optional[str] = DEFAULT_PIPELINE_NAME,
+        pipeline_name: Optional[str] = None,
         result_formatter: staticmethod = default_formatter,
         multi_pipelines: bool = False,
         output_dir: Optional[str] = None,
@@ -193,10 +193,14 @@ def __init__(
             "record_identifier", env_var=ENV_VARS["record_identifier"], override=record_identifier
         )
 
+        # TODO this is a workaround for Looper ~ https://github.com/pepkit/looper/issues/492, sharing pipeline names
+        # In the future, we should get the pipeline name only from the output schema.
         self.cfg[PIPELINE_NAME] = (
-            self.cfg[SCHEMA_KEY].pipeline_name
+            pipeline_name
+            or self.cfg[CONFIG_KEY].get(PIPELINE_NAME)
+            or self.cfg[SCHEMA_KEY].pipeline_name
             if self.cfg[SCHEMA_KEY] is not None
-            else pipeline_name
+            else DEFAULT_PIPELINE_NAME
         )
 
         self.cfg[PROJECT_NAME] = self.cfg[CONFIG_KEY].priority_get(
@@ -225,7 +229,12 @@ def __init__(
             ),
             self.cfg["config_path"],
         )
-        make_subdirectories(self.cfg[FILE_KEY])
+
+        if "{record_identifier}" in str(self.cfg[FILE_KEY]):
+            # In the special case where the user wants to use {record_identifier} in file path
+            pass
+        else:
+            make_subdirectories(self.cfg[FILE_KEY])
 
         self.cfg[RESULT_FORMATTER] = result_formatter
 
@@ -326,12 +335,19 @@ def resolve_results_file_path(self, results_file_path):
         # Save for later when assessing if there may be multiple result files
         if results_file_path:
             assert isinstance(results_file_path, str), TypeError("Path is expected to be a str")
-            if not self.record_identifier and "{record_identifier}" in results_file_path:
-                raise NotImplementedError(
-                    f"Must provide record identifier during PipestatManager creation for this results_file_path: {results_file_path}"
-                )
-            self.cfg["unresolved_result_path"] = results_file_path
-            return results_file_path.format(record_identifier=self.record_identifier)
+            if self.record_identifier:
+                try:
+                    self.cfg["unresolved_result_path"] = results_file_path
+                    results_file_path = results_file_path.format(
+                        record_identifier=self.record_identifier
+                    )
+                    return results_file_path
+                except AttributeError:
+                    self.cfg["unresolved_result_path"] = results_file_path
+                    return results_file_path
+            else:
+                self.cfg["unresolved_result_path"] = results_file_path
+                return results_file_path
         return results_file_path
 
     def initialize_filebackend(self, record_identifier, results_file_path, flag_file_dir):

diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py
@@ -2245,8 +2245,8 @@ def test_multi_results_not_implemented(
             backend_data = {"results_file_path": results_file_path}
             args.update(backend_data)
 
-            with pytest.raises(NotImplementedError):
-                psm = SamplePipestatManager(**args)
+            # with pytest.raises(NotImplementedError):
+            psm = SamplePipestatManager(**args)
 
     @pytest.mark.parametrize("backend", ["file"])
     def test_multi_results_basic(