diff --git a/src/toffy/mph_comp.py b/src/toffy/mph_comp.py index 41a832e1..cbf6a4e1 100644 --- a/src/toffy/mph_comp.py +++ b/src/toffy/mph_comp.py @@ -111,11 +111,12 @@ def compute_mph_metrics(bin_file_dir, csv_dir, fov, mass=98, mass_start=97.5, ma out_df.to_csv(os.path.join(csv_dir, pulse_height_file), index=False) -def combine_mph_metrics(csv_dir, return_data=False): +def combine_mph_metrics(csv_dir, return_data=False, warn_overwrite=True): """Combines data from individual csvs into one Args: csv_dir (str): path where FOV mph data csvs are stored return_data (bool): whether to return dataframe with mph metrics, default False + warn_overwrite (bool): whether to warn if existing `_combined.csv` file found Returns: combined mph data for all FOVs @@ -125,7 +126,7 @@ def combine_mph_metrics(csv_dir, return_data=False): io_utils.validate_paths(csv_dir) # combine individual csv files - combine_run_metrics(csv_dir, "mph_pulse") + combine_run_metrics(csv_dir, "mph_pulse", warn_overwrite) # calculate cumulative sums of total counts and time combined_df = pd.read_csv(os.path.join(csv_dir, "mph_pulse_combined.csv")) diff --git a/src/toffy/normalize.py b/src/toffy/normalize.py index c362a88a..7a9c1cc0 100644 --- a/src/toffy/normalize.py +++ b/src/toffy/normalize.py @@ -214,12 +214,13 @@ def pred_func(x): return pred_func -def combine_run_metrics(run_dir, substring): +def combine_run_metrics(run_dir, substring, warn_overwrite=True): """Combines the specified metrics from different FOVs into a single file Args: run_dir (str): the directory containing the files - substring(str): the substring contained within the files to be combined""" + substring(str): the substring contained within the files to be combined + warn_overwrite (bool): whether to warn if existing `_combined.csv` file found""" files = io_utils.list_files(run_dir, substring) @@ -228,9 +229,11 @@ def combine_run_metrics(run_dir, substring): raise ValueError("No files found in {}".format(run_dir)) if substring + "_combined.csv" in files: - warnings.warn( - "Removing previously generated combined {} file in {}".format(substring, run_dir) - ) + if warn_overwrite: + print("Warning of overwriting!") + warnings.warn( + "Removing previously generated combined {} file in {}".format(substring, run_dir) + ) os.remove(os.path.join(run_dir, substring + "_combined.csv")) files = [file for file in files if "combined" not in file] diff --git a/src/toffy/qc_comp.py b/src/toffy/qc_comp.py index 339b1722..e8ab40ee 100644 --- a/src/toffy/qc_comp.py +++ b/src/toffy/qc_comp.py @@ -3,6 +3,7 @@ import os import pathlib import re +import warnings from dataclasses import dataclass, field from shutil import rmtree from typing import Dict, List, Optional, Tuple, Union @@ -386,12 +387,14 @@ def compute_qc_metrics_direct(image_data, fov_name, gaussian_blur=False, blur_fa return metric_csvs -def combine_qc_metrics(qc_metrics_dir): +def combine_qc_metrics(qc_metrics_dir, warn_overwrite=True): """Aggregates the QC results of each FOV into one `.csv` Args: qc_metrics_dir (str): the name of the folder containing the QC metric files + warn_overwrite (bool): + whether to warn if existing combined CSV found for each metric """ # path validation check @@ -417,6 +420,10 @@ def combine_qc_metrics(qc_metrics_dir): # write the aggregated metric data # NOTE: if this combined metric file already exists, it will be overwritten + if os.path.exists(os.path.join(qc_metrics_dir, "combined_%s.csv" % ms)) and warn_overwrite: + warnings.warn( + "Removing previously generated combined %s file in %s" % (ms, qc_metrics_dir) + ) metric_df.to_csv(os.path.join(qc_metrics_dir, "combined_%s.csv" % ms), index=False) diff --git a/src/toffy/watcher_callbacks.py b/src/toffy/watcher_callbacks.py index aad6836b..2ebc036d 100644 --- a/src/toffy/watcher_callbacks.py +++ b/src/toffy/watcher_callbacks.py @@ -34,12 +34,14 @@ class RunCallbacks: run_folder: str - def plot_qc_metrics(self, qc_out_dir: str, **kwargs): + def plot_qc_metrics(self, qc_out_dir: str, warn_overwrite=False, **kwargs): """Plots qc metrics generated by the `generate_qc` callback Args: qc_out_dir (str): Directory containing qc metric csv + warn_overwrite (bool): whether to warn if existing `_combined.csv` file found, + needed to curb watcher output if `plot_qc_metrics` set as intermediate callback **kwargs (Dict[str, Any]): Additional arguments for `toffy.qc_comp.visualize_qc_metrics`. Accepted kwargs are @@ -57,7 +59,7 @@ def plot_qc_metrics(self, qc_out_dir: str, **kwargs): viz_kwargs = {k: v for k, v in kwargs.items() if k in valid_kwargs} qc_plots = {} - combine_qc_metrics(qc_out_dir) + combine_qc_metrics(qc_out_dir, warn_overwrite=warn_overwrite) for metric_name in QC_COLUMNS: qc_plots[metric_name] = visualize_qc_metrics( metric_name, qc_out_dir, **viz_kwargs, return_plot=True @@ -65,12 +67,14 @@ def plot_qc_metrics(self, qc_out_dir: str, **kwargs): return qc_plots - def plot_mph_metrics(self, mph_out_dir, plot_dir, **kwargs): + def plot_mph_metrics(self, mph_out_dir, plot_dir, warn_overwrite=False, **kwargs): """Plots mph metrics generated by the `generate_mph` callback Args: mph_out_dir (str): directory containing mph metric csv plot_dir (str): director to store the plot to + warn_overwrite (bool): whether to warn if existing `_combined.csv` file found, + needed to curb watcher output if `plot_mph_metrics` set as intermediate callback **kwargs (Dict[str, Any]): Additional arguments for `toffy.mph_comp.visualize_mph`. Accepted kwargs are @@ -89,7 +93,8 @@ def plot_mph_metrics(self, mph_out_dir, plot_dir, **kwargs): ] viz_kwargs = {k: v for k, v in kwargs.items() if k in valid_kwargs} - mph_df = combine_mph_metrics(mph_out_dir, return_data=True) + # set verbose to false to prevent overwrite error from popping up each FOV + mph_df = combine_mph_metrics(mph_out_dir, return_data=True, warn_overwrite=warn_overwrite) mph_fig = visualize_mph(mph_df, plot_dir, **viz_kwargs, return_plot=True) return mph_fig @@ -358,16 +363,6 @@ def build_callbacks( run_callbacks + intermediate_callbacks if intermediate_callbacks else run_callbacks[:] ) - for run_cb in callbacks_with_prereq: - argnames = inspect.getfullargspec(getattr(RunCallbacks, run_cb))[0] - argnames = [argname for argname in argnames if argname != "self"] - - misc_utils.verify_in_list(required_arguments=argnames, passed_arguments=list(kwargs.keys())) - - callbacks_with_prereq = ( - run_callbacks + intermediate_callbacks if intermediate_callbacks else run_callbacks[:] - ) - for run_cb in callbacks_with_prereq: argnames = inspect.getfullargspec(getattr(RunCallbacks, run_cb))[0] argnames = [argname for argname in argnames if argname != "self"] diff --git a/templates/3a_monitor_MIBI_run.ipynb b/templates/3a_monitor_MIBI_run.ipynb index bc42d625..b04af332 100644 --- a/templates/3a_monitor_MIBI_run.ipynb +++ b/templates/3a_monitor_MIBI_run.ipynb @@ -113,6 +113,7 @@ " plot_dir=metrics_plot_dir,\n", " save_dir=metrics_plot_dir,\n", " panel=panel,\n", + " warn_overwrite=False,\n", ")" ] }, @@ -128,7 +129,7 @@ ], "metadata": { "kernelspec": { - "display_name": "toffy38", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -142,7 +143,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.9.16" }, "vscode": { "interpreter": { diff --git a/tests/fov_watcher_test.py b/tests/fov_watcher_test.py index 5b89b374..14d75896 100644 --- a/tests/fov_watcher_test.py +++ b/tests/fov_watcher_test.py @@ -164,6 +164,9 @@ def test_watcher( kwargs["pulse_out_dir"] = pulse_out_dir kwargs["plot_dir"] = plot_dir + # ensure warn_overwrite set to False if intermediate callbacks set, otherwise True + kwargs["warn_overwrite"] = True if int_cbs else False + run_data = os.path.join(tmpdir, "test_run") log_out = os.path.join(tmpdir, "log_output") os.makedirs(run_data) diff --git a/tests/mph_comp_test.py b/tests/mph_comp_test.py index 48da4a9b..d3383355 100644 --- a/tests/mph_comp_test.py +++ b/tests/mph_comp_test.py @@ -8,6 +8,8 @@ from toffy import mph_comp as mph +parametrize = pytest.mark.parametrize + def create_sample_mph_data(fov, mph_value, total_count, time): data = pd.DataFrame( @@ -89,7 +91,8 @@ def test_compute_mph_metrics(): assert csv_data.equals(mph_data) -def test_combine_mph_metrics(): +@parametrize("warn_overwrite_test", [True, False]) +def test_combine_mph_metrics(warn_overwrite_test): bad_path = os.path.join(Path(__file__).parents[1], "data", "not-a-folder") # bad directory path should raise an error @@ -116,6 +119,15 @@ def test_combine_mph_metrics(): assert os.path.exists(combined_csv_path) assert csv_data.equals(combined_data) + # test warn_overwrite flag + if warn_overwrite_test: + with pytest.warns( + UserWarning, match="Removing previously generated combined mph_pulse" + ): + mph.combine_mph_metrics(csv_path, warn_overwrite_test) + else: + mph.combine_mph_metrics(csv_path, warn_overwrite_test) + def test_visualize_mph(): bad_path = os.path.join(Path(__file__).parents[1], "data", "not-a-folder") diff --git a/tests/normalize_test.py b/tests/normalize_test.py index 87eec81f..4a163284 100644 --- a/tests/normalize_test.py +++ b/tests/normalize_test.py @@ -149,7 +149,8 @@ def test_create_prediction_function(obj_func, num_params): @parametrize_with_cases("metrics", cases=test_cases.CombineRunMetricFiles) -def test_combine_run_metrics(metrics): +@parametrize("warn_overwrite_test", [True, False]) +def test_combine_run_metrics(metrics, warn_overwrite_test): with tempfile.TemporaryDirectory() as temp_dir: for metric in metrics: name, values_df = metric[0], pd.DataFrame(metric[1]) @@ -163,8 +164,12 @@ def test_combine_run_metrics(metrics): assert len(combined_data) == len(metrics) * 10 # check that previously generated combined file is removed with warning - with pytest.warns(UserWarning, match="previously generated"): - normalize.combine_run_metrics(temp_dir, "pulse_height") + # NOTE: only if warn_overwrite turned on + if warn_overwrite_test: + with pytest.warns(UserWarning, match="previously generated"): + normalize.combine_run_metrics(temp_dir, "pulse_height", warn_overwrite_test) + else: + normalize.combine_run_metrics(temp_dir, "pulse_height", warn_overwrite_test) # check that files with different lengths raises error name, bad_vals = metrics[0][0], pd.DataFrame(metrics[0][1]) diff --git a/tests/qc_comp_test.py b/tests/qc_comp_test.py index 7ad22ea2..b5f5273c 100644 --- a/tests/qc_comp_test.py +++ b/tests/qc_comp_test.py @@ -149,7 +149,8 @@ def test_compute_qc_metrics(gaussian_blur, bin_file_folder, fovs): @parametrize("fovs", [["fov-1-scan-1"], ["fov-1-scan-1", "fov-2-scan-1"]]) -def test_combine_qc_metrics(fovs): +@parametrize("warn_overwrite_test", [True, False]) +def test_combine_qc_metrics(fovs, warn_overwrite_test): with tempfile.TemporaryDirectory() as temp_dir: # bin folder error check with pytest.raises(FileNotFoundError): @@ -192,8 +193,19 @@ def test_combine_qc_metrics(fovs): os.path.join(bin_file_path, "%s_%s.csv" % (fov, ms)), index=False ) - # run the aggregation function - qc_comp.combine_qc_metrics(bin_file_path) + # create a dummy file if testing the warn_overwrite flag + if warn_overwrite_test: + Path(os.path.join(bin_file_path, "combined_%s.csv" % settings.QC_SUFFIXES[0])).touch() + + # run the aggregation function, testing that both warn_overwrite states work properly + if warn_overwrite_test: + with pytest.warns( + UserWarning, + match="Removing previously generated combined %s" % settings.QC_SUFFIXES[0], + ): + qc_comp.combine_qc_metrics(bin_file_path, warn_overwrite_test) + else: + qc_comp.combine_qc_metrics(bin_file_path, warn_overwrite_test) for ms, mv, mc in zip(settings.QC_SUFFIXES, metric_start_vals, settings.QC_COLUMNS): # assert the combined QC metric file was created diff --git a/tests/watcher_callbacks_test.py b/tests/watcher_callbacks_test.py index f92772cc..ff4713f0 100644 --- a/tests/watcher_callbacks_test.py +++ b/tests/watcher_callbacks_test.py @@ -44,6 +44,7 @@ def test_build_fov_callback(callbacks, kwargs, data_path): kwargs["mph_out_dir"] = file_dir kwargs["pulse_out_dir"] = file_dir kwargs["plot_dir"] = plot_dir + kwargs["warn_overwrite"] = False run_data = os.path.join(tmp_dir, "tissue") os.makedirs(run_data) @@ -98,6 +99,8 @@ def test_build_callbacks(viz_mock, run_callbacks, inter_callbacks, kwargs, data_ if kwargs.get("save_dir", False): kwargs["save_dir"] = qc_dir + kwargs["warn_overwrite"] = True if inter_callbacks else False + fcb, rcb, icb = watcher_callbacks.build_callbacks( run_callbacks=run_callbacks, intermediate_callbacks=inter_callbacks, **kwargs )