Skip to content

Commit

Permalink
Merge branch 'main' into incomplete_bins
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-l-kong authored Jul 24, 2023
2 parents 4d9bfe1 + 15e294d commit 58fc665
Show file tree
Hide file tree
Showing 10 changed files with 73 additions and 31 deletions.
5 changes: 3 additions & 2 deletions src/toffy/mph_comp.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,12 @@ def compute_mph_metrics(bin_file_dir, csv_dir, fov, mass=98, mass_start=97.5, ma
out_df.to_csv(os.path.join(csv_dir, pulse_height_file), index=False)


def combine_mph_metrics(csv_dir, return_data=False):
def combine_mph_metrics(csv_dir, return_data=False, warn_overwrite=True):
"""Combines data from individual csvs into one
Args:
csv_dir (str): path where FOV mph data csvs are stored
return_data (bool): whether to return dataframe with mph metrics, default False
warn_overwrite (bool): whether to warn if existing `_combined.csv` file found
Returns:
combined mph data for all FOVs
Expand All @@ -125,7 +126,7 @@ def combine_mph_metrics(csv_dir, return_data=False):
io_utils.validate_paths(csv_dir)

# combine individual csv files
combine_run_metrics(csv_dir, "mph_pulse")
combine_run_metrics(csv_dir, "mph_pulse", warn_overwrite)

# calculate cumulative sums of total counts and time
combined_df = pd.read_csv(os.path.join(csv_dir, "mph_pulse_combined.csv"))
Expand Down
13 changes: 8 additions & 5 deletions src/toffy/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,12 +214,13 @@ def pred_func(x):
return pred_func


def combine_run_metrics(run_dir, substring):
def combine_run_metrics(run_dir, substring, warn_overwrite=True):
"""Combines the specified metrics from different FOVs into a single file
Args:
run_dir (str): the directory containing the files
substring(str): the substring contained within the files to be combined"""
substring(str): the substring contained within the files to be combined
warn_overwrite (bool): whether to warn if existing `_combined.csv` file found"""

files = io_utils.list_files(run_dir, substring)

Expand All @@ -228,9 +229,11 @@ def combine_run_metrics(run_dir, substring):
raise ValueError("No files found in {}".format(run_dir))

if substring + "_combined.csv" in files:
warnings.warn(
"Removing previously generated combined {} file in {}".format(substring, run_dir)
)
if warn_overwrite:
print("Warning of overwriting!")
warnings.warn(
"Removing previously generated combined {} file in {}".format(substring, run_dir)
)
os.remove(os.path.join(run_dir, substring + "_combined.csv"))
files = [file for file in files if "combined" not in file]

Expand Down
9 changes: 8 additions & 1 deletion src/toffy/qc_comp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import pathlib
import re
import warnings
from dataclasses import dataclass, field
from shutil import rmtree
from typing import Dict, List, Optional, Tuple, Union
Expand Down Expand Up @@ -386,12 +387,14 @@ def compute_qc_metrics_direct(image_data, fov_name, gaussian_blur=False, blur_fa
return metric_csvs


def combine_qc_metrics(qc_metrics_dir):
def combine_qc_metrics(qc_metrics_dir, warn_overwrite=True):
"""Aggregates the QC results of each FOV into one `.csv`
Args:
qc_metrics_dir (str):
the name of the folder containing the QC metric files
warn_overwrite (bool):
whether to warn if existing combined CSV found for each metric
"""

# path validation check
Expand All @@ -417,6 +420,10 @@ def combine_qc_metrics(qc_metrics_dir):

# write the aggregated metric data
# NOTE: if this combined metric file already exists, it will be overwritten
if os.path.exists(os.path.join(qc_metrics_dir, "combined_%s.csv" % ms)) and warn_overwrite:
warnings.warn(
"Removing previously generated combined %s file in %s" % (ms, qc_metrics_dir)
)
metric_df.to_csv(os.path.join(qc_metrics_dir, "combined_%s.csv" % ms), index=False)


Expand Down
23 changes: 9 additions & 14 deletions src/toffy/watcher_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@
class RunCallbacks:
run_folder: str

def plot_qc_metrics(self, qc_out_dir: str, **kwargs):
def plot_qc_metrics(self, qc_out_dir: str, warn_overwrite=False, **kwargs):
"""Plots qc metrics generated by the `generate_qc` callback
Args:
qc_out_dir (str):
Directory containing qc metric csv
warn_overwrite (bool): whether to warn if existing `_combined.csv` file found,
needed to curb watcher output if `plot_qc_metrics` set as intermediate callback
**kwargs (Dict[str, Any]):
Additional arguments for `toffy.qc_comp.visualize_qc_metrics`.
Accepted kwargs are
Expand All @@ -57,20 +59,22 @@ def plot_qc_metrics(self, qc_out_dir: str, **kwargs):
viz_kwargs = {k: v for k, v in kwargs.items() if k in valid_kwargs}
qc_plots = {}

combine_qc_metrics(qc_out_dir)
combine_qc_metrics(qc_out_dir, warn_overwrite=warn_overwrite)
for metric_name in QC_COLUMNS:
qc_plots[metric_name] = visualize_qc_metrics(
metric_name, qc_out_dir, **viz_kwargs, return_plot=True
)

return qc_plots

def plot_mph_metrics(self, mph_out_dir, plot_dir, **kwargs):
def plot_mph_metrics(self, mph_out_dir, plot_dir, warn_overwrite=False, **kwargs):
"""Plots mph metrics generated by the `generate_mph` callback
Args:
mph_out_dir (str): directory containing mph metric csv
plot_dir (str): director to store the plot to
warn_overwrite (bool): whether to warn if existing `_combined.csv` file found,
needed to curb watcher output if `plot_mph_metrics` set as intermediate callback
**kwargs (Dict[str, Any]):
Additional arguments for `toffy.mph_comp.visualize_mph`.
Accepted kwargs are
Expand All @@ -89,7 +93,8 @@ def plot_mph_metrics(self, mph_out_dir, plot_dir, **kwargs):
]
viz_kwargs = {k: v for k, v in kwargs.items() if k in valid_kwargs}

mph_df = combine_mph_metrics(mph_out_dir, return_data=True)
# set verbose to false to prevent overwrite error from popping up each FOV
mph_df = combine_mph_metrics(mph_out_dir, return_data=True, warn_overwrite=warn_overwrite)
mph_fig = visualize_mph(mph_df, plot_dir, **viz_kwargs, return_plot=True)

return mph_fig
Expand Down Expand Up @@ -358,16 +363,6 @@ def build_callbacks(
run_callbacks + intermediate_callbacks if intermediate_callbacks else run_callbacks[:]
)

for run_cb in callbacks_with_prereq:
argnames = inspect.getfullargspec(getattr(RunCallbacks, run_cb))[0]
argnames = [argname for argname in argnames if argname != "self"]

misc_utils.verify_in_list(required_arguments=argnames, passed_arguments=list(kwargs.keys()))

callbacks_with_prereq = (
run_callbacks + intermediate_callbacks if intermediate_callbacks else run_callbacks[:]
)

for run_cb in callbacks_with_prereq:
argnames = inspect.getfullargspec(getattr(RunCallbacks, run_cb))[0]
argnames = [argname for argname in argnames if argname != "self"]
Expand Down
5 changes: 3 additions & 2 deletions templates/3a_monitor_MIBI_run.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
" plot_dir=metrics_plot_dir,\n",
" save_dir=metrics_plot_dir,\n",
" panel=panel,\n",
" warn_overwrite=False,\n",
")"
]
},
Expand All @@ -128,7 +129,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "toffy38",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -142,7 +143,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
"version": "3.9.16"
},
"vscode": {
"interpreter": {
Expand Down
3 changes: 3 additions & 0 deletions tests/fov_watcher_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ def test_watcher(
kwargs["pulse_out_dir"] = pulse_out_dir
kwargs["plot_dir"] = plot_dir

# ensure warn_overwrite set to False if intermediate callbacks set, otherwise True
kwargs["warn_overwrite"] = True if int_cbs else False

run_data = os.path.join(tmpdir, "test_run")
log_out = os.path.join(tmpdir, "log_output")
os.makedirs(run_data)
Expand Down
14 changes: 13 additions & 1 deletion tests/mph_comp_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from toffy import mph_comp as mph

parametrize = pytest.mark.parametrize


def create_sample_mph_data(fov, mph_value, total_count, time):
data = pd.DataFrame(
Expand Down Expand Up @@ -89,7 +91,8 @@ def test_compute_mph_metrics():
assert csv_data.equals(mph_data)


def test_combine_mph_metrics():
@parametrize("warn_overwrite_test", [True, False])
def test_combine_mph_metrics(warn_overwrite_test):
bad_path = os.path.join(Path(__file__).parents[1], "data", "not-a-folder")

# bad directory path should raise an error
Expand All @@ -116,6 +119,15 @@ def test_combine_mph_metrics():
assert os.path.exists(combined_csv_path)
assert csv_data.equals(combined_data)

# test warn_overwrite flag
if warn_overwrite_test:
with pytest.warns(
UserWarning, match="Removing previously generated combined mph_pulse"
):
mph.combine_mph_metrics(csv_path, warn_overwrite_test)
else:
mph.combine_mph_metrics(csv_path, warn_overwrite_test)


def test_visualize_mph():
bad_path = os.path.join(Path(__file__).parents[1], "data", "not-a-folder")
Expand Down
11 changes: 8 additions & 3 deletions tests/normalize_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ def test_create_prediction_function(obj_func, num_params):


@parametrize_with_cases("metrics", cases=test_cases.CombineRunMetricFiles)
def test_combine_run_metrics(metrics):
@parametrize("warn_overwrite_test", [True, False])
def test_combine_run_metrics(metrics, warn_overwrite_test):
with tempfile.TemporaryDirectory() as temp_dir:
for metric in metrics:
name, values_df = metric[0], pd.DataFrame(metric[1])
Expand All @@ -163,8 +164,12 @@ def test_combine_run_metrics(metrics):
assert len(combined_data) == len(metrics) * 10

# check that previously generated combined file is removed with warning
with pytest.warns(UserWarning, match="previously generated"):
normalize.combine_run_metrics(temp_dir, "pulse_height")
# NOTE: only if warn_overwrite turned on
if warn_overwrite_test:
with pytest.warns(UserWarning, match="previously generated"):
normalize.combine_run_metrics(temp_dir, "pulse_height", warn_overwrite_test)
else:
normalize.combine_run_metrics(temp_dir, "pulse_height", warn_overwrite_test)

# check that files with different lengths raises error
name, bad_vals = metrics[0][0], pd.DataFrame(metrics[0][1])
Expand Down
18 changes: 15 additions & 3 deletions tests/qc_comp_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ def test_compute_qc_metrics(gaussian_blur, bin_file_folder, fovs):


@parametrize("fovs", [["fov-1-scan-1"], ["fov-1-scan-1", "fov-2-scan-1"]])
def test_combine_qc_metrics(fovs):
@parametrize("warn_overwrite_test", [True, False])
def test_combine_qc_metrics(fovs, warn_overwrite_test):
with tempfile.TemporaryDirectory() as temp_dir:
# bin folder error check
with pytest.raises(FileNotFoundError):
Expand Down Expand Up @@ -192,8 +193,19 @@ def test_combine_qc_metrics(fovs):
os.path.join(bin_file_path, "%s_%s.csv" % (fov, ms)), index=False
)

# run the aggregation function
qc_comp.combine_qc_metrics(bin_file_path)
# create a dummy file if testing the warn_overwrite flag
if warn_overwrite_test:
Path(os.path.join(bin_file_path, "combined_%s.csv" % settings.QC_SUFFIXES[0])).touch()

# run the aggregation function, testing that both warn_overwrite states work properly
if warn_overwrite_test:
with pytest.warns(
UserWarning,
match="Removing previously generated combined %s" % settings.QC_SUFFIXES[0],
):
qc_comp.combine_qc_metrics(bin_file_path, warn_overwrite_test)
else:
qc_comp.combine_qc_metrics(bin_file_path, warn_overwrite_test)

for ms, mv, mc in zip(settings.QC_SUFFIXES, metric_start_vals, settings.QC_COLUMNS):
# assert the combined QC metric file was created
Expand Down
3 changes: 3 additions & 0 deletions tests/watcher_callbacks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def test_build_fov_callback(callbacks, kwargs, data_path):
kwargs["mph_out_dir"] = file_dir
kwargs["pulse_out_dir"] = file_dir
kwargs["plot_dir"] = plot_dir
kwargs["warn_overwrite"] = False

run_data = os.path.join(tmp_dir, "tissue")
os.makedirs(run_data)
Expand Down Expand Up @@ -98,6 +99,8 @@ def test_build_callbacks(viz_mock, run_callbacks, inter_callbacks, kwargs, data_
if kwargs.get("save_dir", False):
kwargs["save_dir"] = qc_dir

kwargs["warn_overwrite"] = True if inter_callbacks else False

fcb, rcb, icb = watcher_callbacks.build_callbacks(
run_callbacks=run_callbacks, intermediate_callbacks=inter_callbacks, **kwargs
)
Expand Down

0 comments on commit 58fc665

Please sign in to comment.