Skip to content

Commit

Permalink
Mass proficient extraction for NAS storage (#342)
Browse files Browse the repository at this point in the history
* Initial commit of mass proficient extraction

* Add mass proficient MPH extraction

* Ensure bin_extraction also handles proficient metrics extraction

* Handle mass proficient and deficient bin file extraction separately (in case the missing FOVs differ between the two)

* Add mass proficient MPH normalization generation to 4b notebook

* OCD comment change

* Ensure mass proficient panel passed to proficient extraction in 3b notebook

* Typo fix

* Ensure mass proficient directory actually gets created prior to extraction in notebook 3b

* Make comment about mass proficient extraction more clear

* Ensure pulse height generation doesn't attempt to read in proficient metric data

* Remove duplicate intermediate callback

* Typo in FOV callbacks

* Add flag to control proficient extraction or not

* Nuke proficient MPH extraction

* Timing tests for normalization

* Update pyproject.toml dependencies to be compatible with mibi-bin-tools and alpineer

* Standardize the deficient and proficient ranges based on agreed upon range values

* Delete timeit print statements for MPH

* Begin reducing test cases

* Document the start and stop offset params

* Update poetry lock file to ensure this actually installs

* Clarify comment about extraction ranges

* Fix another extraction window comment

* Make sure extract_prof is included as a param to watcher tests

* Add extract_prof param to inter_callback test

* Revert back to old deficient and proficient extraction ranges

* Pulse heights are not extracted for deficient and proficient data anymore

* Clarify comment
  • Loading branch information
alex-l-kong authored Aug 22, 2023
1 parent e1fa4d8 commit 49d26ab
Show file tree
Hide file tree
Showing 14 changed files with 942 additions and 417 deletions.
1,098 changes: 716 additions & 382 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ metadata = false
[tool.poetry.dependencies]
python = ">=3.9,<3.12"
alpineer = ">=0.1.9"
mibi-bin-tools = "0.2.9"
mibi-bin-tools = "0.2.10"
ipywidgets = "^8"
numpy = "1.*"
natsort = "^8"
Expand Down
12 changes: 10 additions & 2 deletions src/toffy/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ def write_counts_per_mass(base_dir, output_dir, fov, masses, start_offset=0.5, s
out_df.to_csv(os.path.join(output_dir, fov + "_channel_counts.csv"), index=False)


def write_mph_per_mass(base_dir, output_dir, fov, masses, start_offset=0.5, stop_offset=0.5):
def write_mph_per_mass(
base_dir, output_dir, fov, masses, start_offset=0.5, stop_offset=0.5, proficient=False
):
"""Records the median pulse height (MPH) per mass for the specified FOV
Args:
Expand All @@ -57,6 +59,7 @@ def write_mph_per_mass(base_dir, output_dir, fov, masses, start_offset=0.5, stop
masses (list): the list of masses to extract MPH from
start_offset (float): beginning value for calculating mph values
stop_offset (float): ending value for calculating mph values
proficient (bool): whether proficient MPH data is written or not
"""
# hold computed values
mph_vals = []
Expand All @@ -72,7 +75,10 @@ def write_mph_per_mass(base_dir, output_dir, fov, masses, start_offset=0.5, stop
# create df to hold output
fovs = np.repeat(fov, len(masses))
out_df = pd.DataFrame({"mass": masses, "fov": fovs, "pulse_height": mph_vals})
out_df.to_csv(os.path.join(output_dir, fov + "_pulse_heights.csv"), index=False)
pulse_heights_file = (
fov + "_pulse_heights_proficient.csv" if proficient else fov + "_pulse_heights.csv"
)
out_df.to_csv(os.path.join(output_dir, pulse_heights_file), index=False)


def create_objective_function(obj_func):
Expand Down Expand Up @@ -222,7 +228,9 @@ def combine_run_metrics(run_dir, substring, warn_overwrite=True):
substring(str): the substring contained within the files to be combined
warn_overwrite (bool): whether to warn if existing `_combined.csv` file found"""

# retrieve all pulse height files, but ignore anything mass proficient
files = io_utils.list_files(run_dir, substring)
files = [file for file in files if "_proficient" not in file]

# validate inputs
if len(files) == 0:
Expand Down
35 changes: 35 additions & 0 deletions src/toffy/panel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,41 @@
)


def modify_panel_ranges(
    panel: pd.DataFrame, start_offset: float = 0, stop_offset: float = 0
) -> pd.DataFrame:
    """Shift the integration window of the standard-width masses in a panel.

    Only rows whose window is 0.3 wide (i.e. 'Start' - 'Stop' equals -0.3 after rounding
    to one decimal place) are shifted; every other row is returned unchanged.
    The input panel is never modified, a copy is returned instead.

    Args:
        panel (pd.DataFrame): panel dataframe with columns Mass, Target, Start, and Stop.
        start_offset (float): the value to add to the `'Start'` column.
        stop_offset (float): the value to add to the `'Stop'` column.

    Returns:
        pd.DataFrame:
            Updated panel with `start_offset` added to the `'Start'` column,
            likewise for `stop_offset` and the `'Stop'` column.
    """
    panel_new = panel.copy()

    # select only rows where 'Start' - 'Stop' = -0.3, rounding to absorb floating point error.
    # A positional boolean mask is used instead of index labels so that, on a panel with a
    # non-unique index, rows that merely share a label with a matching row are left alone.
    rows_to_shift = ((panel_new["Start"] - panel_new["Stop"]).round(1) == -0.3).to_numpy()

    # the mask is computed once, before either column changes, so both shifts hit the same rows
    panel_new.loc[rows_to_shift, "Start"] += start_offset
    panel_new.loc[rows_to_shift, "Stop"] += stop_offset

    return panel_new


def merge_duplicate_masses(panel):
"""Check a panel df for duplicate mass values and return a unique mass panel with the
target names combined
Expand Down
56 changes: 47 additions & 9 deletions src/toffy/watcher_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from toffy.image_stitching import stitch_images
from toffy.mph_comp import combine_mph_metrics, compute_mph_metrics, visualize_mph
from toffy.normalize import write_mph_per_mass
from toffy.panel_utils import modify_panel_ranges
from toffy.qc_comp import combine_qc_metrics, compute_qc_metrics_direct
from toffy.qc_metrics_plots import visualize_qc_metrics
from toffy.settings import QC_COLUMNS
Expand Down Expand Up @@ -124,11 +125,14 @@ class FovCallbacks:
point_name: str
overwrite: bool
__panel: pd.DataFrame = field(default=None, init=False)
__panel_prof: pd.DataFrame = field(default=None, init=False)
__fov_data: xr.DataArray = field(default=None, init=False)
__fov_data_prof: xr.DataArray = field(default=None, init=False)

def _generate_fov_data(
self,
panel: pd.DataFrame,
extract_prof: bool,
intensities=["Au", "chan_39"],
replace=True,
time_res=0.0005,
Expand All @@ -138,9 +142,13 @@ def _generate_fov_data(
The data and the panel are then cached members of the FovCallbacks object
Both the deficient and proficient extracted data and panel are computed and cached
Args:
panel (pd.DataFrame):
Panel used for extraction
extract_prof (bool):
If set, extract proficient data
intensities (bool | List[str]):
Intensities argument for `mibi_bin_tools.bin_files.extract_bin_files`
replace (bool):
Expand All @@ -159,17 +167,37 @@ def _generate_fov_data(
replace=replace,
time_res=time_res,
)

self.__panel = panel

def extract_tiffs(self, tiff_out_dir: str, panel: pd.DataFrame, **kwargs):
if extract_prof:
# adds an offset of 0.3 to 'Start' and 'Stop' columns, modifying extraction range
# from (-0.3, 0) to (0, 0.3) for proficient extraction
panel_prof = modify_panel_ranges(panel, start_offset=0.3, stop_offset=0.3)
self.__fov_data_prof = extract_bin_files(
data_dir=self.run_folder,
out_dir=None,
include_fovs=[self.point_name],
panel=panel_prof,
intensities=intensities,
replace=replace,
time_res=time_res,
)
self.__panel_prof = panel_prof

def extract_tiffs(
self, tiff_out_dir: str, panel: pd.DataFrame, extract_prof: bool = True, **kwargs
):
"""Extract tiffs into provided directory, using given panel
Done for both the extracted deficient and proficient data
Args:
tiff_out_dir (str):
Path where tiffs are written
panel (pd.DataFrame):
Target mass integration ranges
extract_prof (bool):
If set, extract mass proficient data
**kwargs (dict):
Additional arguments for `mibi_bin_tools.bin_files.extract_bin_files`.
Accepted kwargs are
Expand Down Expand Up @@ -199,8 +227,8 @@ def extract_tiffs(self, tiff_out_dir: str, panel: pd.DataFrame, **kwargs):
unextracted_chans = io_utils.remove_file_extensions(unextracted_chan_tiffs)
panel = panel[panel["Target"].isin(unextracted_chans)]

if self.__fov_data is None:
self._generate_fov_data(panel, **kwargs)
if self.__fov_data is None or self.__fov_data_prof is None:
self._generate_fov_data(panel, extract_prof, **kwargs)

intensities = kwargs.get("intensities", ["Au", "chan_39"])
if any_true(intensities) and type(intensities) is not list:
Expand All @@ -214,14 +242,27 @@ def extract_tiffs(self, tiff_out_dir: str, panel: pd.DataFrame, **kwargs):
intensities=intensities,
)

def generate_qc(self, qc_out_dir: str, panel: pd.DataFrame = None, **kwargs):
if extract_prof:
_write_out(
img_data=self.__fov_data_prof[0, :, :, :, :].values,
out_dir=tiff_out_dir + "_proficient",
fov_name=self.point_name,
targets=list(self.__fov_data.channel.values),
intensities=intensities,
)

def generate_qc(
self, qc_out_dir: str, panel: pd.DataFrame = None, extract_prof: bool = True, **kwargs
):
"""Generates qc metrics from given panel, and saves output to provided directory
Args:
qc_out_dir (str):
Path where qc_metrics are written
panel (pd.DataFrame):
Target mass integration ranges
extract_prof (bool):
If set, extract mass proficient data
**kwargs (dict):
Additional arguments for `toffy.qc_comp.compute_qc_metrics`. Accepted kwargs are:
Expand All @@ -234,7 +275,7 @@ def generate_qc(self, qc_out_dir: str, panel: pd.DataFrame = None, **kwargs):
if self.__fov_data is None:
if panel is None:
raise ValueError("Must provide panel if fov data is not already generated...")
self._generate_fov_data(panel, **kwargs)
self._generate_fov_data(panel, extract_prof, **kwargs)

qc_metric_paths = [
os.path.join(qc_out_dir, f"{self.point_name}_nonzero_mean_stats.csv"),
Expand Down Expand Up @@ -267,7 +308,6 @@ def generate_mph(self, mph_out_dir, **kwargs):
- mass_start
- mass_stop
"""

if not os.path.exists(mph_out_dir):
os.makedirs(mph_out_dir)

Expand Down Expand Up @@ -297,7 +337,6 @@ def generate_pulse_heights(self, pulse_out_dir: str, panel: pd.DataFrame = None,
- start_offset
- stop_offset
"""

if not os.path.exists(pulse_out_dir):
os.makedirs(pulse_out_dir)

Expand Down Expand Up @@ -422,7 +461,6 @@ def run_callback(run_folder: str):
# unreachable...
raise ValueError(f"Could not locate attribute {run_cb} in RunCallbacks object")

intermediate_callback = None
intermediate_callback = None
if intermediate_callbacks:

Expand Down
9 changes: 7 additions & 2 deletions templates/3a_monitor_MIBI_run.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
"## Required variables\n",
"\n",
" - `run_name` should contain the exact name of the MIBI run that will be monitored\n",
" - `panel_path` should point to a panel csv specifying the targets on your panel. You can download your panel online from the Ionpath MibiTracker and then copy the file to the `C:\\\\Users\\\\Customer.ION\\\\Documents\\panel_files` directory (see [panel format](https://github.com/angelolab/toffy#panel-format) for more information)"
" - `panel_path` should point to a panel csv specifying the targets on your panel. You can download your panel online from the Ionpath MibiTracker and then copy the file to the `C:\\\\Users\\\\Customer.ION\\\\Documents\\panel_files` directory (see [panel format](https://github.com/angelolab/toffy#panel-format) for more information)\n",
" - `extract_prof` indicates whether you want to include mass proficient extraction on top of the default (mass deficient) extraction"
]
},
{
Expand All @@ -43,7 +44,10 @@
"run_name = 'YYYY-MM-DD_run_name'\n",
"\n",
"# path to user panel\n",
"panel_path = 'C:\\\\Users\\\\Customer.ION\\\\Documents\\\\panel_files\\\\my_cool_panel.csv'"
"panel_path = 'C:\\\\Users\\\\Customer.ION\\\\Documents\\\\panel_files\\\\my_cool_panel.csv'\n",
"\n",
"# whether to include proficient extraction or not\n",
"extract_prof = True"
]
},
{
Expand Down Expand Up @@ -106,6 +110,7 @@
" run_callbacks = ['image_stitching'],\n",
" intermediate_callbacks = ['plot_qc_metrics', 'plot_mph_metrics'],\n",
" fov_callbacks = ['extract_tiffs', 'generate_pulse_heights'],\n",
" extract_prof=extract_prof,\n",
" tiff_out_dir=extraction_dir,\n",
" qc_out_dir=metrics_data_dir,\n",
" mph_out_dir=metrics_data_dir,\n",
Expand Down
25 changes: 19 additions & 6 deletions templates/3b_extract_images_from_bin.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"source": [
"import os\n",
"\n",
"from toffy.panel_utils import load_panel\n",
"from toffy.panel_utils import modify_panel_ranges, load_panel\n",
"from toffy.bin_extraction import extract_missing_fovs"
]
},
Expand All @@ -35,7 +35,8 @@
"## Required variables\n",
"You will need to define the following two arguments for this notebook.\n",
" - `run_name` should contain the exact name of the MIBI run to extract from\n",
" - `panel_path` should point to a panel csv specifying the targets on your panel. You can download your panel online from the Ionpath MibiTracker and then copy the file to the `C:\\\\Users\\\\Customer.ION\\\\Documents\\panel_files` directory (see [panel format](https://github.com/angelolab/toffy#panel-format) for more information)"
" - `panel_path` should point to a panel csv specifying the targets on your panel. You can download your panel online from the Ionpath MibiTracker and then copy the file to the `C:\\\\Users\\\\Customer.ION\\\\Documents\\panel_files` directory (see [panel format](https://github.com/angelolab/toffy#panel-format) for more information)\n",
" - `extract_prof` indicates whether you want to include mass proficient extraction on top of the default (mass deficient) extraction"
]
},
{
Expand All @@ -49,7 +50,8 @@
"source": [
"# set up args for current run\n",
"run_name = 'YYYY-MM-DD_run_name'\n",
"panel_path = 'C:\\\\Users\\\\Customer.ION\\\\Documents\\\\panel_files\\\\my_cool_panel.csv'"
"panel_path = 'C:\\\\Users\\\\Customer.ION\\\\Documents\\\\panel_files\\\\my_cool_panel.csv'\n",
"extract_prof = True"
]
},
{
Expand Down Expand Up @@ -78,7 +80,13 @@
"# path to directory containing extracted files\n",
"extraction_dir = os.path.join('D:\\\\Extracted_Images', run_name) \n",
"if not os.path.exists(extraction_dir):\n",
" os.makedirs(extraction_dir)"
" os.makedirs(extraction_dir)\n",
"\n",
"if extract_prof:\n",
" # path to directory containing mass-proficient extracted files (for long-term storage)\n",
" extraction_prof_dir = os.path.join('D:\\\\Extracted_Images', run_name + '_proficient')\n",
" if not os.path.exists(extraction_prof_dir):\n",
" os.makedirs(extraction_prof_dir)"
]
},
{
Expand All @@ -97,7 +105,12 @@
"metadata": {},
"outputs": [],
"source": [
"extract_missing_fovs(base_dir, extraction_dir, panel)"
"# base deficient extraction\n",
"extract_missing_fovs(base_dir, extraction_dir, panel)\n",
"\n",
"# mass proficient extraction (for long-term storage)\n",
"if extract_prof:\n",
" extract_missing_fovs(base_dir, extraction_prof_dir, modify_panel_ranges(panel, start_offset=0.3, stop_offset=0.3))"
]
}
],
Expand All @@ -117,7 +130,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
"version": "3.9.16"
}
},
"nbformat": 4,
Expand Down
2 changes: 2 additions & 0 deletions templates/4b_normalize_image_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@
"for fov in fovs:\n",
" # generate mph values\n",
" mph_file_path = os.path.join(mph_run_dir, fov + '_pulse_heights.csv')\n",
" \n",
" # base MPH normalization\n",
" if not os.path.exists(mph_file_path):\n",
" normalize.write_mph_per_mass(base_dir=os.path.join(bin_base_dir, run_name), output_dir=mph_run_dir, \n",
" fov=fov, masses=panel['Mass'].values, start_offset=0.3, stop_offset=0)"
Expand Down
9 changes: 9 additions & 0 deletions tests/fov_watcher_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ def test_watcher(
add_blank,
temp_bin,
):
print("The watcher start lag is: %d" % watcher_start_lag)
try:
with tempfile.TemporaryDirectory() as tmpdir:
tiff_out_dir = os.path.join(tmpdir, "cb_0", RUN_DIR_NAME)
Expand Down Expand Up @@ -378,6 +379,14 @@ def test_watcher(

# extract tiffs check
validators[0](os.path.join(tmpdir, "cb_0", RUN_DIR_NAME), fovs, bad_fovs)
if kwargs["extract_prof"]:
validators[0](
os.path.join(tmpdir, "cb_0", RUN_DIR_NAME + "_proficient"), fovs, bad_fovs
)
else:
assert not os.path.exists(
os.path.join(tmpdir, "cb_0", RUN_DIR_NAME) + "_proficient"
)

# qc check
validators[1](os.path.join(tmpdir, "cb_1", RUN_DIR_NAME), fovs, bad_fovs)
Expand Down
Loading

0 comments on commit 49d26ab

Please sign in to comment.