Showing 3 changed files with 241 additions and 1 deletion.
@@ -0,0 +1,115 @@
from glob import glob
from typing import Iterable, Tuple, Union

import numpy as np
import pandas as pd
import xarray as xr


def ds2df(ds: xr.Dataset, fov: int, cell_index_start: int = 0, filename=None) -> pd.DataFrame:
    """
    Convert a single dataset into a dataframe.

    Parameters
    ----------
    ds : xr.Dataset
        The dataset to convert.
    fov : int
        The fov index.
    cell_index_start : int
        The offset of the cell index. This is necessary
        if you want to easily combine multiple datasets into a
        single dataframe down the road.
    filename : str, optional
        The filename to add to an 'fname' column.

    Returns
    -------
    df : pd.DataFrame
    """
    pts_cell = ds.pts_cell
    mult = ds.attrs["multiplier"]
    indx = pd.MultiIndex.from_product(
        (np.arange(int(pts_cell.shape[0] / mult)) + cell_index_start, np.arange(mult)),
        names=("cell", "sub-cell"),
    )
    df = pd.DataFrame(ds["cell_raman"][0], index=indx)
    df["x"] = ds["cell_points"][:, 0]
    df["y"] = ds["cell_points"][:, 1]

    # scale the stored point coordinates up to pixel indices
    cell_points = ds["cell_points"] * 2048
    bkd_points = ds["bkd_points"] * 2048
    # subtract the constant offset (608) from the spectra
    cell_raman = ds["cell_raman"] - 608
    bkd_raman = ds["bkd_raman"] - 608

    thres = 140

    # sample the image at each cell's pixel location to get its GFP intensity
    cell_com = cell_points.to_numpy().astype(int)
    gfp_int = np.asarray([ds["img"][1, 0, x[0], x[1]].values for x in cell_com])

    df["gfp_int"] = gfp_int
    df["fov"] = fov
    if filename is not None:
        df["fname"] = filename
    return df

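# A minimal usage sketch for ds2df (hedged: the file name below is hypothetical;
# the dataset is assumed to carry the variables ds2df reads - "cell_raman",
# "cell_points", "bkd_points", "img" - and a "multiplier" attribute):
#
#     ds = xr.open_dataset("fov_000.nc")
#     df = ds2df(ds, fov=0, cell_index_start=0, filename="fov_000.nc")
#     # df is indexed by (cell, sub-cell) and carries the spectra plus
#     # x, y, gfp_int, fov, and fname columns.
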
def glob2df(
    files: Union[Iterable[str], str],
    conditions,
    threshold: float,
    well_number: int = 0,
    cell_index_start: int = 0,
    verbose: bool = True,
) -> Tuple[pd.DataFrame, xr.DataArray]:
    """
    Open a set of datasets and combine them into a single dataframe.

    Parameters
    ----------
    files : iterable of str, or str
        An iterable of filenames, or a glob string to use to find the files.
    conditions : tuple(str, str)
        The two conditions in this dataset, listed in the order
        (undyed condition, dyed condition).
    threshold : float
        The threshold gfp value for determining dyed vs undyed cells.
    well_number : int, default 0
        What well number to put in the dataframe.
    cell_index_start : int
        The offset of the cell index. This is necessary
        if you want to easily combine multiple datasets into a
        single dataframe down the road.
    verbose : bool, default True
        Whether to print out the file names as they are opened.

    Returns
    -------
    df : pd.DataFrame
    images : xr.DataArray
    """
    if isinstance(files, str):
        files = glob(files)
    dfs = []
    images = []
    for fov, f in enumerate(files):
        if verbose:
            print(f)
        ds = xr.open_dataset(f)
        dfs.append(ds2df(ds, fov, cell_index_start, filename=f))
        # continue cell numbering where the previous file left off
        cell_index_start = dfs[-1].index.max()[0] + 1
        images.append(ds["img"])
    images = xr.concat(images, dim="fov")
    df = pd.concat(dfs)

    # determine condition and update multiindex
    # a cell is called "dyed" if most of its sub-spectra exceed the gfp threshold
    cond_idx = [((df["gfp_int"] > threshold).groupby("cell").mean() > 0.5).astype(int)]
    # NOTE: the hard-coded 13 assumes 13 sub-spectra (multiplier) per cell
    df["cond"] = np.broadcast_to(np.atleast_2d(np.array(conditions)).T, (2, 13))[
        tuple(cond_idx)
    ].ravel()
    # df["cond"] = np.array([[condition] * 13, ["N"] * 13])[tuple(cond_idx)].ravel()
    df.set_index("cond", append=True, inplace=True)
    df = df.reorder_levels(
        ["cond", "cell", "sub-cell"],
    )
    df["well"] = well_number
    return df, images
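
# A minimal usage sketch for glob2df (hedged: the glob pattern, condition
# names, and threshold are illustrative values, not taken from this commit):
#
#     df, images = glob2df(
#         "data/well0/*.nc",
#         conditions=("undyed", "dyed"),
#         threshold=140,
#         well_number=0,
#     )
#     # df has a (cond, cell, sub-cell) MultiIndex; images stacks the per-file
#     # "img" arrays along a new "fov" dimension.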
@@ -0,0 +1,123 @@
from __future__ import annotations

import numpy as np
import pandas as pd
from scipy.signal import find_peaks

__all__ = ["find_cosmic_rays", "remove_cosmic_rays", "group_spectra_points"]


def find_cosmic_rays(
    spectra: np.ndarray, ignore_region: tuple[int, int] = (200, 400), **kwargs
) -> np.ndarray:
    """
    Find the indices of cosmic rays.

    Parameters
    ----------
    spectra : (N, 1340) array-like
        The spectra to search through.
    ignore_region : (int, int)
        A pixel region in which to ignore cosmic rays.
    **kwargs :
        Passed to scipy.signal.find_peaks.

    Returns
    -------
    idx : (M, 2) np.ndarray
        The indices of which spectra+pixels have detected cosmic rays.
        The first column contains which spectrum, the second which pixel.
    """
    spectra = np.atleast_2d(spectra)
    idx = []
    min_ignore = min(ignore_region)
    max_ignore = max(ignore_region)
    threshold = kwargs.pop("threshold", 75)
    prominence = kwargs.pop("prominence", 100)
    for s, spec in enumerate(spectra):
        peaks, _ = find_peaks(
            spec, threshold=threshold, prominence=prominence, **kwargs
        )
        for p in peaks:
            if min_ignore < p < max_ignore:
                # skip peaks inside the ignore window
                continue
            idx.append((s, p))
    # reshape so an empty result still has two columns
    return np.asarray(idx, dtype=int).reshape(-1, 2)

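# A minimal sketch of calling find_cosmic_rays on its own (the spectra and the
# injected spike are synthetic, purely for illustration):
#
#     rng = np.random.default_rng(0)
#     spectra = rng.normal(100, 5, size=(10, 1340))
#     spectra[3, 700] += 500                  # fake cosmic ray
#     find_cosmic_rays(spectra)               # -> array([[  3, 700]])
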
def remove_cosmic_rays(df: pd.DataFrame, plot: bool = False, **kwargs) -> pd.DataFrame:
    """
    Process a dataframe by removing all spectra with detected cosmic rays.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe with spectra components as the first 1340 columns.
    plot : bool
        Whether to generate a plot showing which spectra were removed.
    **kwargs
        Passed to scipy.signal.find_peaks.

    Returns
    -------
    pd.DataFrame
        The spectra dataframe with the cosmic-ray spectra removed.
    """
    spectra = df.iloc[:, :1340].values

    cosmic_idx = find_cosmic_rays(spectra, **kwargs)
    # the first column of cosmic_idx holds the affected spectrum indices
    keep_idx = np.setdiff1d(np.arange(spectra.shape[0]), cosmic_idx[:, 0])
    if plot:
        import matplotlib.pyplot as plt

        unique_cosmic, offset_idx = np.unique(cosmic_idx[:, 0], return_inverse=True)
        # stack the removed spectra with a vertical offset so they stay distinguishable
        offsets = np.arange(unique_cosmic.shape[0]) * 100
        fig, axs = plt.subplots(1, 2, figsize=(10, 6))
        axs[0].set_title("Post Removal")
        axs[0].plot(spectra[keep_idx].T, alpha=0.75)

        axs[1].plot(spectra[unique_cosmic].T + offsets)
        axs[1].plot(
            cosmic_idx[:, 1],
            spectra[cosmic_idx[:, 0], cosmic_idx[:, 1]] + offsets[offset_idx],
            "rx",
            markersize=10,
            label="detected cosmic rays",
            mew=5,
        )
        axs[1].legend()

        axs[1].set_title("Removed spectra - with offset")
    return df.iloc[keep_idx]

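# A minimal sketch of the intended cleanup step (hedged: assumes df came from
# the ds2df/glob2df pipeline with the 1340 spectral pixels as leading columns;
# threshold and prominence simply restate the defaults used above):
#
#     clean_df = remove_cosmic_rays(df, plot=True, threshold=75, prominence=100)
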
def group_spectra_points(df, multiplier: int) -> pd.DataFrame:
    """
    Add which point each spectrum is from to the multiindex.

    Parameters
    ----------
    df : pd.DataFrame
        The raman dataframe. Should be organized as T, P, type.
    multiplier : int
        How many subspectra per point.

    Returns
    -------
    pd.DataFrame
        The original dataframe with pt_label appended to the multiindex.
    """
    offset = 0
    for pos, pos_df in df.groupby(level=1):
        for t, tp_df in pos_df.groupby(level=0):
            n_pts = int(len(tp_df) / multiplier)
            # label each block of `multiplier` consecutive spectra with one point id
            pt_labels = (
                np.broadcast_to(
                    np.arange(n_pts, dtype=int)[:, None], (n_pts, multiplier)
                ).ravel()
                + offset
            )
            df.loc[(t, pos), "pt"] = pt_labels
        # continue point numbering from this position's last label
        offset = df.loc[pos, "pt"].max()
    df["pt"] = df["pt"].astype(int)
    return df.set_index("pt", append=True)
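
# A minimal sketch (hedged: the multiplier value and the (T, P) index layout
# are assumptions inferred from the docstring and loop above):
#
#     df = group_spectra_points(df, multiplier=13)
#     df.groupby(level="pt").mean()   # average the sub-spectra of each point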