Skip to content

Commit

Permalink
remove show_report capabilities and cosmicqc dep (#21)
Browse files Browse the repository at this point in the history
  • Loading branch information
d33bs authored Nov 7, 2024
1 parent aab474c commit a22bb0d
Show file tree
Hide file tree
Showing 5 changed files with 3 additions and 367 deletions.
2 changes: 1 addition & 1 deletion media/coverage-badge.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
57 changes: 2 additions & 55 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ pywavelets = [
]
# used for image modifications in cytodataframe
opencv-python = "^4.10.0.84"
# used for report visualizations from within cytodataframe
cosmicqc = "^0.0.11"
imagecodecs = "^2024.9.22"

[tool.poetry.group.dev.dependencies]
Expand Down
251 changes: 0 additions & 251 deletions src/cytodataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@

import base64
import pathlib
import random
import re
import webbrowser
from io import BytesIO, StringIO
from typing import (
Any,
Expand All @@ -22,15 +20,10 @@

import numpy as np
import pandas as pd
import plotly
import plotly.colors as pc
import plotly.express as px
import plotly.graph_objects as go
import skimage
import skimage.io
import skimage.measure
from IPython import get_ipython
from jinja2 import Environment, FileSystemLoader
from pandas._config import (
get_option,
)
Expand Down Expand Up @@ -359,250 +352,6 @@ def is_notebook_or_lab() -> bool:
except NameError:
return False

def show_report(
    self: CytoDataFrame_type,
    report_path: Optional[str] = None,
    auto_open: bool = True,
    color_palette: Optional[List[str]] = None,
) -> Any:
    """
    Build and display a report from the DataFrame's coSMicQC (`cqc.`) columns.

    Columns whose name starts with `cqc.` are grouped by threshold set
    (the second dot-separated component of the column name). One figure
    is created per threshold set. The figures are then either shown
    directly or written to an HTML report.

    Args:
        report_path (Optional[str]):
            File path where the HTML report should be written.
            If `None`, or if running in a notebook or lab environment,
            no file is written and the figures are returned instead.
        auto_open (bool):
            Whether to automatically show the figures (or open the
            written HTML report in the default web browser).
        color_palette (Optional[List[str]]):
            Optional list of colors passed through to
            `create_threshold_set_outlier_visualization`.

    Returns:
        The list of created figures when running in a notebook or lab
        environment or when `report_path` is `None`; otherwise the path
        of the written HTML report. The list is empty when the data has
        no `cqc.` columns.
    """

    cqc_prefix = "cqc."

    # find all cosmicqc columns in the data using the prefix `cqc.`
    # (non-string column labels can never be cosmicqc columns)
    cqc_cols = [
        col
        for col in self.columns.tolist()
        if isinstance(col, str) and col.startswith(cqc_prefix)
    ]

    # organize column data into the threshold set name, threshold is_outlier col,
    # and the threshold score columns as list
    organized_columns = [
        [
            # name of the threshold set
            threshold_set,
            # column which includes boolean is_outlier data for threshold set
            next(
                (
                    col
                    for col in cqc_cols
                    if col.startswith(f"{cqc_prefix}{threshold_set}.is_outlier")
                ),
                None,
            ),
            # columns which show the data associated with thresholds
            [
                col
                for col in cqc_cols
                if col.startswith(f"{cqc_prefix}{threshold_set}.Z_Score.")
            ],
        ]
        for threshold_set in sorted({col.split(".")[1] for col in cqc_cols})
    ]

    # create figures for visualization based on the name, outlier status,
    # and threshold scores
    figures = [
        self.create_threshold_set_outlier_visualization(
            df=self,
            threshold_set_name=set_name,
            col_outlier=col_outlier,
            cols_threshold_scores=cols_threshold_scores,
            color_palette=color_palette,
        )
        for set_name, col_outlier, cols_threshold_scores in organized_columns
    ]

    # if we're running in a notebook or jupyter lab, show the figures as-is
    if self.is_notebook_or_lab() or report_path is None:
        # if we should automatically open, show the figures
        if auto_open:
            for figure in figures:
                figure.show()

        return figures

    # otherwise, create an html file with figures
    html_path = self.create_figure_group_html(
        figures=figures, report_path=report_path
    )

    # if we should auto open, show the html file in default web browser
    if auto_open:
        # as_uri() yields a correctly formed and escaped file URI on every
        # platform (plain "file://" + path is malformed on Windows)
        webbrowser.open(pathlib.Path(html_path).resolve().as_uri())
        print(f"Opened default web browser for report {html_path}")

    return html_path

@staticmethod
def create_figure_group_html(
    figures: List[plotly.graph_objs._figure.Figure],
    report_path: Optional[str] = None,
) -> str:
    """
    Write an HTML file containing multiple Plotly figures.

    Each figure is converted to an HTML fragment, wrapped in a
    `<div class='fig_wrapper'>`, and passed as `figure_html` to the
    `report_template.html` Jinja template found in the `data` directory
    next to this module. The rendered page is written to `report_path`.

    Args:
        figures (List[plotly.graph_objs._figure.Figure]):
            Plotly figure objects to include in the HTML report.
        report_path (Optional[str]):
            File path where the HTML report will be saved.
            Defaults to "cosmicqc_outlier_report.html" when None.

    Returns:
        str: The path to the saved HTML report.
    """

    # if we have none for the report path, use a default name.
    if report_path is None:
        report_path = "cosmicqc_outlier_report.html"

    # create wrapped html for figures
    figure_html = "".join(
        f"<div class='fig_wrapper'>{fig.to_html(full_html=False)}</div>"
        for fig in figures
    )

    # configure jinja environment
    env = Environment(
        loader=FileSystemLoader(f"{pathlib.Path(__file__).parent!s}/data")
    )
    # load a jinja template
    template = env.get_template("report_template.html")

    # Render the template with Plotly figure HTML
    rendered_html = template.render(figure_html=figure_html)

    # write the html to file; an explicit UTF-8 encoding avoids depending on
    # the platform default, which may be unable to encode non-ASCII
    # characters in the rendered HTML
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(rendered_html)

    # return the path of the file
    return report_path

def create_threshold_set_outlier_visualization(
    self: CytoDataFrame_type,
    df: pd.DataFrame,
    threshold_set_name: str,
    col_outlier: str,
    cols_threshold_scores: List[str],
    color_palette: Optional[List[str]] = None,
) -> plotly.graph_objs._figure.Figure:
    """
    Create a Plotly figure overlaying Z-score histograms for a threshold set.

    One histogram is built per Z-score column, split by the values of
    `col_outlier`. Every trace of a histogram shares one color; traces
    named "True" are drawn with an "x" pattern and labeled as outliers,
    all others are labeled as inliers. All traces are added to a single
    figure with overlaid bars and a log-scaled y axis.

    Args:
        df (pd.DataFrame):
            The DataFrame containing the data to be visualized.
        threshold_set_name (str):
            The name of the threshold set being visualized.
        col_outlier (str):
            The column name indicating outlier status.
        cols_threshold_scores (List[str]):
            A list of column names representing the Z-scores to be visualized.
        color_palette (Optional[List[str]]):
            Optional list of colors, one per Z-score column (extra
            entries are ignored). When None, colors are drawn at random
            from Plotly's Dark24 palette, without repeats as long as the
            palette has enough colors.

    Returns:
        plotly.graph_objs._figure.Figure:
            A Plotly figure object containing the visualization.

    Raises:
        ReferenceError:
            If `color_palette` is provided with fewer colors than there
            are Z-score columns.
    """

    hist_count = len(cols_threshold_scores)

    # check that we have enough colors for figures if provided; done before
    # any histogram is built so invalid input fails fast
    if color_palette is not None and len(color_palette) < hist_count:
        raise ReferenceError(
            f"Color palette length must match figure length of {hist_count}."
        )

    if color_palette is None:
        # draw default colors without replacement so overlaid histograms stay
        # distinguishable; cycle only if there are more histograms than colors
        shuffled = random.sample(
            pc.qualitative.Dark24, k=len(pc.qualitative.Dark24)
        )
        colors = [shuffled[idx % len(shuffled)] for idx in range(hist_count)]
    else:
        # otherwise, use static color palette based on the number of figures
        colors = color_palette

    score_prefix = f"cqc.{threshold_set_name}.Z_Score."

    # Create a combined figure
    fig = go.Figure()

    # Add traces from each histogram and modify colors, names, and pattern shapes
    for col, fig_color in zip(cols_threshold_scores, colors):
        # legend / hover label: the score column without its cqc prefix
        renamed_col = col.replace(score_prefix, "")

        # Create histogram using plotly.express with pattern_shape
        fig_hist = px.histogram(
            df,
            x=col,
            color=col_outlier,
            nbins=50,
            pattern_shape=col_outlier,
            opacity=0.7,
        )

        for trace in fig_hist.data:
            # capture outlier status before the trace is renamed below
            is_outlier = trace.name == "True"

            trace.marker.color = fig_color
            # Use pattern shapes
            trace.marker.pattern.shape = "x" if is_outlier else ""
            trace.name = (
                f"{renamed_col} ({'outlier' if is_outlier else 'inlier'})"
            )
            # Update hovertemplate to match the name in the key
            trace.hovertemplate = (
                f"<b>{renamed_col}</b><br>"
                + "Z-Score: %{x}<br>"
                + "Single-cell Count (log): %{y}<br>"
                + "<extra></extra>"
            )
            fig.add_trace(trace)

    # Update layout
    fig.update_layout(
        title=f"{threshold_set_name.replace('_', ' ').title()} Z-Score Outliers",
        xaxis_title="Z-Score",
        yaxis_title="Single-cell Count (log)",
        yaxis_type="log",
        # ensures that histograms are overlapping
        barmode="overlay",
        legend_title_text="Measurement Type and QC Status",
        legend={
            "orientation": "v",
            "yanchor": "top",
            "y": 0.95,
            "xanchor": "left",
            "x": 1.02,
        },
    )

    return fig

def find_image_columns(self: CytoDataFrame_type) -> bool:
pattern = r".*\.(tif|tiff)$"
return [
Expand Down
Loading

0 comments on commit a22bb0d

Please sign in to comment.