Remove `show_report` capabilities and the `cosmicqc` dependency (#21)

WayScience · Nov 7, 2024 · a22bb0d
1 parent aab474c
commit a22bb0d
Show file tree

Hide file tree

Showing 5 changed files with 3 additions and 367 deletions.
diff --git a/media/coverage-badge.svg b/media/coverage-badge.svg
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -38,8 +38,6 @@ pywavelets = [
 ]
 # used for image modifications in cytodataframe
 opencv-python = "^4.10.0.84"
-# used for report visualizations from within cytodataframe
-cosmicqc = "^0.0.11"
 imagecodecs = "^2024.9.22"
 
 [tool.poetry.group.dev.dependencies]

diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py
@@ -4,9 +4,7 @@
 
 import base64
 import pathlib
-import random
 import re
-import webbrowser
 from io import BytesIO, StringIO
 from typing import (
     Any,
@@ -22,15 +20,10 @@
 
 import numpy as np
 import pandas as pd
-import plotly
-import plotly.colors as pc
-import plotly.express as px
-import plotly.graph_objects as go
 import skimage
 import skimage.io
 import skimage.measure
 from IPython import get_ipython
-from jinja2 import Environment, FileSystemLoader
 from pandas._config import (
     get_option,
 )
@@ -359,250 +352,6 @@ def is_notebook_or_lab() -> bool:
         except NameError:
             return False
 
-    def show_report(
-        self: CytoDataFrame_type,
-        report_path: Optional[str] = None,
-        auto_open: bool = True,
-        color_palette: Optional[List[str]] = None,
-    ) -> None:
-        """
-        Generates and displays a report based on the current DataFrame's data
-        quality control (DQC) columns.
-
-        This method organizes the DQC columns from the DataFrame, creates
-        visualizations for each threshold set, and then either displays the
-        visualizations inline (if running in a Jupyter notebook or lab) or
-        opens an HTML report in the default web browser.
-
-        Args:
-            report_path (Optional[str]):
-                The file path where the HTML report should be saved and displayed.
-                If `None`, the report will be displayed inline if in a notebook
-                or lab environment.
-            auto_open: bool:
-                Whether to automatically open the report.
-            color_palette Optional(List[str]):
-                Optional list for color palette to use.
-
-        Raises:
-            ValueError: If the DataFrame does not contain any DQC columns.
-        """
-
-        # find all cosmicqc columns in the data using the prefix `cqc.`
-        cqc_cols = [col for col in self.columns.tolist() if "cqc." in col]
-        # organize column data into the threshold set name, threshold is_outlier col,
-        # and the threshold score columns as list
-        organized_columns = [
-            [
-                # name of the threshold set
-                threshold_set,
-                # column which includes boolean is_outlier data for threshold set
-                next(
-                    (
-                        col
-                        for col in cqc_cols
-                        if f"cqc.{threshold_set}.is_outlier" in col
-                    ),
-                    None,
-                ),
-                # columns which show the data associated with thresholds
-                [col for col in cqc_cols if f"cqc.{threshold_set}.Z_Score." in col],
-            ]
-            for threshold_set in sorted({col.split(".")[1] for col in cqc_cols})
-        ]
-
-        # create figures for visualization based on the name, outlier status,
-        # and threshold scores
-        figures = [
-            self.create_threshold_set_outlier_visualization(
-                df=self,
-                threshold_set_name=set_name,
-                col_outlier=col_outlier,
-                cols_threshold_scores=cols_threshold_scores,
-                color_palette=color_palette,
-            )
-            for set_name, col_outlier, cols_threshold_scores in organized_columns
-        ]
-
-        # if we're running in a notebook or jupyter lab, show the figures as-is
-        if self.is_notebook_or_lab() or report_path is None:
-            # if we should automatically open, show the figures
-            if auto_open:
-                for figure in figures:
-                    figure.show()
-
-            return figures
-
-        # otherwise, create an html file with figures and open it with default browser
-        else:
-            html_path = self.create_figure_group_html(
-                figures=figures, report_path=report_path
-            )
-
-            # if we should auto open, show the html file in default web browser
-            if auto_open:
-                webbrowser.open(f"file://{pathlib.Path(html_path).resolve()}")
-
-            print(f"Opened default web browser for report {html_path}")
-
-            return html_path
-
-    @staticmethod
-    def create_figure_group_html(
-        figures: List[plotly.graph_objs._figure.Figure],
-        report_path: Optional[str] = None,
-    ) -> str:
-        """
-        Generates an HTML file containing multiple Plotly figures.
-
-        This method takes a list of Plotly figure objects, converts them to HTML,
-        and embeds them into a template HTML file. The resulting HTML file is then
-        saved to the specified path.
-
-        Args:
-            figures (List[plotly.graph_objs._figure.Figure]):
-                A list of Plotly figure objects to be included in the HTML report.
-            report_path (str):
-                The file path where the HTML report will be saved.
-                Defaults to "cosmicqc_outlier_report.html" when None.
-
-        Returns:
-            str: The path to the saved HTML report.
-        """
-
-        # if we have none for the report path, use a default name.
-        if report_path is None:
-            report_path = "cosmicqc_outlier_report.html"
-
-        # create wrapped html for figures
-        figure_html = "".join(
-            [
-                f"<div class='fig_wrapper'>{fig.to_html(full_html=False)}</div>"
-                for fig in figures
-            ]
-        )
-
-        # configure jinja environment
-        env = Environment(
-            loader=FileSystemLoader(f"{pathlib.Path(__file__).parent!s}/data")
-        )
-        # load a jinja template
-        template = env.get_template("report_template.html")
-
-        # Render the template with Plotly figure HTML
-        rendered_html = template.render(figure_html=figure_html)
-
-        # write the html to file
-        with open(report_path, "w") as f:
-            f.write(rendered_html)
-
-        # return the path of the file
-        return report_path
-
-    def create_threshold_set_outlier_visualization(
-        self: CytoDataFrame_type,
-        df: pd.DataFrame,
-        threshold_set_name: str,
-        col_outlier: str,
-        cols_threshold_scores: List[str],
-        color_palette: Optional[List[str]] = None,
-    ) -> plotly.graph_objs._figure.Figure:
-        """
-        Creates a Plotly figure visualizing the Z-score distributions and outliers
-        for a given threshold set.
-
-        This method generates histograms for each Z-score column in the given DataFrame,
-        colors them based on outlier status, and overlays them into a single figure.
-
-        Args:
-            df (pd.DataFrame):
-                The DataFrame containing the data to be visualized.
-            threshold_set_name (str):
-                The name of the threshold set being visualized.
-            col_outlier (str):
-                The column name indicating outlier status.
-            cols_threshold_scores (List[str]):
-                A list of column names representing the Z-scores to be visualized.
-            color_palette Optional(List[str]):
-                Optional list for color palette to use.
-                Defaults to use Dark24 color palette from Plotly.
-
-        Returns:
-            plotly.graph_objs._figure.Figure:
-                A Plotly figure object containing the visualization.
-        """
-
-        # Create histograms using plotly.express with pattern_shape and random color
-        figures = [
-            px.histogram(
-                df,
-                x=col,
-                color=col_outlier,
-                nbins=50,
-                pattern_shape=col_outlier,
-                opacity=0.7,
-            )
-            for col in cols_threshold_scores
-        ]
-
-        # Create a combined figure
-        fig = go.Figure()
-
-        # check that we have enough colors for figures if provided
-        if color_palette is not None and len(color_palette) < len(figures):
-            raise ReferenceError(
-                f"Color palette length must match figure length of {len(figures)}."
-            )
-
-        # Add traces from each histogram and modify colors, names, and pattern shapes
-        for idx, fig_hist in enumerate(figures):
-            if color_palette is None:
-                # Create a default list of colors from a Plotly color palette
-                fig_color = random.choice(pc.qualitative.Dark24)
-            else:
-                # otherwise, use static color palette based on the number of figures
-                fig_color = color_palette[idx]
-
-            for trace in fig_hist.data:
-                trace.marker.color = fig_color
-                trace.marker.pattern.shape = (
-                    "x" if trace.name == "True" else ""
-                )  # Use pattern shapes
-                renamed_col = cols_threshold_scores[idx].replace(
-                    f"cqc.{threshold_set_name}.Z_Score.", ""
-                )
-                trace.name = (
-                    f"{renamed_col} ({'outlier' if trace.name == 'True' else 'inlier'})"
-                )
-                # Update hovertemplate to match the name in the key
-                trace.hovertemplate = (
-                    f"<b>{renamed_col}</b><br>"
-                    + "Z-Score: %{x}<br>"
-                    + "Single-cell Count (log): %{y}<br>"
-                    + "<extra></extra>"
-                )
-                fig.add_trace(trace)
-
-        # Update layout
-        fig.update_layout(
-            title=f"{threshold_set_name.replace('_', ' ').title()} Z-Score Outliers",
-            xaxis_title="Z-Score",
-            yaxis_title="Single-cell Count (log)",
-            yaxis_type="log",
-            # ensures that histograms are overlapping
-            barmode="overlay",
-            legend_title_text="Measurement Type and QC Status",
-            legend={
-                "orientation": "v",
-                "yanchor": "top",
-                "y": 0.95,
-                "xanchor": "left",
-                "x": 1.02,
-            },
-        )
-
-        return fig
-
     def find_image_columns(self: CytoDataFrame_type) -> bool:
         pattern = r".*\.(tif|tiff)$"
         return [