From a22bb0d844b1511ace4f31bf71c79353c0a69b90 Mon Sep 17 00:00:00 2001 From: Dave Bunten Date: Thu, 7 Nov 2024 13:28:54 -0700 Subject: [PATCH] remove show_report capabilities and cosmicqc dep (#21) --- media/coverage-badge.svg | 2 +- poetry.lock | 57 +-------- pyproject.toml | 2 - src/cytodataframe/frame.py | 251 ------------------------------------- tests/test_frame.py | 58 --------- 5 files changed, 3 insertions(+), 367 deletions(-) diff --git a/media/coverage-badge.svg b/media/coverage-badge.svg index c2cb5c6..091c0ca 100644 --- a/media/coverage-badge.svg +++ b/media/coverage-badge.svg @@ -1 +1 @@ -coverage: 83.37%coverage83.37% \ No newline at end of file +coverage: 91.05%coverage91.05% \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index d304461..8434621 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -503,31 +503,6 @@ traitlets = ">=4" [package.extras] test = ["pytest"] -[[package]] -name = "cosmicqc" -version = "0.0.11" -description = "coSMicQC is a software tool for single-cell quality control of morphology datasets." -optional = false -python-versions = "<3.13,>=3.9" -files = [ - {file = "cosmicqc-0.0.11-py3-none-any.whl", hash = "sha256:d7cd4c6803a399a0b92008348e0d233496e08c03d623bd5524db3cc9a53af19b"}, - {file = "cosmicqc-0.0.11.tar.gz", hash = "sha256:74506f85b7935e6d1e1c221152a6c26a86b9772e5546dbb44689b109b75265ce"}, -] - -[package.dependencies] -fire = ">=0.6.0,<0.7.0" -ipython = ">=8.12.3,<9.0.0" -jinja2 = ">=3.1.4,<4.0.0" -kaleido = "0.2.1" -opencv-python = ">=4.10.0.84,<5.0.0.0" -pandas = {version = ">=2.2.2,<3.0.0", markers = "python_version >= \"3.9\""} -plotly = ">=5.22.0,<6.0.0" -pyarrow = ">=16.0.0,<17.0.0" -pywavelets = {version = ">1.4.1", markers = "python_version >= \"3.9\""} -pyyaml = ">=6.0.1,<7.0.0" -scikit-image = {version = ">0.19.3", markers = "python_version >= \"3.9\""} -scipy = {version = ">=1.13.0,<2.0.0", markers = "python_version >= \"3.9\""} - [[package]] name = "coverage" version = "7.6.4" @@ -722,20 +697,6 @@ files = [ [package.extras] devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"] -[[package]] -name = "fire" -version = "0.6.0" -description = "A library for automatically generating command line interfaces." -optional = false -python-versions = "*" -files = [ - {file = "fire-0.6.0.tar.gz", hash = "sha256:54ec5b996ecdd3c0309c800324a0703d6da512241bc73b553db959d98de0aa66"}, -] - -[package.dependencies] -six = "*" -termcolor = "*" - [[package]] name = "fqdn" version = "1.5.1" @@ -3354,20 +3315,6 @@ files = [ doc = ["reno", "sphinx"] test = ["pytest", "tornado (>=4.5)", "typeguard"] -[[package]] -name = "termcolor" -version = "2.4.0" -description = "ANSI color formatting for output in terminal" -optional = false -python-versions = ">=3.8" -files = [ - {file = "termcolor-2.4.0-py3-none-any.whl", hash = "sha256:9297c0df9c99445c2412e832e882a7884038a25617c60cea2ad69488d4040d63"}, - {file = "termcolor-2.4.0.tar.gz", hash = "sha256:aab9e56047c8ac41ed798fa36d892a37aca6b3e9159f3e0c24bc64a9b3ac7b7a"}, -] - -[package.extras] -tests = ["pytest", "pytest-cov"] - [[package]] name = "terminado" version = "0.18.1" @@ -3605,4 +3552,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "457253903cd8fbb797a4dabb34dbbaf94f275c706bd618b6f8ca6da5a74fe1e4" +content-hash = "3223b39520566b01fac8e5f19d7526637e0b27f85ad8faa3a14776c9ea816222" diff --git a/pyproject.toml b/pyproject.toml index eea78eb..6878141 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,8 +38,6 @@ pywavelets = [ ] # used for image modifications in cytodataframe opencv-python = "^4.10.0.84" -# used for report visualizations from within cytodataframe -cosmicqc = "^0.0.11" imagecodecs = "^2024.9.22" [tool.poetry.group.dev.dependencies] diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 98f08ee..32cffb8 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -4,9 +4,7 @@ import base64 import pathlib -import random import re -import webbrowser from io import BytesIO, StringIO from typing import ( Any, @@ -22,15 +20,10 @@ import numpy as np import pandas as pd -import plotly -import plotly.colors as pc -import plotly.express as px -import plotly.graph_objects as go import skimage import skimage.io import skimage.measure from IPython import get_ipython -from jinja2 import Environment, FileSystemLoader from pandas._config import ( get_option, ) @@ -359,250 +352,6 @@ def is_notebook_or_lab() -> bool: except NameError: return False - def show_report( - self: CytoDataFrame_type, - report_path: Optional[str] = None, - auto_open: bool = True, - color_palette: Optional[List[str]] = None, - ) -> None: - """ - Generates and displays a report based on the current DataFrame's data - quality control (DQC) columns. - - This method organizes the DQC columns from the DataFrame, creates - visualizations for each threshold set, and then either displays the - visualizations inline (if running in a Jupyter notebook or lab) or - opens an HTML report in the default web browser. - - Args: - report_path (Optional[str]): - The file path where the HTML report should be saved and displayed. - If `None`, the report will be displayed inline if in a notebook - or lab environment. - auto_open: bool: - Whether to automatically open the report. - color_palette Optional(List[str]): - Optional list for color palette to use. - - Raises: - ValueError: If the DataFrame does not contain any DQC columns. - """ - - # find all cosmicqc columns in the data using the prefix `cqc.` - cqc_cols = [col for col in self.columns.tolist() if "cqc." in col] - # organize column data into the threshold set name, threshold is_outlier col, - # and the threshold score columns as list - organized_columns = [ - [ - # name of the threshold set - threshold_set, - # column which includes boolean is_outlier data for threshold set - next( - ( - col - for col in cqc_cols - if f"cqc.{threshold_set}.is_outlier" in col - ), - None, - ), - # columns which show the data associated with thresholds - [col for col in cqc_cols if f"cqc.{threshold_set}.Z_Score." in col], - ] - for threshold_set in sorted({col.split(".")[1] for col in cqc_cols}) - ] - - # create figures for visualization based on the name, outlier status, - # and threshold scores - figures = [ - self.create_threshold_set_outlier_visualization( - df=self, - threshold_set_name=set_name, - col_outlier=col_outlier, - cols_threshold_scores=cols_threshold_scores, - color_palette=color_palette, - ) - for set_name, col_outlier, cols_threshold_scores in organized_columns - ] - - # if we're running in a notebook or jupyter lab, show the figures as-is - if self.is_notebook_or_lab() or report_path is None: - # if we should automatically open, show the figures - if auto_open: - for figure in figures: - figure.show() - - return figures - - # otherwise, create an html file with figures and open it with default browser - else: - html_path = self.create_figure_group_html( - figures=figures, report_path=report_path - ) - - # if we should auto open, show the html file in default web browser - if auto_open: - webbrowser.open(f"file://{pathlib.Path(html_path).resolve()}") - - print(f"Opened default web browser for report {html_path}") - - return html_path - - @staticmethod - def create_figure_group_html( - figures: List[plotly.graph_objs._figure.Figure], - report_path: Optional[str] = None, - ) -> str: - """ - Generates an HTML file containing multiple Plotly figures. - - This method takes a list of Plotly figure objects, converts them to HTML, - and embeds them into a template HTML file. The resulting HTML file is then - saved to the specified path. - - Args: - figures (List[plotly.graph_objs._figure.Figure]): - A list of Plotly figure objects to be included in the HTML report. - report_path (str): - The file path where the HTML report will be saved. - Defaults to "cosmicqc_outlier_report.html" when None. - - Returns: - str: The path to the saved HTML report. - """ - - # if we have none for the report path, use a default name. - if report_path is None: - report_path = "cosmicqc_outlier_report.html" - - # create wrapped html for figures - figure_html = "".join( - [ - f"
{fig.to_html(full_html=False)}
" - for fig in figures - ] - ) - - # configure jinja environment - env = Environment( - loader=FileSystemLoader(f"{pathlib.Path(__file__).parent!s}/data") - ) - # load a jinja template - template = env.get_template("report_template.html") - - # Render the template with Plotly figure HTML - rendered_html = template.render(figure_html=figure_html) - - # write the html to file - with open(report_path, "w") as f: - f.write(rendered_html) - - # return the path of the file - return report_path - - def create_threshold_set_outlier_visualization( - self: CytoDataFrame_type, - df: pd.DataFrame, - threshold_set_name: str, - col_outlier: str, - cols_threshold_scores: List[str], - color_palette: Optional[List[str]] = None, - ) -> plotly.graph_objs._figure.Figure: - """ - Creates a Plotly figure visualizing the Z-score distributions and outliers - for a given threshold set. - - This method generates histograms for each Z-score column in the given DataFrame, - colors them based on outlier status, and overlays them into a single figure. - - Args: - df (pd.DataFrame): - The DataFrame containing the data to be visualized. - threshold_set_name (str): - The name of the threshold set being visualized. - col_outlier (str): - The column name indicating outlier status. - cols_threshold_scores (List[str]): - A list of column names representing the Z-scores to be visualized. - color_palette Optional(List[str]): - Optional list for color palette to use. - Defaults to use Dark24 color palette from Plotly. - - Returns: - plotly.graph_objs._figure.Figure: - A Plotly figure object containing the visualization. - """ - - # Create histograms using plotly.express with pattern_shape and random color - figures = [ - px.histogram( - df, - x=col, - color=col_outlier, - nbins=50, - pattern_shape=col_outlier, - opacity=0.7, - ) - for col in cols_threshold_scores - ] - - # Create a combined figure - fig = go.Figure() - - # check that we have enough colors for figures if provided - if color_palette is not None and len(color_palette) < len(figures): - raise ReferenceError( - f"Color palette length must match figure length of {len(figures)}." - ) - - # Add traces from each histogram and modify colors, names, and pattern shapes - for idx, fig_hist in enumerate(figures): - if color_palette is None: - # Create a default list of colors from a Plotly color palette - fig_color = random.choice(pc.qualitative.Dark24) - else: - # otherwise, use static color palette based on the number of figures - fig_color = color_palette[idx] - - for trace in fig_hist.data: - trace.marker.color = fig_color - trace.marker.pattern.shape = ( - "x" if trace.name == "True" else "" - ) # Use pattern shapes - renamed_col = cols_threshold_scores[idx].replace( - f"cqc.{threshold_set_name}.Z_Score.", "" - ) - trace.name = ( - f"{renamed_col} ({'outlier' if trace.name == 'True' else 'inlier'})" - ) - # Update hovertemplate to match the name in the key - trace.hovertemplate = ( - f"{renamed_col}
" - + "Z-Score: %{x}
" - + "Single-cell Count (log): %{y}
" - + "" - ) - fig.add_trace(trace) - - # Update layout - fig.update_layout( - title=f"{threshold_set_name.replace('_', ' ').title()} Z-Score Outliers", - xaxis_title="Z-Score", - yaxis_title="Single-cell Count (log)", - yaxis_type="log", - # ensures that histograms are overlapping - barmode="overlay", - legend_title_text="Measurement Type and QC Status", - legend={ - "orientation": "v", - "yanchor": "top", - "y": 0.95, - "xanchor": "left", - "x": 1.02, - }, - ) - - return fig - def find_image_columns(self: CytoDataFrame_type) -> bool: pattern = r".*\.(tif|tiff)$" return [ diff --git a/tests/test_frame.py b/tests/test_frame.py index d143486..dd981dc 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -5,11 +5,8 @@ import pathlib from io import BytesIO -import cosmicqc import numpy as np import pandas as pd -import plotly -import plotly.colors as pc import pytest from pyarrow import parquet @@ -113,33 +110,6 @@ def test_cytodataframe_input( pd.testing.assert_frame_equal(copy_sc_df, sc_df) -def test_show_report(cytotable_CFReT_data_df: pd.DataFrame): - """ - Used for testing show report capabilities - """ - - df = cosmicqc.analyze.label_outliers( - df=cytotable_CFReT_data_df, - include_threshold_scores=True, - ) - - figures = df.show_report(auto_open=False) - - expected_number_figures = 3 - assert len(figures) == expected_number_figures - assert ( - next(iter({type(figure) for figure in figures})) - == plotly.graph_objs._figure.Figure - ) - - df.show_report( - report_path=(report_path := pathlib.Path("cosmicqc_example_report.html")), - auto_open=False, - ) - - assert report_path.is_file() - - def test_repr_html( cytotable_NF1_data_parquet_shrunken: str, cytotable_nuclear_speckles_data_parquet: str, @@ -171,34 +141,6 @@ def test_repr_html( ), "The nuclear speckles images do not contain green outlines." -@pytest.mark.generate_report_image -def fixture_generate_show_report_html_output(cytotable_CFReT_data_df: pd.DataFrame): - """ - Used for generating report output for use with other tests. - """ - - # create outliers dataframe - df = cosmicqc.analyze.label_outliers( - df=cytotable_CFReT_data_df, - include_threshold_scores=True, - ) - - # show a report - df.show_report( - report_path=( - report_path := pathlib.Path(__file__).parent - / "data" - / "coSMicQC" - / "show_report" - / "cosmicqc_example_report.html" - ), - color_palette=pc.qualitative.Dark24[0:2], - auto_open=False, - ) - - return report_path - - def test_overlay_with_valid_images(): """ Tests the `draw_outline_on_image_from_outline` function