Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor analysis i #366

Open
wants to merge 9 commits into
base: remove_sample_column
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions alphastats/gui/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ def run():
os.system(
f"python -m streamlit run AlphaPeptStats.py --global.developmentMode=false {extra_args}"
)

# TODO why are we starting the app a second time here?
_this_file = os.path.abspath(__file__)
_this_directory = os.path.dirname(_this_file)

Expand Down
26 changes: 13 additions & 13 deletions alphastats/gui/pages/04_Analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
show_df = False
analysis_result = None

c1, *_ = st.columns([1, 1, 1])
c1, c2 = st.columns([0.33, 0.67])
with c1:
method = st.selectbox(
"Analysis",
Expand All @@ -66,21 +66,21 @@
)
show_df = analysis_result is not None

with c2:
# --- SHOW PLOT -------------------------------------------------------
if show_plot:
display_plot(method, analysis_result)

# --- SHOW PLOT -------------------------------------------------------
if show_plot:
display_plot(method, analysis_result)
# --- SHOW STATISTICAL ANALYSIS -------------------------------------------------------
elif show_df:
display_df(analysis_result)

# --- SHOW STATISTICAL ANALYSIS -------------------------------------------------------
elif show_df:
display_df(analysis_result)
csv = convert_df(analysis_result)

csv = convert_df(analysis_result)

# TODO do we want to save statistical analysis to results page as well?
st.download_button(
"Download as .csv", csv, method + ".csv", "text/csv", key="download-csv"
)
# TODO do we want to save statistical analysis to results page as well?
st.download_button(
"Download as .csv", csv, method + ".csv", "text/csv", key="download-csv"
)


@st.fragment
Expand Down
155 changes: 155 additions & 0 deletions alphastats/gui/utils/analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
"""Module providing frontend widgets for gathering parameters and mapping them to the actual analysis."""

from abc import ABC, abstractmethod
from collections import defaultdict

import streamlit as st

from alphastats.DataSet import DataSet
from alphastats.keys import Cols
from alphastats.plots.VolcanoPlot import VolcanoPlot


class Analysis(ABC):
    """Base class for analysis widgets.

    A concrete analysis first gathers its parameters from the user via
    show_widget() and then runs the computation via do_analysis().
    """

    def __init__(self, dataset):
        # The dataset the analysis operates on.
        self._dataset: DataSet = dataset
        # Parameters gathered by the widgets; looking up a parameter that was
        # never set yields None (and stores that None under the key).
        self._parameters = defaultdict(lambda: None)

    @abstractmethod
    def show_widget(self):
        """Display the widget(s) and collect the parameters chosen by the user."""

    @abstractmethod
    def do_analysis(self):
        """Run the analysis.

        Returns a tuple (figure, analysis_object, parameters): figure is the plot,
        analysis_object is the underlying object and parameters is a dictionary
        of the parameters used.
        """


class GroupCompareAnalysis(Analysis, ABC):
    """Abstract base for analysis widgets that compare two groups."""

    def show_widget(self):
        """Gather the parameters needed to compare two groups.

        The user either picks a metadata column, in which case the two groups
        are values of that column, or the custom option, in which case the two
        groups are lists of samples. Afterwards "group1" and "group2" are stored
        in the parameters, plus "column" when a metadata column was picked.
        st.stop() is called while no grouping variable is selected and when
        both groups are equal.
        """
        # NOTE: the logic is copied from helper_compare_two_groups
        placeholder = "<select>"
        custom_choice = "Custom group from samples .."
        metadata = self._dataset.metadata

        grouping_variable = st.selectbox(
            "Grouping variable",
            options=[placeholder, *metadata.columns.to_list(), custom_choice],
        )

        column = None
        if grouping_variable == placeholder:
            st.stop()  # TODO: using stop here is not really great
        elif grouping_variable == custom_choice:
            # Groups are hand-picked lists of samples.
            sample_names = metadata[Cols.SAMPLE].to_list()
            group1 = st.multiselect("Group 1 samples:", options=sample_names)
            # Options are offered in reverse order for the second group.
            group2 = st.multiselect("Group 2 samples:", options=sample_names[::-1])

            shared_samples = list(set(group1).intersection(set(group2)))
            if shared_samples:
                st.warning(
                    f"Group 1 and Group 2 contain same samples: {shared_samples}"
                )
        else:
            # Groups are two values of the chosen metadata column.
            column = grouping_variable
            unique_values = metadata[grouping_variable].unique().tolist()
            group1 = st.selectbox("Group 1", options=unique_values)
            # Reversed options so that the two selectboxes start on different values.
            group2 = st.selectbox("Group 2", options=unique_values[::-1])

        if group1 == group2:
            st.error(
                "Group 1 and Group 2 can not be the same. Please select different groups."
            )
            st.stop()

        chosen = {"group1": group1, "group2": group2}
        if column is not None:
            chosen["column"] = column
        self._parameters.update(chosen)


class VolcanoPlotAnalysis(GroupCompareAnalysis):
    """Widget for the Volcano Plot analysis."""

    def show_widget(self):
        """Show the group selection plus the Volcano Plot options and store them."""
        super().show_widget()

        method = st.selectbox(
            "Differential Analysis using:",
            options=["ttest", "anova", "wald", "sam", "paired-ttest", "welch-ttest"],
        )
        # The dict literal is evaluated top to bottom, so the widgets are
        # rendered in exactly this order.
        chosen = {
            "method": method,
            "labels": st.checkbox("Add labels", value=True),
            "draw_line": st.checkbox("Draw lines", value=True),
            "alpha": st.number_input(
                label="alpha", min_value=0.001, max_value=0.050, value=0.050
            ),
            "min_fc": st.select_slider("Foldchange cutoff", range(0, 3), value=1),
        }

        # These two options are only offered for the "sam" method.
        if method == "sam":
            chosen["perm"] = st.number_input(
                label="Number of Permutations", min_value=1, max_value=1000, value=10
            )
            chosen["fdr"] = st.number_input(
                label="FDR cut off", min_value=0.005, max_value=0.1, value=0.050
            )

        self._parameters.update(chosen)

    def do_analysis(self):
        """Draw the Volcano Plot using the VolcanoPlot class.

        Returns a tuple (figure, analysis_object, parameters): figure is the plot,
        analysis_object is the VolcanoPlot instance and parameters is the
        dictionary of the parameters used.
        """
        # TODO currently there's no other way to obtain both the plot and the underlying data
        #  Should be refactored such that the interface provided by DataSet.plot_volcano() is used
        #  One option could be to always return the whole analysis object.
        dataset = self._dataset
        params = self._parameters
        # Looked up one by one, in this order, so every keyword is always passed.
        forwarded_keys = (
            "group1",
            "group2",
            "column",
            "method",
            "labels",
            "min_fc",
            "alpha",
            "draw_line",
            "perm",
            "fdr",
            "color_list",
        )

        volcano_plot = VolcanoPlot(
            mat=dataset.mat,
            rawinput=dataset.rawinput,
            metadata=dataset.metadata,
            preprocessing_info=dataset.preprocessing_info,
            **{key: params[key] for key in forwarded_keys},
        )
        return volcano_plot.plot, volcano_plot, self._parameters
67 changes: 8 additions & 59 deletions alphastats/gui/utils/analysis_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import pandas as pd
import streamlit as st

from alphastats.gui.utils.analysis import VolcanoPlotAnalysis
from alphastats.gui.utils.ui_helper import StateKeys, convert_df
from alphastats.keys import Cols
from alphastats.plots.VolcanoPlot import VolcanoPlot


def display_figure(plot):
Expand Down Expand Up @@ -118,62 +118,6 @@ def st_general(method_dict):
return chosen_parameter_dict


def gui_volcano_plot() -> Tuple[Optional[Any], Optional[Any], Optional[Dict]]:
    """Collect the Volcano Plot options and draw the plot once requested.

    Returns a tuple (figure, analysis_object, parameters): figure is the plot,
    analysis_object is the VolcanoPlot instance and parameters is the dictionary
    of the parameters used. All three are None unless the "Run analysis .."
    button was pressed.
    """
    parameters = helper_compare_two_groups()

    method = st.selectbox(
        "Differential Analysis using:",
        options=["ttest", "anova", "wald", "sam", "paired-ttest", "welch-ttest"],
    )
    parameters.update(method=method)
    parameters.update(labels=st.checkbox("Add label"))
    parameters.update(draw_line=st.checkbox("Draw line"))
    parameters.update(
        alpha=st.number_input(
            label="alpha", min_value=0.001, max_value=0.050, value=0.050
        )
    )
    parameters.update(
        min_fc=st.select_slider("Foldchange cutoff", range(0, 3), value=1)
    )

    # These two options are only offered for the "sam" method.
    if method == "sam":
        n_permutations = st.number_input(
            label="Number of Permutations", min_value=1, max_value=1000, value=10
        )
        fdr_cutoff = st.number_input(
            label="FDR cut off", min_value=0.005, max_value=0.1, value=0.050
        )
        parameters.update(perm=n_permutations, fdr=fdr_cutoff)

    if not st.button("Run analysis .."):
        return None, None, None

    dataset = st.session_state[StateKeys.DATASET]

    # TODO currently there's no other way to obtain both the plot and the underlying data
    #  Should be refactored such that the interface provided by DataSet.plot_volcano() is used
    #  One option could be to always return the whole analysis object.
    volcano_plot = VolcanoPlot(
        mat=dataset.mat,
        rawinput=dataset.rawinput,
        metadata=dataset.metadata,
        preprocessing_info=dataset.preprocessing_info,
        **parameters,
    )
    return volcano_plot.plot, volcano_plot, parameters


def do_analysis(
method: str, options_dict: Dict[str, Any]
) -> Tuple[Optional[Any], Optional[Any], Dict[str, Any]]:
Expand All @@ -185,11 +129,15 @@ def do_analysis(
Currently, analysis_object is only not-None for Volcano Plot.
# TODO unify the API of all analysis methods
"""

method_dict = options_dict.get(method)

if method == "Volcano Plot":
return gui_volcano_plot()
analysis = VolcanoPlotAnalysis(st.session_state[StateKeys.DATASET])
analysis.show_widget()

if st.button("Run analysis .."):
return analysis.do_analysis()
return None, None, {}

elif method == "t-SNE Plot":
parameters = st_tsne_options(method_dict)
Expand Down Expand Up @@ -235,6 +183,7 @@ def helper_plot_dimensionality_reduction(method_dict):
return chosen_parameter_dict


# TODO this can be deleted once all analyses have adopted the new pattern (cf. analysis.py:Analysis())
def helper_compare_two_groups():
"""
Helper function to compare two groups for example
Expand Down
5 changes: 1 addition & 4 deletions alphastats/gui/utils/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,7 @@ def get_plotting_options(state):
},
"function": dataset.plot_tsne,
},
"Volcano Plot": {
"between_two_groups": True, # TODO: between_two_groups is never used anywhere
"function": dataset.plot_volcano,
},
"Volcano Plot": {},
"Clustermap": {"function": dataset.plot_clustermap},
# "Dendrogram": {"function": state[StateKeys.DATASET].plot_dendrogram}, # TODO why commented?
}
Expand Down
4 changes: 0 additions & 4 deletions alphastats/llm/enrichment_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
import requests
from gprofiler import GProfiler

# TODO needed?
# from Bio import Entrez
# Entrez.email = "lebedev_mikhail@outlook.com" # Always provide your email address when using NCBI services.


def _get_functional_annotation_string(identifier, species_id="9606") -> pd.DataFrame:
"""
Expand Down
2 changes: 1 addition & 1 deletion alphastats/statistics/MultiCovaAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(
subset: dict = None,
plot: bool = False,
):
self.dataset = dataset # TODO pass only .mat, .metadata and .sample
self.dataset = dataset # TODO pass only .mat, .metadata
self.covariates = covariates
self.n_permutations = n_permutations
self.fdr = fdr
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
anndata==0.9.1
biopython==1.83
click==8.0.1
combat==0.3.3
data_cache>=0.1.6
Expand Down
Loading