Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable multicova #347

Open
wants to merge 6 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,16 @@
}
],
"results": {
".github/workflows/create_release.yml": [
{
"type": "Secret Keyword",
"filename": ".github/workflows/create_release.yml",
"hashed_secret": "3e26d6750975d678acb8fa35a0f69237881576b0",
"is_verified": false,
"line_number": 15,
"is_secret": false
}
],
"docs/workflow_mq.html": [
{
"type": "Base64 High Entropy String",
Expand All @@ -150,5 +160,5 @@
}
]
},
"generated_at": "2024-09-18T09:54:14Z"
"generated_at": "2024-10-08T15:30:41Z"
}
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,13 @@ You can run the checks yourself using:
pre-commit run --all-files
```

##### The `detect-secrets` hook fails
This is because you added some code that was identified as a potential secret.
1. Run `detect-secrets scan --exclude-files testfiles --exclude-lines '"(hash|id|image/\w+)":.*' > .secrets.baseline`
(check `.pre-commit-config.yaml` for the exact parameters)
2. Run `detect-secrets audit .secrets.baseline` and check if the detected 'secret' is actually a secret
3. Commit the latest version of `.secrets.baseline`



---
Expand Down
16 changes: 15 additions & 1 deletion alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ def _get_init_dataset(

return rawmat, mat, metadata, sample, preprocessing_info

def _check_loader(self, loader):
@staticmethod
def _check_loader(loader):
"""Checks if the Loader is from class AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader

Args:
Expand Down Expand Up @@ -240,6 +241,19 @@ def ancova(
"""A wrapper for Statistics.ancova(), see documentation there."""
return self._get_statistics().ancova(protein_id, covar, between)

def multicova_analysis(
self,
covariates: list,
n_permutations: int = 3,
fdr: float = 0.05,
s0: float = 0.05,
subset: dict = None,
) -> Tuple[pd.DataFrame, list]:
"""A wrapper for Statistics.multicova_analysis(), see documentation there.

All arguments are forwarded unchanged and the wrapped method's result is returned as-is.
"""
# Arguments are forwarded positionally, so their order here must stay in sync
# with the parameter order of the wrapped multicova_analysis().
return self._get_statistics().multicova_analysis(
covariates, n_permutations, fdr, s0, subset
)

@check_for_missing_values
def plot_pca(self, group: Optional[str] = None, circle: bool = False):
"""Plot Principal Component Analysis (PCA)
Expand Down
73 changes: 37 additions & 36 deletions alphastats/DataSet_Statistics.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
from functools import lru_cache
from typing import Dict, Union
from typing import Dict, Tuple, Union

import numpy as np
import pandas as pd
import pingouin

from alphastats.DataSet_Preprocess import PreprocessingStateKeys
from alphastats.statistics.Anova import Anova
from alphastats.statistics.DifferentialExpressionAnalysis import (
DifferentialExpressionAnalysis,
Expand Down Expand Up @@ -131,36 +129,39 @@ def ancova(
ancova_df = pingouin.ancova(df, dv=protein_id, covar=covar, between=between)
return ancova_df

# @ignore_warning(RuntimeWarning)
# def multicova_analysis( # TODO never used outside of tests .. how does this relate to multicova.py?
# self,
# covariates: list,
# n_permutations: int = 3,
# fdr: float = 0.05,
# s0: float = 0.05,
# subset: dict = None,
# ) -> Union[pd.DataFrame, list]:
# """Perform Multicovariat Analysis
# will return a pandas DataFrame with the results and a list of volcano plots (for each covariat)
#
# Args:
# covariates (list): list of covariates, column names in metadata
# n_permutations (int, optional): number of permutations. Defaults to 3.
# fdr (float, optional): False Discovery Rate. Defaults to 0.05.
# s0 (float, optional): . Defaults to 0.05.
# subset (dict, optional): for categorical covariates . Defaults to None.
#
# Returns:
# pd.DataFrame: Multicova Analysis results
# """
#
# res, plot_list = MultiCovaAnalysis(
# dataset=self, # TODO fix .. does this write to it?
# covariates=covariates,
# n_permutations=n_permutations,
# fdr=fdr,
# s0=s0,
# subset=subset,
# plot=True,
# ).calculate()
# return res, plot_list
@ignore_warning(RuntimeWarning)
def multicova_analysis( # TODO never used outside of tests .. how does this relate to multicova.py?
self,
covariates: list,
n_permutations: int = 3,
fdr: float = 0.05,
s0: float = 0.05,
subset: dict = None,
) -> Tuple[pd.DataFrame, list]:
"""Perform multicovariate analysis.

Builds a MultiCovaAnalysis from this object's mat, metadata, sample and index_column
and returns the result of its calculate() method.

Args:
    covariates (list): list of covariates, column names in metadata
    n_permutations (int, optional): number of permutations. Defaults to 3.
    fdr (float, optional): False Discovery Rate. Defaults to 0.05.
    s0 (float, optional): forwarded unchanged to MultiCovaAnalysis. Defaults to 0.05.
        TODO: describe the meaning of this parameter.
    subset (dict, optional): for categorical covariates. Defaults to None.

Returns:
    Tuple[pd.DataFrame, list]: the multicova analysis results and a list of
    volcano plots (one per covariate).
    NOTE(review): `plot` is not passed to MultiCovaAnalysis here and its constructor
    defaults it to False, so the plot list is presumably empty -- confirm.
"""

# RuntimeWarnings raised during the calculation are suppressed by the decorator above.
res, plot_list = MultiCovaAnalysis(
mat=self.mat,
metadata=self.metadata,
sample=self.sample,
index_column=self.index_column,
covariates=covariates,
n_permutations=n_permutations,
fdr=fdr,
s0=s0,
subset=subset,
).calculate()

return res, plot_list
7 changes: 4 additions & 3 deletions alphastats/plots/PlotUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,10 @@ class plotly_object(plotly.graph_objs._figure.Figure):
class PlotUtils:
@staticmethod
def _update_colors_plotly(fig, color_dict):
# plotly doesnt allow to assign color to certain group
# update instead the figure in form of a dict
# color_dict with group_variable/legendgroup as key, and corresponding color as value
# TODO revisit this comment:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://plotly.com/python-api-reference/generated/generated/plotly.graph_objects.Figure.update_traces.html + selector to set individual color or something like:
plot.for_each_trace(lambda t: t.update(marker_color=color_dict.get(t.legendgroup)))

# plotly doesn't allow assigning a color to a specific group,
# so the figure is converted to a dict and updated there instead.
# color_dict has the group_variable/legendgroup as key and the corresponding color as value
fig_dict = fig.to_plotly_json()
data_dict_list = fig_dict.get("data")
for count, group in enumerate(data_dict_list):
Expand Down
44 changes: 25 additions & 19 deletions alphastats/statistics/MultiCovaAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,30 @@
import numpy as np
import pandas as pd
import plotly.express as px
import scipy
import tqdm

from alphastats.statistics.StatisticUtils import StatisticUtils


class MultiCovaAnalysis(StatisticUtils):
class MultiCovaAnalysis:
def __init__(
self,
dataset,
*,
mat: pd.DataFrame,
metadata: pd.DataFrame,
sample: str,
index_column: str,
covariates: list,
n_permutations: int = 3,
fdr: float = 0.05,
s0: float = 0.05,
subset: dict = None,
plot: bool = False,
):
self.dataset = dataset
self.sample = sample
self.metadata_ori = metadata
self.mat = mat
self.index_column = index_column

self.metadata = None # TODO check if the distinction between metadata and metadata_ori is necessary

self.covariates = covariates
self.n_permutations = n_permutations
self.fdr = fdr
Expand All @@ -35,30 +41,30 @@ def __init__(
self._prepare_matrix()

def _subset_metadata(self):
columns_to_keep = self.covariates + [self.dataset.sample]
columns_to_keep = self.covariates + [self.sample]
if self.subset is not None:
# dict structure {"column_name": ["group1", "group2"]}
subset_column = list(self.subset.keys())[0]
groups = self.subset.get(subset_column)
self.metadata = self.dataset.metadata[
self.dataset.metadata[subset_column].isin(groups)
self.metadata = self.metadata_ori[
self.metadata_ori[subset_column].isin(groups)
][columns_to_keep]

else:
self.metadata = self.dataset.metadata[columns_to_keep]
self.metadata = self.metadata_ori[columns_to_keep]

def _check_covariat_input(self):
# check whether covariates in metadata column
misc_covariates = list(
set(self.covariates) - set(self.dataset.metadata.columns.to_list())
set(self.covariates) - set(self.metadata_ori.columns.to_list())
)
if len(misc_covariates) > 0:
warnings.warn(f"Covariates: {misc_covariates} are not found in Metadata.")
self.covariates = [x for x in self.covariates if x not in misc_covariates]

def _check_na_values(self):
for covariate in self.covariates:
if self.dataset.metadata[covariate].isna().any():
if self.metadata_ori[covariate].isna().any():
self.covariates.remove(covariate)
warnings.warn(
f"Covariate: {covariate} contains missing values"
Expand Down Expand Up @@ -101,10 +107,10 @@ def _convert_string_to_binary(self):
self.covariates.remove(col)

def _prepare_matrix(self):
transposed = self.dataset.mat.transpose()
transposed[self.dataset.index_column] = transposed.index
transposed = self.mat.transpose()
transposed[self.index_column] = transposed.index
transposed = transposed.reset_index(drop=True)
self.transposed = transposed[self.metadata[self.dataset.sample].to_list()]
self.transposed = transposed[self.metadata[self.sample].to_list()]

def _plot_volcano_regression(self, res_real, variable):
sig_col = res_real.filter(regex=variable + "_" + "FDR").columns[0]
Expand All @@ -115,7 +121,7 @@ def _plot_volcano_regression(self, res_real, variable):
y=-np.log10(res_real[variable + "_" + "pval"]),
color=res_real[sig_col],
color_discrete_map={"sig": "#009599", "non_sig": "#404040"},
hover_name=res_real[self.dataset.index_column],
hover_name=res_real[self.index_column],
title=variable,
labels=dict(x="beta value", y="-log10(p-value)", color=sig_level),
)
Expand All @@ -133,12 +139,12 @@ def calculate(self):
quant_data=self.transposed,
annotation=self.metadata,
covariates=self.covariates,
sample_column=self.dataset.sample,
sample_column=self.sample,
n_permutations=self.n_permutations,
fdr=self.fdr,
s0=self.s0,
)
res[self.dataset.index_column] = self.dataset.mat.columns.to_list()
res[self.index_column] = self.mat.columns.to_list()
plot_list = []

if self.plot:
Expand Down
29 changes: 0 additions & 29 deletions alphastats/statistics/StatisticUtils.py

This file was deleted.

2 changes: 0 additions & 2 deletions tests/test_DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,8 +770,6 @@ def test_batch_correction(self):
first_value = self.obj.mat.values[0, 0]
self.assertTrue(np.isclose(2.624937690577153e-08, first_value))

# TODO this opens a plot in a browser window
@skip # TODO multicova_analysis is unused
def test_multicova_analysis_invalid_covariates(self):
self.obj.preprocess(imputation="knn", normalization="zscore", subset=True)
res, _ = self.obj.multicova_analysis(
Expand Down
Loading