MannLabs · mschwoer · Oct 8, 2024 · Oct 8, 2024 · Oct 8, 2024 · Oct 8, 2024
diff --git a/.secrets.baseline b/.secrets.baseline
@@ -139,6 +139,16 @@
     }
   ],
   "results": {
+    ".github/workflows/create_release.yml": [
+      {
+        "type": "Secret Keyword",
+        "filename": ".github/workflows/create_release.yml",
+        "hashed_secret": "3e26d6750975d678acb8fa35a0f69237881576b0",
+        "is_verified": false,
+        "line_number": 15,
+        "is_secret": false
+      }
+    ],
     "docs/workflow_mq.html": [
       {
         "type": "Base64 High Entropy String",
@@ -150,5 +160,5 @@
       }
     ]
   },
-  "generated_at": "2024-09-18T09:54:14Z"
+  "generated_at": "2024-10-08T15:30:41Z"
 }
diff --git a/README.md b/README.md
@@ -148,6 +148,13 @@ You can run the checks yourself using:
 pre-commit run --all-files
 ```
 
+##### The `detect-secrets` hook fails
+This is because you added some code that was identified as a potential secret.
+1. Run `detect-secrets scan --exclude-files testfiles --exclude-lines '"(hash|id|image/\w+)":.*' > .secrets.baseline`
+(check `.pre-commit-config.yaml` for the exact parameters)
+2. Run `detect-secrets audit .secrets.baseline` and mark for each detected 'secret' whether it is actually a secret
+3. Commit the latest version of `.secrets.baseline`
+
 
 
 ---

diff --git a/alphastats/DataSet.py b/alphastats/DataSet.py
@@ -116,7 +116,8 @@ def _get_init_dataset(
 
         return rawmat, mat, metadata, sample, preprocessing_info
 
-    def _check_loader(self, loader):
+    @staticmethod
+    def _check_loader(loader):
         """Checks if the Loader is from class AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader
 
         Args:
@@ -240,6 +241,19 @@ def ancova(
         """A wrapper for Statistics.ancova(), see documentation there."""
         return self._get_statistics().ancova(protein_id, covar, between)
 
+    def multicova_analysis(
+        self,
+        covariates: list,
+        n_permutations: int = 3,
+        fdr: float = 0.05,
+        s0: float = 0.05,
+        subset: dict = None,
+    ) -> Tuple[pd.DataFrame, list]:
+        """A wrapper for Statistics.multicova_analysis(), see documentation there."""
+        return self._get_statistics().multicova_analysis(
+            covariates, n_permutations, fdr, s0, subset
+        )
+
     @check_for_missing_values
     def plot_pca(self, group: Optional[str] = None, circle: bool = False):
         """Plot Principal Component Analysis (PCA)

diff --git a/alphastats/DataSet_Statistics.py b/alphastats/DataSet_Statistics.py
@@ -1,11 +1,9 @@
 from functools import lru_cache
-from typing import Dict, Union
+from typing import Dict, Tuple, Union
 
-import numpy as np
 import pandas as pd
 import pingouin
 
-from alphastats.DataSet_Preprocess import PreprocessingStateKeys
 from alphastats.statistics.Anova import Anova
 from alphastats.statistics.DifferentialExpressionAnalysis import (
     DifferentialExpressionAnalysis,
@@ -131,36 +129,39 @@ def ancova(
         ancova_df = pingouin.ancova(df, dv=protein_id, covar=covar, between=between)
         return ancova_df
 
-    # @ignore_warning(RuntimeWarning)
-    # def multicova_analysis(  # TODO never used outside of tests .. how does this relate to multicova.py?
-    #     self,
-    #     covariates: list,
-    #     n_permutations: int = 3,
-    #     fdr: float = 0.05,
-    #     s0: float = 0.05,
-    #     subset: dict = None,
-    # ) -> Union[pd.DataFrame, list]:
-    #     """Perform Multicovariat Analysis
-    #     will return a pandas DataFrame with the results and a list of volcano plots (for each covariat)
-    #
-    #     Args:
-    #         covariates (list): list of covariates, column names in metadata
-    #         n_permutations (int, optional): number of permutations. Defaults to 3.
-    #         fdr (float, optional): False Discovery Rate. Defaults to 0.05.
-    #         s0 (float, optional): . Defaults to 0.05.
-    #         subset (dict, optional): for categorical covariates . Defaults to None.
-    #
-    #     Returns:
-    #         pd.DataFrame: Multicova Analysis results
-    #     """
-    #
-    #     res, plot_list = MultiCovaAnalysis(
-    #         dataset=self,  # TODO fix .. does this write to it?
-    #         covariates=covariates,
-    #         n_permutations=n_permutations,
-    #         fdr=fdr,
-    #         s0=s0,
-    #         subset=subset,
-    #         plot=True,
-    #     ).calculate()
-    #     return res, plot_list
+    @ignore_warning(RuntimeWarning)
+    def multicova_analysis(  # TODO never used outside of tests .. how does this relate to multicova.py?
+        self,
+        covariates: list,
+        n_permutations: int = 3,
+        fdr: float = 0.05,
+        s0: float = 0.05,
+        subset: dict = None,
+    ) -> Tuple[pd.DataFrame, list]:
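+        """Perform multi-covariate analysis.
+        Returns a pandas DataFrame with the results and a list of volcano plots (one per covariate); `plot` is not passed to MultiCovaAnalysis here, so with its default of False the list is expected to be empty.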
+
+        Args:
+            covariates (list): list of covariates, column names in metadata
+            n_permutations (int, optional): number of permutations. Defaults to 3.
+            fdr (float, optional): False Discovery Rate. Defaults to 0.05.
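+            s0 (float, optional): passed through to MultiCovaAnalysis as s0 (presumably the fudge factor of the test statistic; TODO confirm). Defaults to 0.05.
+            subset (dict, optional): restrict the analysis to samples whose metadata column contains one of the given groups, as {"column_name": ["group1", "group2"]}. Defaults to None.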
+
+        Returns:
+            Tuple[pd.DataFrame, list]: the multicova analysis results and the list of volcano plots
+        """
+
+        res, plot_list = MultiCovaAnalysis(
+            mat=self.mat,
+            metadata=self.metadata,
+            sample=self.sample,
+            index_column=self.index_column,
+            covariates=covariates,
+            n_permutations=n_permutations,
+            fdr=fdr,
+            s0=s0,
+            subset=subset,
+        ).calculate()
+
+        return res, plot_list
diff --git a/alphastats/plots/PlotUtils.py b/alphastats/plots/PlotUtils.py
@@ -42,9 +42,10 @@ class plotly_object(plotly.graph_objs._figure.Figure):
 class PlotUtils:
     @staticmethod
     def _update_colors_plotly(fig, color_dict):
-        # plotly doesnt allow to assign color to certain group
-        # update instead the figure in form of a dict
-        # color_dict with group_variable/legendgroup as key, and corresponding color as value
+        # TODO revisit this comment:
+        #  plotly doesn't allow assigning a color to a specific group,
+        #  so the figure is updated in its dict form instead.
+        #  color_dict maps group_variable/legendgroup (key) to the corresponding color (value)
         fig_dict = fig.to_plotly_json()
         data_dict_list = fig_dict.get("data")
         for count, group in enumerate(data_dict_list):

diff --git a/alphastats/statistics/MultiCovaAnalysis.py b/alphastats/statistics/MultiCovaAnalysis.py
@@ -3,24 +3,30 @@
 import numpy as np
 import pandas as pd
 import plotly.express as px
-import scipy
-import tqdm
 
-from alphastats.statistics.StatisticUtils import StatisticUtils
 
-
-class MultiCovaAnalysis(StatisticUtils):
+class MultiCovaAnalysis:
     def __init__(
         self,
-        dataset,
+        *,
+        mat: pd.DataFrame,
+        metadata: pd.DataFrame,
+        sample: str,
+        index_column: str,
         covariates: list,
         n_permutations: int = 3,
         fdr: float = 0.05,
         s0: float = 0.05,
         subset: dict = None,
         plot: bool = False,
     ):
-        self.dataset = dataset
+        self.sample = sample
+        self.metadata_ori = metadata
+        self.mat = mat
+        self.index_column = index_column
+
+        self.metadata = None  # TODO check if the distinction between metadata and metadata_ori is necessary
+
         self.covariates = covariates
         self.n_permutations = n_permutations
         self.fdr = fdr
@@ -35,30 +41,30 @@ def __init__(
         self._prepare_matrix()
 
     def _subset_metadata(self):
-        columns_to_keep = self.covariates + [self.dataset.sample]
+        columns_to_keep = self.covariates + [self.sample]
         if self.subset is not None:
             # dict structure {"column_name": ["group1", "group2"]}
             subset_column = list(self.subset.keys())[0]
             groups = self.subset.get(subset_column)
-            self.metadata = self.dataset.metadata[
-                self.dataset.metadata[subset_column].isin(groups)
+            self.metadata = self.metadata_ori[
+                self.metadata_ori[subset_column].isin(groups)
             ][columns_to_keep]
 
         else:
-            self.metadata = self.dataset.metadata[columns_to_keep]
+            self.metadata = self.metadata_ori[columns_to_keep]
 
     def _check_covariat_input(self):
         # check whether all covariates are columns of the metadata
         misc_covariates = list(
-            set(self.covariates) - set(self.dataset.metadata.columns.to_list())
+            set(self.covariates) - set(self.metadata_ori.columns.to_list())
         )
         if len(misc_covariates) > 0:
             warnings.warn(f"Covariates: {misc_covariates} are not found in Metadata.")
             self.covariates = [x for x in self.covariates if x not in misc_covariates]
 
     def _check_na_values(self):
         for covariate in self.covariates:
-            if self.dataset.metadata[covariate].isna().any():
+            if self.metadata_ori[covariate].isna().any():
                 self.covariates.remove(covariate)
                 warnings.warn(
                     f"Covariate: {covariate} contains missing values"
@@ -101,10 +107,10 @@ def _convert_string_to_binary(self):
                     self.covariates.remove(col)
 
     def _prepare_matrix(self):
-        transposed = self.dataset.mat.transpose()
-        transposed[self.dataset.index_column] = transposed.index
+        transposed = self.mat.transpose()
+        transposed[self.index_column] = transposed.index
         transposed = transposed.reset_index(drop=True)
-        self.transposed = transposed[self.metadata[self.dataset.sample].to_list()]
+        self.transposed = transposed[self.metadata[self.sample].to_list()]
 
     def _plot_volcano_regression(self, res_real, variable):
         sig_col = res_real.filter(regex=variable + "_" + "FDR").columns[0]
@@ -115,7 +121,7 @@ def _plot_volcano_regression(self, res_real, variable):
             y=-np.log10(res_real[variable + "_" + "pval"]),
             color=res_real[sig_col],
             color_discrete_map={"sig": "#009599", "non_sig": "#404040"},
-            hover_name=res_real[self.dataset.index_column],
+            hover_name=res_real[self.index_column],
             title=variable,
             labels=dict(x="beta value", y="-log10(p-value)", color=sig_level),
         )
@@ -133,12 +139,12 @@ def calculate(self):
             quant_data=self.transposed,
             annotation=self.metadata,
             covariates=self.covariates,
-            sample_column=self.dataset.sample,
+            sample_column=self.sample,
             n_permutations=self.n_permutations,
             fdr=self.fdr,
             s0=self.s0,
         )
-        res[self.dataset.index_column] = self.dataset.mat.columns.to_list()
+        res[self.index_column] = self.mat.columns.to_list()
         plot_list = []
 
         if self.plot:

diff --git a/alphastats/statistics/StatisticUtils.py b/alphastats/statistics/StatisticUtils.py
diff --git a/tests/test_DataSet.py b/tests/test_DataSet.py
@@ -770,8 +770,6 @@ def test_batch_correction(self):
         first_value = self.obj.mat.values[0, 0]
         self.assertTrue(np.isclose(2.624937690577153e-08, first_value))
 
-    # TODO this opens a plot in a browser window
-    @skip  # TODO multicova_analysis is unused
     def test_multicova_analysis_invalid_covariates(self):
         self.obj.preprocess(imputation="knn", normalization="zscore", subset=True)
         res, _ = self.obj.multicova_analysis(