diff --git a/alphastats/DataSet.py b/alphastats/DataSet.py index 98ea4a40..20a7d087 100644 --- a/alphastats/DataSet.py +++ b/alphastats/DataSet.py @@ -8,7 +8,7 @@ from alphastats.DataSet_Plot import Plot -from alphastats.DataSet_Preprocess import Preprocess +from alphastats.DataSet_Preprocess import Preprocess, PreprocessInterface from alphastats.DataSet_Pathway import Enrichment from alphastats.DataSet_Statistics import Statistics from alphastats.utils import LoaderError @@ -34,7 +34,7 @@ plotly.io.templates.default = "simple_white+alphastats_colors" -class DataSet(Statistics, Plot, Enrichment): +class DataSet(PreprocessInterface, Statistics, Plot, Enrichment): """Analysis Object""" def __init__(self, loader, metadata_path=None, sample_column=None): @@ -83,7 +83,18 @@ def __init__(self, loader, metadata_path=None, sample_column=None): print("DataSet has been created.") self.overview() - def preprocess(self, **kwargs): + def preprocess( + self, + log2_transform: bool = True, + remove_contaminations: bool = False, + subset: bool = False, + data_completeness: float = 0, + normalization: str = None, + imputation: str = None, + remove_samples: list = None, + **kwargs, + ): + """See documentation in the class implementing PreprocessInterface.""" pp = Preprocess( self.filter_columns, self.rawinput, @@ -94,7 +105,16 @@ def preprocess(self, **kwargs): self.mat, ) - self.mat, self.metadata, self.preprocessing_info = pp.preprocess(**kwargs) + self.mat, self.metadata, self.preprocessing_info = pp.preprocess( + log2_transform, + remove_contaminations, + subset, + data_completeness, + normalization, + imputation, + remove_samples, + **kwargs, + ) self.preprocessed = True def reset_preprocessing(self): diff --git a/alphastats/DataSet_Preprocess.py b/alphastats/DataSet_Preprocess.py index 970a066a..b3bbe5c6 100644 --- a/alphastats/DataSet_Preprocess.py +++ b/alphastats/DataSet_Preprocess.py @@ -1,4 +1,5 @@ import logging +from abc import ABC, abstractmethod import numpy as np import pandas as pd @@ -12,7 +13,23 @@ from alphastats.utils import ignore_warning -class Preprocess: +class PreprocessInterface(ABC): + @abstractmethod + def preprocess( + self, + log2_transform: bool, + remove_contaminations: bool, + subset: bool, + data_completeness: float, + normalization: str, + imputation: str, + remove_samples: list, + **kwargs, + ): + pass + + +class Preprocess(PreprocessInterface): imputation_methods = ["mean", "median", "knn", "randomforest"] normalization_methods = ["vst", "zscore", "quantile"]