Skip to content

Commit

Permalink
add PreprocessInterface
Browse files Browse the repository at this point in the history
  • Loading branch information
mschwoer committed Sep 13, 2024
1 parent 9c52689 commit d8aeea7
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 5 deletions.
28 changes: 24 additions & 4 deletions alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


from alphastats.DataSet_Plot import Plot
from alphastats.DataSet_Preprocess import Preprocess
from alphastats.DataSet_Preprocess import Preprocess, PreprocessInterface
from alphastats.DataSet_Pathway import Enrichment
from alphastats.DataSet_Statistics import Statistics
from alphastats.utils import LoaderError
Expand All @@ -34,7 +34,7 @@
plotly.io.templates.default = "simple_white+alphastats_colors"


class DataSet(Statistics, Plot, Enrichment):
class DataSet(PreprocessInterface, Statistics, Plot, Enrichment):
"""Analysis Object"""

def __init__(self, loader, metadata_path=None, sample_column=None):
Expand Down Expand Up @@ -83,7 +83,18 @@ def __init__(self, loader, metadata_path=None, sample_column=None):
print("DataSet has been created.")
self.overview()

def preprocess(self, **kwargs):
def preprocess(
self,
log2_transform: bool = True,
remove_contaminations: bool = False,
subset: bool = False,
data_completeness: float = 0,
normalization: str = None,
imputation: str = None,
remove_samples: list = None,
**kwargs,
):
"""See documentation in the class implementing PreprocessInterface."""
pp = Preprocess(
self.filter_columns,
self.rawinput,
Expand All @@ -94,7 +105,16 @@ def preprocess(self, **kwargs):
self.mat,
)

self.mat, self.metadata, self.preprocessing_info = pp.preprocess(**kwargs)
self.mat, self.metadata, self.preprocessing_info = pp.preprocess(
log2_transform,
remove_contaminations,
subset,
data_completeness,
normalization,
imputation,
remove_samples,
**kwargs,
)
self.preprocessed = True

def reset_preprocessing(self):
Expand Down
19 changes: 18 additions & 1 deletion alphastats/DataSet_Preprocess.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from abc import ABC, abstractmethod
from typing import Optional

import numpy as np
import pandas as pd
Expand All @@ -12,7 +13,23 @@
from alphastats.utils import ignore_warning


class Preprocess:
class PreprocessInterface(ABC):
    """Abstract interface for objects that expose a ``preprocess`` step.

    The class only fixes the signature of :meth:`preprocess`; it contains no
    preprocessing logic itself. Because ``preprocess`` is abstract, a subclass
    cannot be instantiated until it overrides the method.
    """

    @abstractmethod
    def preprocess(
        self,
        log2_transform: bool,
        remove_contaminations: bool,
        subset: bool,
        data_completeness: float,
        normalization: Optional[str],
        imputation: Optional[str],
        remove_samples: Optional[list],
        **kwargs,
    ):
        """Preprocess the data according to the given options.

        The interface declares no default values; implementers may add them
        (``DataSet.preprocess`` does) without breaking the contract.

        Args:
            log2_transform: Presumably toggles a log2 transformation of the
                intensities -- confirm against the implementing class.
            remove_contaminations: Presumably toggles removal of contaminant
                entries -- confirm against the implementing class.
            subset: Presumably toggles subsetting of the data -- confirm
                against the implementing class.
            data_completeness: Numeric completeness setting; its exact range
                and meaning are defined by the implementing class.
            normalization: Name of a normalization method, or ``None``.
                ``Preprocess.normalization_methods`` lists the names that
                class knows about.
            imputation: Name of an imputation method, or ``None``.
                ``Preprocess.imputation_methods`` lists the names that class
                knows about.
            remove_samples: List of samples to remove, or ``None``.
            **kwargs: Additional implementation-specific options.

        Returns:
            Implementation-defined; the interface itself returns ``None``.
        """


class Preprocess(PreprocessInterface):
imputation_methods = ["mean", "median", "knn", "randomforest"]
normalization_methods = ["vst", "zscore", "quantile"]

Expand Down

0 comments on commit d8aeea7

Please sign in to comment.