diff --git a/alphastats/DataSet_Preprocess.py b/alphastats/DataSet_Preprocess.py
index 24229126..b89fd887 100644
--- a/alphastats/DataSet_Preprocess.py
+++ b/alphastats/DataSet_Preprocess.py
@@ -1,16 +1,16 @@
-from random import random
-import pandas as pd
-import sklearn
+import itertools
 import logging
+
 import numpy as np
+import pandas as pd
+import sklearn
 import sklearn.ensemble
 import sklearn.impute
-from alphastats.utils import ignore_warning
-from sklearn.experimental import enable_iterative_imputer
-import itertools
-
 import streamlit as st
 
+from sklearn.experimental import enable_iterative_imputer
+from alphastats.utils import ignore_warning
+
 
 class Preprocess:
     def _remove_sampels(self, sample_list: list):
@@ -31,9 +31,14 @@ def preprocess_print_info(self):
         print(pd.DataFrame(self.preprocessing_info.items()))
 
     def _remove_na_values(self, cut_off):
-        if self.preprocessing_info.get("Missing values were removed") and self.preprocessing_info.get("Data completeness cut-off") == cut_off:
+        if (
+            self.preprocessing_info.get("Missing values were removed")
+            and self.preprocessing_info.get("Data completeness cut-off") == cut_off
+        ):
             logging.info("Missing values have already been filtered.")
-            st.warning("Missing values have already been filtered. To apply another cutoff, reset preprocessing.")
+            st.warning(
+                "Missing values have already been filtered. To apply another cutoff, reset preprocessing."
+            )
             return
         cut = 1 - cut_off
 
@@ -59,25 +64,25 @@ def _remove_na_values(self, cut_off):
 
         self.preprocessing_info.update(
             {
-                "Number of removed ProteinGroups due to data completeness cutoff": num_proteins - self.mat.shape[1],
+                "Number of removed ProteinGroups due to data completeness cutoff": num_proteins
+                - self.mat.shape[1],
                 "Missing values were removed": True,
                 "Data completeness cut-off": cut_off,
             }
         )
-       
 
     def _filter(self):
         if len(self.filter_columns) == 0:
             logging.info("No columns to filter.")
             return
 
-        if self.preprocessing_info.get("Contaminations have been removed") == True:
+        if self.preprocessing_info.get("Contaminations have been removed"):
             logging.info("Contaminatons have already been filtered.")
             return
 
         #  print column names with contamination
         protein_groups_to_remove = self.rawinput[
-            (self.rawinput[self.filter_columns] == True).any(axis=1)
+            self.rawinput[self.filter_columns].any(axis=1)
         ][self.index_column].tolist()
 
         protein_groups_to_remove = list(
@@ -186,10 +191,11 @@ def _linear_normalization(self, array):
     @ignore_warning(UserWarning)
     @ignore_warning(RuntimeWarning)
     def _normalization(self, method: str):
-
         if method == "zscore":
             scaler = sklearn.preprocessing.StandardScaler()
-            normalized_array = scaler.fit_transform(self.mat.values.transpose()).transpose()
+            normalized_array = scaler.fit_transform(
+                self.mat.values.transpose()
+            ).transpose()
 
         elif method == "quantile":
             qt = sklearn.preprocessing.QuantileTransformer(random_state=0)
@@ -268,7 +274,6 @@ def batch_correction(self, batch: str):
         Args:
             batch (str): column name in the metadata describing the different batches
         """
-        import combat
         from combat.pycombat import pycombat
 
         data = self.mat.transpose()
diff --git a/alphastats/gui/pages/02_Import Data.py b/alphastats/gui/pages/02_Import Data.py
index a700f428..1e6322b1 100644
--- a/alphastats/gui/pages/02_Import Data.py	
+++ b/alphastats/gui/pages/02_Import Data.py	
@@ -1,26 +1,30 @@
-import streamlit as st
-import sys
-import os
 import io
+import os
+
+import streamlit as st
 
 try:
-    from alphastats.gui.utils.ui_helper import sidebar_info
-    from alphastats.gui.utils.analysis_helper import *
+    from alphastats.DataSet import DataSet
+    from alphastats.gui.utils.analysis_helper import (
+        get_sample_names_from_software_file,
+        read_uploaded_file_into_df,
+    )
     from alphastats.gui.utils.software_options import software_options
+    from alphastats.gui.utils.ui_helper import sidebar_info
     from alphastats.loader.MaxQuantLoader import MaxQuantLoader
-    from alphastats.DataSet import DataSet
 
 except ModuleNotFoundError:
     from utils.ui_helper import sidebar_info
-    from utils.analysis_helper import *
+    from utils.analysis_helper import (
+        get_sample_names_from_software_file,
+        read_uploaded_file_into_df,
+    )
     from utils.software_options import software_options
     from alphastats import MaxQuantLoader
     from alphastats import DataSet
 
-
 import pandas as pd
 import plotly.express as px
-
 from streamlit.runtime import get_instance
 from streamlit.runtime.scriptrunner.script_run_context import get_script_run_ctx
 
@@ -56,7 +60,7 @@ def check_software_file(df, software):
 
     if software == "MaxQuant":
         expected_columns = ["Protein IDs", "Reverse", "Potential contaminant"]
-        if (set(expected_columns).issubset(set(df.columns.to_list()))) == False:
+        if not set(expected_columns).issubset(set(df.columns.to_list())):
             st.error(
                 "This is not a valid MaxQuant file. Please check:"
                 "http://www.coxdocs.org/doku.php?id=maxquant:table:proteingrouptable"
@@ -71,7 +75,7 @@ def check_software_file(df, software):
             "Protein.Group",
         ]
 
-        if (set(expected_columns).issubset(set(df.columns.to_list()))) == False:
+        if not set(expected_columns).issubset(set(df.columns.to_list())):
             st.error("This is not a valid DIA-NN file.")
 
     elif software == "Spectronaut":
@@ -79,12 +83,12 @@ def check_software_file(df, software):
             "PG.ProteinGroups",
         ]
 
-        if (set(expected_columns).issubset(set(df.columns.to_list()))) == False:
+        if not set(expected_columns).issubset(set(df.columns.to_list())):
             st.error("This is not a valid Spectronaut file.")
 
     elif software == "FragPipe":
         expected_columns = ["Protein"]
-        if (set(expected_columns).issubset(set(df.columns.to_list()))) == False:
+        if not set(expected_columns).issubset(set(df.columns.to_list())):
             st.error(
                 "This is not a valid FragPipe file. Please check:"
                 "https://fragpipe.nesvilab.org/docs/tutorial_fragpipe_outputs.html#combined_proteintsv"
@@ -145,7 +149,6 @@ def select_sample_column_metadata(df, software):
 
     for col in df.columns.to_list():
         if bool(set(samples_proteomics_data) & set(df[col].to_list())):
-            print("comparing lengths", len(samples_proteomics_data), len(df[col].to_list()))
             valid_sample_columns.append(col)
 
     if len(valid_sample_columns) == 0:
@@ -155,16 +158,18 @@ def select_sample_column_metadata(df, software):
         )
 
     st.write(
-        f"Select column that contains sample IDs matching the sample names described "
+        "Select column that contains sample IDs matching the sample names described "
         + f"in {software_options.get(software).get('import_file')}"
     )
 
     with st.form("sample_column"):
         st.selectbox("Sample Column", options=valid_sample_columns, key="sample_column")
         submitted = st.form_submit_button("Create DataSet")
-        
+
     if submitted:
-        if len(df[st.session_state.sample_column].to_list()) != len(df[st.session_state.sample_column].unique()):
+        if len(df[st.session_state.sample_column].to_list()) != len(
+            df[st.session_state.sample_column].unique()
+        ):
             st.error("Sample names have to be unique.")
             st.stop()
         return True
@@ -212,8 +217,6 @@ def create_metadata_file():
     with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
         # Write each dataframe to a different worksheet.
         metadata.to_excel(writer, sheet_name="Sheet1", index=False)
-        # Close the Pandas Excel writer and output the Excel file to the buffer
-        # writer.close()
 
         st.download_button(
             label="Download metadata template as Excel",
@@ -249,14 +252,8 @@ def upload_metadatafile(software):
                 sample_column=st.session_state.sample_column,
             )
             st.session_state["metadata_columns"] = metadatafile_df.columns.to_list()
-            # if len(st.session_state["dataset"].metadata[self.sample].tolist()) != len(self.metadata[self.sample].unique()):
-            #     st.error("Sample names have to be unique.")
-
-
             load_options()
 
-            # display_loaded_dataset()
-
     if st.session_state.loader is not None:
         create_metadata_file()
         st.write(
@@ -272,8 +269,6 @@ def upload_metadatafile(software):
 
             load_options()
 
-            # display_loaded_dataset()
-
 
 def load_sample_data():
     _this_file = os.path.abspath(__file__)
@@ -319,7 +314,6 @@ def import_data():
         options=options,
         key="software",
     )
-    session_state_empty = False
 
     if st.session_state.software != "<select>":
         upload_softwarefile(software=st.session_state.software)
@@ -336,10 +330,10 @@ def display_loaded_dataset():
     st.markdown(f"*Preview:* Raw data from {st.session_state.dataset.software}")
     st.dataframe(st.session_state.dataset.rawinput.head(5))
 
-    st.markdown(f"*Preview:* Metadata")
+    st.markdown("*Preview:* Metadata")
     st.dataframe(st.session_state.dataset.metadata.head(5))
 
-    st.markdown(f"*Preview:* Matrix")
+    st.markdown("*Preview:* Matrix")
 
     df = pd.DataFrame(
         st.session_state.dataset.mat.values,
@@ -370,7 +364,6 @@ def empty_session_state():
     st.empty()
     st.session_state["software"] = "<select>"
 
-    from streamlit.runtime import get_instance
     from streamlit.runtime.scriptrunner.script_run_context import get_script_run_ctx
 
     user_session_id = get_script_run_ctx().session_id
@@ -379,8 +372,6 @@ def empty_session_state():
 
 sidebar_info()
 
-# import_data()
-
 
 if "dataset" not in st.session_state:
     st.markdown("### Import Proteomics Data")
@@ -389,7 +380,6 @@ def empty_session_state():
         "Create a DataSet with the output of your proteomics software package and the corresponding metadata (optional). "
     )
 
-    # import_data()
 import_data()
 
 if "dataset" in st.session_state:
diff --git a/alphastats/gui/pages/03_Preprocessing.py b/alphastats/gui/pages/03_Preprocessing.py
index f55374c5..8a927ccb 100644
--- a/alphastats/gui/pages/03_Preprocessing.py
+++ b/alphastats/gui/pages/03_Preprocessing.py
@@ -12,7 +12,7 @@ def preprocessing():
 
         st.markdown(
             "Before analyzing your data, consider normalizing and imputing your data as well as the removal of contaminants. "
-            + "A more detailed description about the preprocessing methods can be found in the AlphaPeptStats "
+            + "A more detailed description about the preprocessing methods can be found in the AlphaPeptStats " 
             + "[documentation](https://alphapeptstats.readthedocs.io/en/main/data_preprocessing.html)."
         )
 
@@ -30,23 +30,17 @@ def preprocessing():
             )
 
             remove_samples = st.multiselect(
-                "Remove samples from analysis",
-                options=st.session_state.dataset.metadata[
-                    st.session_state.dataset.sample
-                ].to_list(),
+                "Remove samples from analysis", 
+                options=st.session_state.dataset.metadata[st.session_state.dataset.sample].to_list()
             )
 
             data_completeness = st.number_input(
                 f"Data completeness across samples cut-off \n(0.7 -> protein has to be detected in at least 70% of the samples)",
-                value=0.0,
-                min_value=0.0,
-                max_value=1.0,
-                step=0.1,
+                value=0, min_value=0, max_value=1
             )
 
             log2_transform = st.selectbox(
-                "Log2-transform dataset",
-                options=[True, False],
+                "Log2-transform dataset", options=[True, False],
             )
 
             normalization = st.selectbox(
@@ -62,64 +56,57 @@ def preprocessing():
         if submitted:
             if len(remove_samples) == 0:
                 remove_samples = None
-
+            
             st.session_state.dataset.preprocess(
                 remove_contaminations=remove_contaminations,
                 log2_transform=log2_transform,
-                remove_samples=remove_samples,
+                remove_samples = remove_samples,
                 data_completeness=data_completeness,
                 subset=subset,
                 normalization=normalization,
                 imputation=imputation,
             )
-             
-            st.session_state["preprocessing_info"] = st.session_state.dataset.preprocessing_info
-            
-
-    if submitted or "preprocessing_info" in st.session_state:
-        st.info(
+            preprocessing = st.session_state.dataset.preprocessing_info
+            st.info(
                 "Data has been processed. "
                 + datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S")
             )
-        st.dataframe(
-                pd.DataFrame.from_dict(st.session_state["preprocessing_info"], orient="index").astype(str),
-                use_container_width=True,
-            )
-    with c2:
-
-        if submitted:
-            st.markdown("**Intensity Distribution after preprocessing per sample**")
-            fig_processed = st.session_state.dataset.plot_sampledistribution()
-            st.plotly_chart(
-                fig_processed.update_layout(plot_bgcolor="white"),
-                use_container_width=True,
-            )
-
-        else:
-            st.markdown("**Intensity Distribution per sample**")
-            fig_none_processed = st.session_state.dataset.plot_sampledistribution()
-            st.plotly_chart(
-                fig_none_processed.update_layout(plot_bgcolor="white"),
+            st.dataframe(
+                pd.DataFrame.from_dict(preprocessing, orient="index").astype(str),
                 use_container_width=True,
             )
-    c1, c2 = st.columns(2)
-    with c1:    
+        
         st.markdown("#### Batch correction: correct for technical bias")
 
         with st.form("Batch correction: correct for technical bias"):
             batch = st.selectbox(
-                "Batch", options=st.session_state.dataset.metadata.columns.to_list()
+                "Batch", 
+                options= st.session_state.dataset.metadata.columns.to_list()
             )
             submit_batch_correction = st.form_submit_button("Submit")
-
+        
         if submit_batch_correction:
-            st.session_state.dataset.batch_correction(batch=batch)
+            st.session_state.dataset.batch_correction(
+                batch=batch
+            )
             st.info(
                 "Data has been processed. "
                 + datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S")
             )
 
     
+    with c2:
+
+        if submitted:
+            st.markdown("**Intensity Distribution after preprocessing per sample**")
+            fig_processed = st.session_state.dataset.plot_sampledistribution()
+            st.plotly_chart(fig_processed.update_layout(plot_bgcolor="white"), use_container_width=True)
+        
+        else:
+            st.markdown("**Intensity Distribution per sample**")
+            fig_none_processed = st.session_state.dataset.plot_sampledistribution()
+            st.plotly_chart(fig_none_processed.update_layout(plot_bgcolor="white"), use_container_width=True)
+        
 
     reset_steps = st.button("Reset all Preprocessing steps")
 
@@ -129,12 +116,14 @@ def preprocessing():
 
 def reset_preprocessing():
     st.session_state.dataset.create_matrix()
+    preprocessing = st.session_state.dataset.preprocessing_info
     st.info(
         "Data has been reset. " + datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S")
     )
-    st.session_state["preprocessing_info"] = st.session_state.dataset.preprocessing_info
-    # reset the page
-    st.rerun()
+    st.dataframe(
+        pd.DataFrame.from_dict(preprocessing, orient="index").astype(str),
+        use_container_width=True,
+    )
 
 
 def main_preprocessing():
@@ -156,4 +145,4 @@ def main_preprocessing():
 def plot_intensity_distribution():
     st.selectbox(
         "Sample", options=st.session_state.dataset.metadata["sample"].to_list()
-    )
+    )
\ No newline at end of file
diff --git a/tests/test_DataSet.py b/tests/test_DataSet.py
index 559e6161..4aef9607 100644
--- a/tests/test_DataSet.py
+++ b/tests/test_DataSet.py
@@ -270,9 +270,9 @@ def test_preprocess_normalize_zscore(self):
         self.obj.preprocess(log2_transform=False, normalization="zscore")
         expected_mat = pd.DataFrame(
             {
-                "a": [-1.33630621, 1.06904497, 0.26726124],
-                "b": [1.41421356, -0.70710678, -0.70710678],
-                "c": [-1.38873015, 0.9258201, 0.46291005],
+                "a": [-0.162221, -0.508001, -0.707107],
+                "b": [1.297771, -0.889001, -0.707107],
+                "c": [-1.135550, 1.397001, 1.414214],
             }
         )
         pd._testing.assert_frame_equal(self.obj.mat, expected_mat)
@@ -282,7 +282,9 @@ def test_preprocess_normalize_quantile(self):
         # Quantile Normalization
         self.obj.preprocess(log2_transform=False, normalization="quantile")
         expected_mat = pd.DataFrame(
-            {"a": [0.0, 1.0, 0.5], "b": [1.0, 0.0, 0.0], "c": [0.0, 1.0, 0.5]}
+            {"a": [0.5, 0.5, 0.0], 
+             "b": [1.0, 0.0, 0.0], 
+             "c": [0.0, 1.0, 1.0]}
         )
         pd._testing.assert_frame_equal(self.obj.mat, expected_mat)
 
@@ -306,9 +308,9 @@ def test_preprocess_normalize_vst(self):
         self.obj.preprocess(log2_transform=False, normalization="vst")
         expected_mat = pd.DataFrame(
             {
-                "a": [-1.307734, 1.120100, 0.187634],
-                "b": [	1.414214, -0.707107, -0.707107],
-                "c": [-1.360307, 1.015077, 0.345230],
+                "a": [-0.009526, -0.236399, -0.707107],
+                "b": [	1.229480, -1.089313, -0.707107],
+                "c": [-1.219954, 1.325712, 1.414214],
             }
         )
         pd._testing.assert_frame_equal(self.obj.mat.round(2), expected_mat.round(2))
@@ -507,7 +509,7 @@ def test_plot_intenstity_subgroup_significance_warning(self, mock):
         )
         plot_dict = plot.to_plotly_json()
         self.assertEqual(len(plot_dict.get("data")), 5)
-        mock.assert_called_once()
+        self.assertEqual(mock.call_count, 2)
 
     def test_anova_with_tukey(self):
         # with first 100 protein ids
@@ -577,8 +579,8 @@ def test_plot_volcano_sam(self):
         )
 
         # fdr lines get drawn
-        line_1 = plot.to_plotly_json()["data"][3].get("line").get("shape")
-        line_2 = plot.to_plotly_json()["data"][4].get("line").get("shape")
+        line_1 = plot.to_plotly_json()["data"][-2].get("line").get("shape")
+        line_2 = plot.to_plotly_json()["data"][-1].get("line").get("shape")
 
         self.assertEqual(line_1, "spline")
         self.assertEqual(line_2, "spline")
@@ -739,10 +741,10 @@ def test_plot_samplehistograms(self):
         self.assertEqual(312, len(fig["data"]))
 
     def test_batch_correction(self):
-        self.obj.preprocess(subset=True, imputation="knn", normalization="quantile")
+        self.obj.preprocess(subset=True, imputation="knn", normalization="linear")
         self.obj.batch_correction(batch="batch_artifical_added")
         first_value = self.obj.mat.values[0, 0]
-        self.assertAlmostEqual(0.0111, first_value, places=2)
+        self.assertAlmostEqual(-0.00555, first_value, places=3)
 
     def test_multicova_analysis_invalid_covariates(self):
         self.obj.preprocess(imputation="knn", normalization="zscore", subset=True)