MannLabs · JuliaS92 · Aug 28, 2024 · Aug 28, 2024 · Aug 28, 2024 · Aug 28, 2024
diff --git a/alphastats/gui/AlphaPeptStats.py b/alphastats/gui/AlphaPeptStats.py
@@ -17,7 +17,7 @@
 
 runtime = get_instance()
 session_id = get_script_run_ctx().session_id
-session_info = runtime._session_mgr.get_session_info(session_id)
+# session_info = runtime._session_mgr.get_session_info(session_id)
 
 user_session_id = session_id
 st.session_state["user_session_id"] = user_session_id

diff --git a/alphastats/gui/pages/02_Import Data.py b/alphastats/gui/pages/02_Import Data.py
@@ -29,7 +29,8 @@
 
 runtime = get_instance()
 session_id = get_script_run_ctx().session_id
-session_info = runtime._session_mgr.get_session_info(session_id)
+# session_info = runtime._session_mgr.get_session_info(session_id)
+# TODO: remove this line at some point if we really don't need it
 
 user_session_id = session_id
 st.session_state["user_session_id"] = user_session_id
@@ -45,12 +46,14 @@
 
 
 def load_options():
+# TODO: Change this to a global variable in all pages that need it, or put it in cache instead to declutter the state
     from alphastats.gui.utils.options import plotting_options, statistic_options
 
     st.session_state["plotting_options"] = plotting_options
     st.session_state["statistic_options"] = statistic_options
 
 
+# TODO: Move this to new data_helper.py in utils
 def check_software_file(df, software):
     """
     check if software files are in right format
@@ -93,7 +96,7 @@ def check_software_file(df, software):
                 "https://fragpipe.nesvilab.org/docs/tutorial_fragpipe_outputs.html#combined_proteintsv"
             )
 
-
+# TODO: Move this to new data_helper.py in utils
 def select_columns_for_loaders(software, software_df: None):
     """
     select intensity and index column depending on software
@@ -141,7 +144,7 @@ def load_proteomics_data(uploaded_file, intensity_column, index_column, software
     )
     return loader
 
-
+# TODO: Move this to new data_helper.py in utils
 def select_sample_column_metadata(df, software):
     samples_proteomics_data = get_sample_names_from_software_file()
     valid_sample_columns = []
@@ -173,7 +176,7 @@ def select_sample_column_metadata(df, software):
             st.stop()
         return True
 
-
+# TODO: Change the behaviour to show full interface already after software selection and only update options after file upload
 def upload_softwarefile(software):
     softwarefile = st.file_uploader(
         software_options.get(software).get("import_file"),
@@ -207,6 +210,8 @@ def upload_softwarefile(software):
             st.session_state["loader"] = loader
 
 
+# TODO: Move this to new data_helper.py in utils
+# TODO: Make this more powerful by making suggestions based on the sample names
 def create_metadata_file():
     dataset = DataSet(loader=st.session_state.loader)
     st.session_state["metadata_columns"] = ["sample"]
@@ -225,7 +230,7 @@ def create_metadata_file():
             mime="application/vnd.ms-excel",
         )
 
-
+# TODO: Serve this interface already upon software selection and only update options after file upload
 def upload_metadatafile(software):
     st.write("\n\n")
     st.markdown("### 3. Prepare Metadata.")
@@ -322,7 +327,7 @@ def import_data():
     if st.session_state.loader is not None:
         upload_metadatafile(st.session_state.software)
 
-
+# TODO: Move this to new data_helper.py in utils
 def display_loaded_dataset():
     st.info("Data was successfully imported")
     st.info("DataSet has been created")
@@ -342,7 +347,7 @@ def display_loaded_dataset():
 
     st.dataframe(df)
 
-
+# TODO: Move this to new data_helper.py in utils
 def save_plot_sampledistribution_rawdata():
     df = st.session_state.dataset.rawmat
     df = df.unstack().reset_index()
@@ -354,7 +359,7 @@ def save_plot_sampledistribution_rawdata():
         df, x=st.session_state.dataset.sample, y="Intensity"
     )
 
-
+# TODO: Move this to new data_helper.py in utils
 def empty_session_state():
     """
     remove all variables to avoid conflicts
@@ -392,7 +397,7 @@ def empty_session_state():
 
 st.markdown("### Or Load sample Dataset")
 
-if st.button("Load sample DataSet - PXD011839"):
+if st.button("Load sample DataSet - PXD011839", key="load_sample_data"):
     st.write(
         """
 

diff --git a/alphastats/gui/pages/03_Data Overview.py b/alphastats/gui/pages/03_Data Overview.py
@@ -2,14 +2,14 @@
 import pandas as pd
 import plotly.express as px
 
-
+#TODO: Cache the whole interface, not each individual plot.
 @st.cache_data
-def convert_df(df, user_session_id=st.session_state.user_session_id):
+def convert_df(df, user_session_id):
     return df.to_csv().encode("utf-8")
 
 
 @st.cache_data
-def get_display_matrix(user_session_id=st.session_state.user_session_id):
+def get_display_matrix(user_session_id):
     processed_df = pd.DataFrame(
         st.session_state.dataset.mat.values,
         index=st.session_state.dataset.mat.index.to_list(),
@@ -31,8 +31,8 @@ def display_matrix():
     st.markdown("**DataFrame used for analysis** *preview*")
     st.markdown(text)
 
-    df = get_display_matrix()
-    csv = convert_df(st.session_state.dataset.mat)
+    df = get_display_matrix(st.session_state.user_session_id)
+    csv = convert_df(st.session_state.dataset.mat, st.session_state.user_session_id)
 
     st.dataframe(df)
 
@@ -42,14 +42,12 @@ def display_matrix():
 
 
 @st.cache_data
-def get_sample_histogram_matrix(user_session_id=st.session_state.user_session_id):
+def get_sample_histogram_matrix(user_session_id):
     return st.session_state.dataset.plot_samplehistograms()
 
 
 @st.cache_data
-def get_intensity_distribution_processed(
-    user_session_id=st.session_state.user_session_id,
-):
+def get_intensity_distribution_processed(user_session_id):
     return st.session_state.dataset.plot_sampledistribution()
 
 
@@ -68,16 +66,12 @@ def get_intensity_distribution_processed(
     with c2:
         st.markdown("**Intensity distribution data per sample used for analysis**")
         st.plotly_chart(
-            get_intensity_distribution_processed(
-                user_session_id=st.session_state.user_session_id
-            ).update_layout(plot_bgcolor="white"),
+            get_intensity_distribution_processed(st.session_state.user_session_id).update_layout(plot_bgcolor="white"),
             use_container_width=True,
         )
 
     st.plotly_chart(
-        get_sample_histogram_matrix(
-            user_session_id=st.session_state.user_session_id
-        ).update_layout(plot_bgcolor="white"),
+        get_sample_histogram_matrix(st.session_state.user_session_id).update_layout(plot_bgcolor="white"),
         use_container_width=True,
     )
 

diff --git a/alphastats/gui/test/test_02_import_data.py b/alphastats/gui/test/test_02_import_data.py
@@ -0,0 +1,135 @@
+from streamlit.testing.v1 import AppTest
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+import pandas as pd
+from io import BytesIO
+
+def print_session_state(apptest: AppTest):
+    for k,v in apptest.session_state.filtered_state.items():
+        print(f"{k}:    {str(type(v))}   {str(v)[:20] if type(v) not in [int, list, str] else v}")
+
+APP_FOLDER = Path(__file__).parent / Path("../")
+APP_FILE = f"{APP_FOLDER}/pages/02_Import Data.py"
+TEST_INPUT_FILES = f"{APP_FOLDER}/../../testfiles"
+
+def test_page_02_loads_without_input():
+    """Test if the page loads without any input and inititalizes the session state with the correct values."""
+    at = AppTest(APP_FILE, default_timeout=200)
+    at.run()
+
+    assert not at.exception
+
+    assert at.session_state.organism == 9606
+    assert at.session_state.user_session_id == 'test session id'
+    assert at.session_state.software == '<select>'
+    assert at.session_state.gene_to_prot_id == {}
+    assert at.session_state.loader == None
+
+@patch("streamlit.file_uploader")
+def test_patched_page_02_loads_without_input(mock_file_uploader: MagicMock):
+    """Test if the page loads without any input and inititalizes the session state with the correct value when the file_uploader is patched."""
+    at = AppTest(APP_FILE, default_timeout=200)
+    at.run()
+
+    assert not at.exception
+
+    assert at.session_state.organism == 9606
+    assert at.session_state.user_session_id == 'test session id'
+    assert at.session_state.software == '<select>'
+    assert at.session_state.gene_to_prot_id == {}
+    assert at.session_state.loader == None
+
+def test_page_02_loads_sample_data():
+    """Test if the page loads the sample data and has the correct session state afterwards."""
+    at = AppTest(APP_FILE, default_timeout=200)
+    at.run()
+
+    # User clicks Load Sample Data button
+    at.button("load_sample_data").click().run()
+
+    assert not at.exception
+
+    assert at.session_state.metadata_columns == ['sample', 'disease', 'Drug therapy (procedure) (416608005)', 'Lipid-lowering therapy (134350008)']
+    assert str(type(at.session_state.dataset)) == "<class 'alphastats.DataSet.DataSet'>"
+    assert at.session_state.software == "<select>"
+    assert str(type(at.session_state.distribution_plot)) == "<class 'plotly.graph_objs._figure.Figure'>"
+    assert str(type(at.session_state.loader)) == "<class 'alphastats.loader.MaxQuantLoader.MaxQuantLoader'>"
+    assert "plotting_options" in at.session_state
+    assert "statistic_options" in at.session_state
+
+
+def _data_buf(path_from_testfiles: str):
+    """Helper function to open a data file from the testfiles folder and return a BytesIO object.
+
+    Additionally add filename as attribute."""
+    with open(f"{TEST_INPUT_FILES}{path_from_testfiles}", "rb") as f:
+        buf = BytesIO(f.read())
+        buf.name = path_from_testfiles.split('/')[-1]
+        return buf
+
+
+def _metadata_buf(path_from_testfiles: str, at: AppTest):
+    """Helper function to open a metadata file from the testfiles folder and return a BytesIO object.
+
+    Additionally add filename as attribute and set the metadatafile in the session state."""
+    with open(f"{TEST_INPUT_FILES}{path_from_testfiles}", "rb") as f:
+        buf = BytesIO(f.read())
+        buf.name = path_from_testfiles.split('/')[-1]
+        at.session_state.metadatafile = buf
+        return buf
+
+
+@patch("streamlit.file_uploader")
+def test_page_02_loads_maxquant_testfiles(mock_file_uploader: MagicMock):
+    """Test if the page loads the MaxQuant testfiles and has the correct session state afterwards.
+
+    No input to the dropdown menus is simulated, hence the default detected values are used.
+    Two states are tested:
+    1. Files are uploaded but not processed yet
+    2. Files are uploaded and processed"""
+    DATA_FILE = "/maxquant/proteinGroups.txt"
+    METADATA_FILE = "/maxquant/metadata.xlsx"
+
+    at = AppTest(APP_FILE, default_timeout=200)
+    at.run()
+
+    # User selects MaxQuant from the dropdown menu
+    at.selectbox(key='software').select('MaxQuant')
+    mock_file_uploader.side_effect = [None]
+    at.run()
+
+    # User uploads the data file
+    mock_file_uploader.side_effect = [_data_buf(DATA_FILE),None]
+    at.run()
+
+    # User uploads the metadata file
+    mock_file_uploader.side_effect = [_data_buf(DATA_FILE),_metadata_buf(METADATA_FILE, at)]
+    at.run()
+
+    assert not at.exception
+
+    assert str(type(at.session_state.loader)) == "<class 'alphastats.loader.MaxQuantLoader.MaxQuantLoader'>"
+    assert at.session_state.intensity_column == 'LFQ intensity [sample]'
+    assert str(type(at.session_state.metadatafile)) == "<class '_io.BytesIO'>"
+    assert at.session_state.software == 'MaxQuant'
+    assert at.session_state.index_column == 'Protein IDs'
+    assert at.session_state.metadata_columns == ['sample']
+    assert at.session_state.sample_column == 'sample'
+
+    # User clicks the Load Data button
+    mock_file_uploader.side_effect = [_data_buf(DATA_FILE),_metadata_buf(METADATA_FILE, at)]
+    at.button('FormSubmitter:sample_column-Create DataSet').click()
+    at.run()
+
+    assert not at.exception
+
+    assert at.session_state.dataset.gene_names == "Gene names"
+    assert at.session_state.dataset.index_column == "Protein IDs"
+    assert at.session_state.dataset.intensity_column == 'LFQ intensity [sample]'
+    assert at.session_state.dataset.rawmat.shape == (312, 2611)
+    assert at.session_state.dataset.software == 'MaxQuant'
+    assert at.session_state.dataset.sample == 'sample'
+    assert str(type(at.session_state.distribution_plot)) == "<class 'plotly.graph_objs._figure.Figure'>"
+    assert str(type(at.session_state.loader)) == "<class 'alphastats.loader.MaxQuantLoader.MaxQuantLoader'>"
+    assert "plotting_options" in at.session_state
+    assert "statistic_options" in at.session_state
diff --git a/alphastats/gui/test/test_03_data_overview.py b/alphastats/gui/test/test_03_data_overview.py
@@ -0,0 +1,20 @@
+from streamlit.testing.v1 import AppTest
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+import pandas as pd
+from io import BytesIO
+
+def print_session_state(apptest: AppTest):
+    for k,v in apptest.session_state.filtered_state.items():
+        print(f"{k}:    {str(type(v))}   {str(v)[:20] if type(v) not in [int, list, str] else v}")
+
+APP_FOLDER = Path(__file__).parent / Path("../")
+APP_FILE = f"{APP_FOLDER}/pages/03_Data Overview.py"
+TEST_INPUT_FILES = f"{APP_FOLDER}/../../testfiles"
+
+def test_page_03_loads_without_input():
+    """Test if the page loads without any input and inititalizes the session state with the correct values."""
+    at = AppTest(APP_FILE, default_timeout=200)
+    at.run()
+
+    assert not at.exception
diff --git a/alphastats/gui/utils/analysis_helper.py b/alphastats/gui/utils/analysis_helper.py
@@ -62,15 +62,15 @@ def download_figure(obj, format, plotting_library="plotly"):
 
 
 @st.cache_data
-def convert_df(df, user_session_id=st.session_state.user_session_id):
+def convert_df(df, user_session_id):
     return df.to_csv().encode("utf-8")
 
 
 def download_preprocessing_info(plot):
     preprocesing_dict = plot[1].preprocessing
     df = pd.DataFrame(preprocesing_dict.items())
     filename = "plot" + plot[0] + "preprocessing_info.csv"
-    csv = convert_df(df)
+    csv = convert_df(df, st.session_state.user_session_id)
     st.download_button(
         "Download DataSet Info as .csv",
         csv,
@@ -134,7 +134,7 @@ def st_general(method_dict):
 
 @st.cache_data
 def gui_volcano_plot_differential_expression_analysis(
-    chosen_parameter_dict, user_session_id=st.session_state.user_session_id
+    chosen_parameter_dict, user_session_id
 ):
     """
     initalize volcano plot object with differential expression analysis results