diff --git a/alphastats/gui/pages/02_Import Data.py b/alphastats/gui/pages/02_Import Data.py
index b3b25957..27d0749d 100644
--- a/alphastats/gui/pages/02_Import Data.py
+++ b/alphastats/gui/pages/02_Import Data.py
@@ -11,6 +11,13 @@
         empty_session_state,
     )
     from alphastats.gui.utils.ui_helper import sidebar_info
+    from alphastats.DataSet import DataSet
+    from alphastats.gui.utils.analysis_helper import (
+        get_sample_names_from_software_file,
+        read_uploaded_file_into_df,
+    )
+    from alphastats.gui.utils.ui_helper import sidebar_info, StateKeys
+    from alphastats.loader.MaxQuantLoader import MaxQuantLoader
 
 except ModuleNotFoundError:
     from utils.ui_helper import sidebar_info
@@ -22,24 +29,34 @@
         empty_session_state,
     )
+    from utils.ui_helper import sidebar_info, StateKeys
+    from utils.analysis_helper import (
+        get_sample_names_from_software_file,
+        read_uploaded_file_into_df,
+    )
+    from alphastats import MaxQuantLoader
+    from alphastats import DataSet
+
+import pandas as pd
+import plotly.express as px
 
 from streamlit.runtime import get_instance
 from streamlit.runtime.scriptrunner.script_run_context import get_script_run_ctx
 
 runtime = get_instance()
 session_id = get_script_run_ctx().session_id
-session_info = runtime._session_mgr.get_session_info(session_id)
+# session_info = runtime._session_mgr.get_session_info(session_id)
 
 user_session_id = session_id
-st.session_state["user_session_id"] = user_session_id
+st.session_state[StateKeys.USER_SESSION_ID] = user_session_id
 
 if "loader" not in st.session_state:
-    st.session_state["loader"] = None
+    st.session_state[StateKeys.LOADER] = None
 
 if "gene_to_prot_id" not in st.session_state:
-    st.session_state["gene_to_prot_id"] = {}
+    st.session_state[StateKeys.GENE_TO_PROT_ID] = {}
 
 if "organism" not in st.session_state:
-    st.session_state["organism"] = 9606  # human
+    st.session_state[StateKeys.ORGANISM] = 9606  # human
 
 sidebar_info()
 
diff --git a/alphastats/gui/pages/03_Data Overview.py b/alphastats/gui/pages/03_Data Overview.py
index 267157e2..c24885b8 100644
--- a/alphastats/gui/pages/03_Data Overview.py
+++ b/alphastats/gui/pages/03_Data Overview.py
@@ -8,7 +8,7 @@
         get_intensity_distribution_processed,
         get_sample_histogram_matrix,
     )
-    from alphastats.gui.utils.ui_helper import sidebar_info
+    from alphastats.gui.utils.ui_helper import sidebar_info, StateKeys
 
 except ModuleNotFoundError:
     from utils.overview_helper import (
@@ -16,11 +16,11 @@
         get_intensity_distribution_processed,
         get_sample_histogram_matrix,
     )
-    from utils.ui_helper import sidebar_info
+    from utils.ui_helper import sidebar_info, StateKeys
 
 sidebar_info()
 
-if "dataset" in st.session_state:
+if StateKeys.DATASET in st.session_state:
     st.markdown("## DataSet overview")
 
     c1, c2 = st.columns(2)
@@ -28,7 +28,7 @@
     with c1:
         st.markdown("**Intensity distribution raw data per sample**")
         st.plotly_chart(
-            st.session_state.distribution_plot.update_layout(plot_bgcolor="white"),
+            st.session_state[StateKeys.DISTRIBUTION_PLOT].update_layout(plot_bgcolor="white"),
             use_container_width=True,
         )
 
@@ -36,14 +36,14 @@
         st.markdown("**Intensity distribution data per sample used for analysis**")
         st.plotly_chart(
             get_intensity_distribution_processed(
-                user_session_id=st.session_state.user_session_id
+                user_session_id=st.session_state[StateKeys.USER_SESSION_ID]
         ).update_layout(plot_bgcolor="white"),
         use_container_width=True,
     )
diff --git a/alphastats/gui/utils/import_helper.py b/alphastats/gui/utils/import_helper.py
index 8de82721..2c323584 100644
--- a/alphastats/gui/utils/import_helper.py
+++ b/alphastats/gui/utils/import_helper.py
@@ -7,6 +7,7 @@
 
 try:
     from alphastats.DataSet import DataSet
+    from alphastats.gui.utils.ui_helper import StateKeys
     from alphastats.gui.utils.analysis_helper import (
         get_sample_names_from_software_file,
         read_uploaded_file_into_df,
@@ -15,6 +16,7 @@
     from alphastats.loader.MaxQuantLoader import MaxQuantLoader
 
 except ModuleNotFoundError:
+    from utils.ui_helper import StateKeys
     from utils.analysis_helper import (
         get_sample_names_from_software_file,
         read_uploaded_file_into_df,
@@ -27,8 +29,8 @@
 def load_options():
     from alphastats.gui.utils.options import plotting_options, statistic_options
 
-    st.session_state["plotting_options"] = plotting_options(st.session_state)
-    st.session_state["statistic_options"] = statistic_options(st.session_state)
+    st.session_state[StateKeys.PLOTTING_OPTIONS] = plotting_options(st.session_state)
+    st.session_state[StateKeys.STATISTIC_OPTIONS] = statistic_options(st.session_state)
 
 
 def load_proteomics_data(uploaded_file, intensity_column, index_column, software):
@@ -60,16 +62,16 @@ def upload_softwarefile(software):
         select_columns_for_loaders(software=software, software_df=softwarefile_df)
 
         if (
-            "intensity_column" in st.session_state
-            and "index_column" in st.session_state
+            StateKeys.INTENSITY_COLUMN in st.session_state
+            and StateKeys.INDEX_COLUMN in st.session_state
         ):
             loader = load_proteomics_data(
                 softwarefile_df,
-                intensity_column=st.session_state.intensity_column,
-                index_column=st.session_state.index_column,
+                intensity_column=st.session_state[StateKeys.INTENSITY_COLUMN],
+                index_column=st.session_state[StateKeys.INDEX_COLUMN],
                 software=software,
             )
-            st.session_state["loader"] = loader
+            st.session_state[StateKeys.LOADER] = loader
 
 
 def upload_metadatafile(software):
@@ -80,8 +82,13 @@ def upload_metadatafile(software):
         key="metadatafile",
     )
-    if metadatafile_upload is not None and st.session_state.loader is not None:
-        metadatafile_df = read_uploaded_file_into_df(st.session_state.metadatafile)
+    if (
+        metadatafile_upload is not None
+        and st.session_state[StateKeys.LOADER] is not None
+    ):
+        metadatafile_df = read_uploaded_file_into_df(
+            st.session_state[StateKeys.METADATAFILE]
+        )
         # display metadata
         st.write(
             f"File successfully uploaded. Number of rows: {metadatafile_df.shape[0]}"
         )
@@ -92,15 +99,17 @@
         if select_sample_column_metadata(metadatafile_df, software):
             # create dataset
-            st.session_state["dataset"] = DataSet(
-                loader=st.session_state.loader,
+            st.session_state[StateKeys.DATASET] = DataSet(
+                loader=st.session_state[StateKeys.LOADER],
                 metadata_path=metadatafile_df,
-                sample_column=st.session_state.sample_column,
+                sample_column=st.session_state[StateKeys.SAMPLE_COLUMN],
+            )
+            st.session_state[StateKeys.METADATA_COLUMNS] = (
+                metadatafile_df.columns.to_list()
             )
-            st.session_state["metadata_columns"] = metadatafile_df.columns.to_list()
             load_options()
 
-    if st.session_state.loader is not None:
+    if st.session_state[StateKeys.LOADER] is not None:
         create_metadata_file()
         st.write(
             "Download the template file and add additional information as "
             + "Upload the updated metadata file."
         )
 
-    if st.session_state.loader is not None:
+    if st.session_state[StateKeys.LOADER] is not None:
         if st.button("Create a DataSet without metadata"):
-            st.session_state["dataset"] = DataSet(loader=st.session_state.loader)
-            st.session_state["metadata_columns"] = ["sample"]
+            st.session_state[StateKeys.DATASET] = DataSet(
+                loader=st.session_state[StateKeys.LOADER]
+            )
+            st.session_state[StateKeys.METADATA_COLUMNS] = ["sample"]
             load_options()
 
 
@@ -170,9 +181,9 @@
         ]
     ]
     ds.preprocess(subset=True)
-    st.session_state["loader"] = loader
-    st.session_state["metadata_columns"] = ds.metadata.columns.to_list()
-    st.session_state["dataset"] = ds
+    st.session_state[StateKeys.LOADER] = loader
+    st.session_state[StateKeys.METADATA_COLUMNS] = ds.metadata.columns.to_list()
+    st.session_state[StateKeys.DATASET] = ds
     load_options()
 
 
@@ -186,43 +197,45 @@
         key="software",
     )
 
-    if st.session_state.software != "":
+    upload_softwarefile(software=st.session_state[StateKeys.SOFTWARE])
 
     if "loader" not in st.session_state:
-        st.session_state["loader"] = None
-    if st.session_state.loader is not None:
-        upload_metadatafile(st.session_state.software)
+        st.session_state[StateKeys.LOADER] = None
+    if st.session_state[StateKeys.LOADER] is not None:
+        upload_metadatafile(st.session_state[StateKeys.SOFTWARE])
 
 
 def display_loaded_dataset():
     st.info("Data was successfully imported")
     st.info("DataSet has been created")
 
-    st.markdown(f"*Preview:* Raw data from {st.session_state.dataset.software}")
-    st.dataframe(st.session_state.dataset.rawinput.head(5))
+    st.markdown(
+        f"*Preview:* Raw data from {st.session_state[StateKeys.DATASET].software}"
+    )
+    st.dataframe(st.session_state[StateKeys.DATASET].rawinput.head(5))
 
     st.markdown("*Preview:* Metadata")
-    st.dataframe(st.session_state.dataset.metadata.head(5))
+    st.dataframe(st.session_state[StateKeys.DATASET].metadata.head(5))
 
     st.markdown("*Preview:* Matrix")
 
     df = pd.DataFrame(
-        st.session_state.dataset.mat.values,
-        index=st.session_state.dataset.mat.index.to_list(),
+        st.session_state[StateKeys.DATASET].mat.values,
+        index=st.session_state[StateKeys.DATASET].mat.index.to_list(),
     ).head(5)
 
     st.dataframe(df)
 
 
 def save_plot_sampledistribution_rawdata():
-    df = st.session_state.dataset.rawmat
+    df = st.session_state[StateKeys.DATASET].rawmat
     df = df.unstack().reset_index()
     df.rename(
-        columns={"level_1": st.session_state.dataset.sample, 0: "Intensity"},
+        columns={"level_1": st.session_state[StateKeys.DATASET].sample, 0: "Intensity"},
         inplace=True,
     )
-    st.session_state["distribution_plot"] = px.violin(
-        df, x=st.session_state.dataset.sample, y="Intensity"
+    st.session_state[StateKeys.DISTRIBUTION_PLOT] = px.violin(
+        df, x=st.session_state[StateKeys.DATASET].sample, y="Intensity"
     )
@@ -233,12 +246,12 @@ def empty_session_state():
     for key in st.session_state.keys():
         del st.session_state[key]
     st.empty()
-    st.session_state["software"] = ""
 
     from streamlit.runtime.scriptrunner.script_run_context import get_script_run_ctx
 
     user_session_id = get_script_run_ctx().session_id
-    st.session_state["user_session_id"] = user_session_id
+    st.session_state[StateKeys.USER_SESSION_ID] = user_session_id
 
 
 def check_software_file(df, software):
@@ -348,8 +361,8 @@ def select_sample_column_metadata(df, software):
         submitted = st.form_submit_button("Create DataSet")
 
     if submitted:
-        if len(df[st.session_state.sample_column].to_list()) != len(
-            df[st.session_state.sample_column].unique()
+        if len(df[st.session_state[StateKeys.SAMPLE_COLUMN]].to_list()) != len(
+            df[st.session_state[StateKeys.SAMPLE_COLUMN]].unique()
         ):
             st.error("Sample names have to be unique.")
             st.stop()
@@ -357,8 +370,8 @@
 
 
 def create_metadata_file():
-    dataset = DataSet(loader=st.session_state.loader)
-    st.session_state["metadata_columns"] = ["sample"]
+    dataset = DataSet(loader=st.session_state[StateKeys.LOADER])
+    st.session_state[StateKeys.METADATA_COLUMNS] = ["sample"]
     metadata = dataset.metadata
     buffer = io.BytesIO()
diff --git a/alphastats/gui/utils/ui_helper.py b/alphastats/gui/utils/ui_helper.py
index d7a8779e..a2716043 100644
--- a/alphastats/gui/utils/ui_helper.py
+++ b/alphastats/gui/utils/ui_helper.py
@@ -58,3 +58,29 @@ def img_to_bytes(img_path):
     # img_bytes = Path(img_path).read_bytes()
     # encoded = base64.b64encode(img_bytes).decode()
     return encoded_string.decode()
+
+
+class StateKeys:
+    ## 02_Data Import
+    # on 1st run
+    ORGANISM = "organism"
+    GENE_TO_PROT_ID = "gene_to_prot_id"
+    USER_SESSION_ID = "user_session_id"
+    LOADER = "loader"
+    SOFTWARE = "software"
+    # on sample run (function load_sample_data), removed on new session click
+    DATASET = "dataset"  # functions upload_metadatafile
+    PLOTTING_OPTIONS = "plotting_options"  # function load_options
+    STATISTIC_OPTIONS = "statistic_options"  # function load_options
+    DISTRIBUTION_PLOT = (
+        "distribution_plot"  # function save_plot_sampledistribution_rawdata
+    )
+    METADATA_COLUMNS = (
+        "metadata_columns"  # function create_metadata_file, upload_metadatafile
+    )
+    # on data upload
+    INTENSITY_COLUMN = "intensity_column"
+    INDEX_COLUMN = "index_column"
+    # on metadata upload
+    METADATAFILE = "metadatafile"
+    SAMPLE_COLUMN = "sample_column"
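
Reviewer note (not part of the patch): the whole refactor funnels st.session_state access through the shared StateKeys constants instead of raw strings and attribute access. The minimal sketch below illustrates the intended usage pattern; it only assumes that StateKeys is importable from ui_helper as added above, and the init_defaults helper is a hypothetical name used for illustration, not code from this patch.

import streamlit as st

from alphastats.gui.utils.ui_helper import StateKeys


def init_defaults() -> None:
    # Hypothetical helper: seed session state once per session through the
    # shared key constants, mirroring the first-run block in 02_Import Data.py.
    defaults = {
        StateKeys.LOADER: None,         # set once a software file is uploaded
        StateKeys.GENE_TO_PROT_ID: {},  # filled lazily during analysis
        StateKeys.ORGANISM: 9606,       # human
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


init_defaults()

# Reads also go through the constants, so a typo surfaces as an AttributeError
# on StateKeys instead of silently creating a new session-state entry.
if st.session_state[StateKeys.LOADER] is not None:
    st.write("A loader is configured for this session.")

Compared to the old mix of st.session_state["loader"] and st.session_state.loader, every key now has a single greppable definition, which is what most of the hunks above apply mechanically.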
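
The change to empty_session_state() (dropping the st.session_state["software"] = "" reset) leaves the "new session" flow as: delete every key, then re-seed only the session id; everything else is recreated lazily on the next rerun. A rough sketch of that flow under the same assumptions as above; reset_session is a hypothetical name:

import streamlit as st
from streamlit.runtime.scriptrunner.script_run_context import get_script_run_ctx

from alphastats.gui.utils.ui_helper import StateKeys


def reset_session() -> None:
    # Drop every session-state entry; the import page recreates its defaults
    # (LOADER, GENE_TO_PROT_ID, ORGANISM, ...) when it runs again.
    for key in list(st.session_state.keys()):
        del st.session_state[key]
    # Only the user session id is re-seeded immediately.
    st.session_state[StateKeys.USER_SESSION_ID] = get_script_run_ctx().session_id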