Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bookkeeping of session state variables #291

Closed
wants to merge 9 commits into from
Closed
27 changes: 22 additions & 5 deletions alphastats/gui/pages/02_Import Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
empty_session_state,
)
from alphastats.gui.utils.ui_helper import sidebar_info
from alphastats.DataSet import DataSet
from alphastats.gui.utils.analysis_helper import (
get_sample_names_from_software_file,
read_uploaded_file_into_df,
)
from alphastats.gui.utils.ui_helper import sidebar_info, StateKeys
from alphastats.loader.MaxQuantLoader import MaxQuantLoader

except ModuleNotFoundError:
from utils.ui_helper import sidebar_info
Expand All @@ -22,24 +29,34 @@
empty_session_state,
)

from utils.ui_helper import sidebar_info, StateKeys
from utils.analysis_helper import (
get_sample_names_from_software_file,
read_uploaded_file_into_df,
)
from alphastats import MaxQuantLoader
from alphastats import DataSet

import pandas as pd
import plotly.express as px
from streamlit.runtime import get_instance
from streamlit.runtime.scriptrunner.script_run_context import get_script_run_ctx

runtime = get_instance()
session_id = get_script_run_ctx().session_id
session_info = runtime._session_mgr.get_session_info(session_id)
# session_info = runtime._session_mgr.get_session_info(session_id)

user_session_id = session_id
st.session_state["user_session_id"] = user_session_id
st.session_state[StateKeys.USER_SESSION_ID] = user_session_id

if "loader" not in st.session_state:
st.session_state["loader"] = None
st.session_state[StateKeys.LOADER] = None

if "gene_to_prot_id" not in st.session_state:
st.session_state["gene_to_prot_id"] = {}
st.session_state[StateKeys.GENE_TO_PROT_ID] = {}

if "organism" not in st.session_state:
st.session_state["organism"] = 9606 # human
st.session_state[StateKeys.ORGANISM] = 9606 # human

sidebar_info()

Expand Down
12 changes: 6 additions & 6 deletions alphastats/gui/pages/03_Data Overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,42 +8,42 @@
get_intensity_distribution_processed,
get_sample_histogram_matrix,
)
from alphastats.gui.utils.ui_helper import sidebar_info
from alphastats.gui.utils.ui_helper import sidebar_info, StateKeys

except ModuleNotFoundError:
from utils.overview_helper import (
display_matrix,
get_intensity_distribution_processed,
get_sample_histogram_matrix,
)
from utils.ui_helper import sidebar_info
from utils.ui_helper import sidebar_info, StateKeys

sidebar_info()

if "dataset" in st.session_state:
if StateKeys.DATASET in st.session_state:
st.markdown("## DataSet overview")

c1, c2 = st.columns(2)

with c1:
st.markdown("**Intensity distribution raw data per sample**")
st.plotly_chart(
st.session_state.distribution_plot.update_layout(plot_bgcolor="white"),
st.session_state[StateKeys.DISTRIBUTION_PLOT].update_layout(plot_bgcolor="white"),
use_container_width=True,
)

with c2:
st.markdown("**Intensity distribution data per sample used for analysis**")
st.plotly_chart(
get_intensity_distribution_processed(
user_session_id=st.session_state.user_session_id
user_session_id=st.session_state[StateKeys.USER_SESSION_ID]
).update_layout(plot_bgcolor="white"),
use_container_width=True,
)

st.plotly_chart(
get_sample_histogram_matrix(
user_session_id=st.session_state.user_session_id
user_session_id=st.session_state[StateKeys.USER_SESSION_ID]
).update_layout(plot_bgcolor="white"),
use_container_width=True,
)
Expand Down
93 changes: 53 additions & 40 deletions alphastats/gui/utils/import_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

try:
from alphastats.DataSet import DataSet
from alphastats.gui.utils.ui_helper import StateKeys
from alphastats.gui.utils.analysis_helper import (
get_sample_names_from_software_file,
read_uploaded_file_into_df,
Expand All @@ -15,6 +16,7 @@
from alphastats.loader.MaxQuantLoader import MaxQuantLoader

except ModuleNotFoundError:
from utils.ui_helper import StateKeys
from utils.analysis_helper import (
get_sample_names_from_software_file,
read_uploaded_file_into_df,
Expand All @@ -27,8 +29,8 @@
def load_options():
from alphastats.gui.utils.options import plotting_options, statistic_options

st.session_state["plotting_options"] = plotting_options(st.session_state)
st.session_state["statistic_options"] = statistic_options(st.session_state)
st.session_state[StateKeys.PLOTTING_OPTIONS] = plotting_options(st.session_state)
st.session_state[StateKeys.STATISTIC_OPTIONS] = statistic_options(st.session_state)


def load_proteomics_data(uploaded_file, intensity_column, index_column, software):
Expand Down Expand Up @@ -60,16 +62,16 @@ def upload_softwarefile(software):
select_columns_for_loaders(software=software, software_df=softwarefile_df)

if (
"intensity_column" in st.session_state
and "index_column" in st.session_state
StateKeys.INTENSITY_COLUMN in st.session_state
and StateKeys.INDEX_COLUMN in st.session_state
):
loader = load_proteomics_data(
softwarefile_df,
intensity_column=st.session_state.intensity_column,
index_column=st.session_state.index_column,
intensity_column=st.session_state[StateKeys.INTENSITY_COLUMN],
index_column=st.session_state[StateKeys.INDEX_COLUMN],
software=software,
)
st.session_state["loader"] = loader
st.session_state[StateKeys.LOADER] = loader


def upload_metadatafile(software):
Expand All @@ -80,8 +82,13 @@ def upload_metadatafile(software):
key="metadatafile",
)

if metadatafile_upload is not None and st.session_state.loader is not None:
metadatafile_df = read_uploaded_file_into_df(st.session_state.metadatafile)
if (
metadatafile_upload is not None
and st.session_state[StateKeys.LOADER] is not None
):
metadatafile_df = read_uploaded_file_into_df(
st.session_state[StateKeys.METADATAFILE]
)
# display metadata
st.write(
f"File successfully uploaded. Number of rows: {metadatafile_df.shape[0]}"
Expand All @@ -92,26 +99,30 @@ def upload_metadatafile(software):

if select_sample_column_metadata(metadatafile_df, software):
# create dataset
st.session_state["dataset"] = DataSet(
loader=st.session_state.loader,
st.session_state[StateKeys.DATASET] = DataSet(
loader=st.session_state[StateKeys.LOADER],
metadata_path=metadatafile_df,
sample_column=st.session_state.sample_column,
sample_column=st.session_state[StateKeys.SAMPLE_COLUMN],
)
st.session_state[StateKeys.METADATA_COLUMNS] = (
metadatafile_df.columns.to_list()
)
st.session_state["metadata_columns"] = metadatafile_df.columns.to_list()
load_options()

if st.session_state.loader is not None:
if st.session_state[StateKeys.LOADER] is not None:
create_metadata_file()
st.write(
"Download the template file and add additional information as "
+ "columns to your samples such as disease group. "
+ "Upload the updated metadata file."
)

if st.session_state.loader is not None:
if st.session_state[StateKeys.LOADER] is not None:
if st.button("Create a DataSet without metadata"):
st.session_state["dataset"] = DataSet(loader=st.session_state.loader)
st.session_state["metadata_columns"] = ["sample"]
st.session_state[StateKeys.DATASET] = DataSet(
loader=st.session_state[StateKeys.LOADER]
)
st.session_state[StateKeys.METADATA_COLUMNS] = ["sample"]

load_options()

Expand Down Expand Up @@ -170,9 +181,9 @@ def load_sample_data():
]
]
ds.preprocess(subset=True)
st.session_state["loader"] = loader
st.session_state["metadata_columns"] = ds.metadata.columns.to_list()
st.session_state["dataset"] = ds
st.session_state[StateKeys.LOADER] = loader
st.session_state[StateKeys.METADATA_COLUMNS] = ds.metadata.columns.to_list()
st.session_state[StateKeys.DATASET] = ds

load_options()

Expand All @@ -186,43 +197,45 @@ def import_data():
key="software",
)

if st.session_state.software != "<select>":
upload_softwarefile(software=st.session_state.software)
if st.session_state[StateKeys.SOFTWARE] != "<select>":
upload_softwarefile(software=st.session_state[StateKeys.SOFTWARE])
if "loader" not in st.session_state:
st.session_state["loader"] = None
if st.session_state.loader is not None:
upload_metadatafile(st.session_state.software)
st.session_state[StateKeys.LOADER] = None
if st.session_state[StateKeys.LOADER] is not None:
upload_metadatafile(st.session_state[StateKeys.SOFTWARE])


def display_loaded_dataset():
st.info("Data was successfully imported")
st.info("DataSet has been created")

st.markdown(f"*Preview:* Raw data from {st.session_state.dataset.software}")
st.dataframe(st.session_state.dataset.rawinput.head(5))
st.markdown(
f"*Preview:* Raw data from {st.session_state[StateKeys.DATASET].software}"
)
st.dataframe(st.session_state[StateKeys.DATASET].rawinput.head(5))

st.markdown("*Preview:* Metadata")
st.dataframe(st.session_state.dataset.metadata.head(5))
st.dataframe(st.session_state[StateKeys.DATASET].metadata.head(5))

st.markdown("*Preview:* Matrix")

df = pd.DataFrame(
st.session_state.dataset.mat.values,
index=st.session_state.dataset.mat.index.to_list(),
st.session_state[StateKeys.DATASET].mat.values,
index=st.session_state[StateKeys.DATASET].mat.index.to_list(),
).head(5)

st.dataframe(df)


def save_plot_sampledistribution_rawdata():
df = st.session_state.dataset.rawmat
df = st.session_state[StateKeys.DATASET].rawmat
df = df.unstack().reset_index()
df.rename(
columns={"level_1": st.session_state.dataset.sample, 0: "Intensity"},
columns={"level_1": st.session_state[StateKeys.DATASET].sample, 0: "Intensity"},
inplace=True,
)
st.session_state["distribution_plot"] = px.violin(
df, x=st.session_state.dataset.sample, y="Intensity"
st.session_state[StateKeys.DISTRIBUTION_PLOT] = px.violin(
df, x=st.session_state[StateKeys.DATASET].sample, y="Intensity"
)


Expand All @@ -233,12 +246,12 @@ def empty_session_state():
for key in st.session_state.keys():
del st.session_state[key]
st.empty()
st.session_state["software"] = "<select>"
st.session_state[StateKeys.SOFTWARE] = "<select>"

from streamlit.runtime.scriptrunner.script_run_context import get_script_run_ctx

user_session_id = get_script_run_ctx().session_id
st.session_state["user_session_id"] = user_session_id
st.session_state[StateKeys.USER_SESSION_ID] = user_session_id


def check_software_file(df, software):
Expand Down Expand Up @@ -348,17 +361,17 @@ def select_sample_column_metadata(df, software):
submitted = st.form_submit_button("Create DataSet")

if submitted:
if len(df[st.session_state.sample_column].to_list()) != len(
df[st.session_state.sample_column].unique()
if len(df[st.session_state[StateKeys.SAMPLE_COLUMN]].to_list()) != len(
df[st.session_state[StateKeys.SAMPLE_COLUMN]].unique()
):
st.error("Sample names have to be unique.")
st.stop()
return True


def create_metadata_file():
dataset = DataSet(loader=st.session_state.loader)
st.session_state["metadata_columns"] = ["sample"]
dataset = DataSet(loader=st.session_state[StateKeys.LOADER])
st.session_state[StateKeys.METADATA_COLUMNS] = ["sample"]
metadata = dataset.metadata
buffer = io.BytesIO()

Expand Down
26 changes: 26 additions & 0 deletions alphastats/gui/utils/ui_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,29 @@ def img_to_bytes(img_path):
# img_bytes = Path(img_path).read_bytes()
# encoded = base64.b64encode(img_bytes).decode()
return encoded_string.decode()


class StateKeys:
    """Names of the entries the GUI keeps in ``st.session_state``.

    Pages and helpers reference these constants instead of repeating string
    literals, so each key is spelled in exactly one place. The values are
    plain ``str`` objects: ``StateKeys.LOADER`` and ``"loader"`` address the
    same session-state entry, which keeps code that still uses the literal
    (for example ``if "loader" not in st.session_state``) compatible.
    """

    ## 02_Import Data
    # on 1st run
    ORGANISM = "organism"
    GENE_TO_PROT_ID = "gene_to_prot_id"
    USER_SESSION_ID = "user_session_id"
    LOADER = "loader"
    # Must stay equal to the ``key="software"`` argument used in import_data.
    SOFTWARE = "software"

    # on sample run (function load_sample_data), removed on new session click
    # functions upload_metadatafile
    DATASET = "dataset"
    # function load_options
    PLOTTING_OPTIONS = "plotting_options"
    # function load_options
    STATISTIC_OPTIONS = "statistic_options"
    # function save_plot_sampledistribution_rawdata
    DISTRIBUTION_PLOT = "distribution_plot"
    # function create_metadata_file, upload_metadatafile
    METADATA_COLUMNS = "metadata_columns"

    # on data upload
    INTENSITY_COLUMN = "intensity_column"
    INDEX_COLUMN = "index_column"

    # on metadata upload
    # Must stay equal to the ``key="metadatafile"`` argument used in
    # upload_metadatafile.
    METADATAFILE = "metadatafile"
    SAMPLE_COLUMN = "sample_column"
Loading