
Add/st dataframe pagenation #77

Merged — 6 commits merged on Aug 30, 2024
Changes from 3 commits
src/common.py — 78 additions, 0 deletions
@@ -251,6 +251,68 @@ def v_space(n: int, col=None) -> None:
    else:
        st.write("#")

def display_large_dataframe(df,
                            chunk_sizes: list[int] = [100, 1_000, 10_000]):
    """
    Displays a large DataFrame in chunks with pagination controls and row selection.

    Args:
        df: The DataFrame to display.
        chunk_sizes: A list of chunk sizes to choose from.

    Returns:
        Selected rows from the current chunk.
    """
    # Dropdown for selecting chunk size
    chunk_size = st.selectbox("Select Number of Rows to Display", chunk_sizes)

    # Calculate total number of chunks
    total_chunks = (len(df) + chunk_size - 1) // chunk_size

    # Initialize session state for pagination
    if 'current_chunk' not in st.session_state:
        st.session_state.current_chunk = 0

    # Function to get the current chunk of the DataFrame
    def get_current_chunk(df, chunk_size, chunk_index):
        start = chunk_index * chunk_size
        end = min(start + chunk_size, len(df))  # Ensure end does not exceed dataframe length
        return df.iloc[start:end], start, end

    # Display the current chunk
    current_chunk_df, start_row, end_row = get_current_chunk(df, chunk_size, st.session_state.current_chunk)

    event = st.dataframe(
        current_chunk_df,
        column_order=[
            "spectrum ID",
            "RT",
            "MS level",
            "max intensity m/z",
            "precursor m/z",
        ],
        selection_mode="single-row",
        on_select="rerun",
        use_container_width=True,
        hide_index=True,
    )

Member
This should be more flexible, as the data displayed may vary. I think adding column_order as a function parameter would work best. Is there a reason why selection_mode, on_select, use_container_width, and hide_index have to be set to these specific values? If not, I would suggest adding those as parameters as well.

Anything specific to a particular workflow/dataset should not be located in common.py, as it is intended to contain functions of general use. As a side note, common modules should be moved into a separate directory to make this clearer to developers.

Contributor Author
That's a good point. These are set specifically because I came across this issue when using the pyopenms workflow, which selects spectra to plot when a row is selected, but this should be more flexible for other use cases.

We could pass kwargs through display_large_dataframe to st.dataframe, because that is probably the only call that would need to change for particular needs. I opted for this option.

st.write(f"Showing rows {start_row + 1} to {end_row} of {len(df)} ({get_dataframe_mem_useage(current_chunk_df):.2f} MB)")

# Pagination buttons
col1, col2, col3 = st.columns([1, 2, 1])

with col1:
if st.button("Previous") and st.session_state.current_chunk > 0:
st.session_state.current_chunk -= 1

with col3:
if st.button("Next") and st.session_state.current_chunk < total_chunks - 1:
st.session_state.current_chunk += 1

if event is not None:
return event
return None
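For orientation, a hedged usage sketch of the helper as merged. The import path and the example DataFrame are assumptions (view.py imports it as `from .common import display_large_dataframe`), and the column names simply mirror the hard-coded column_order above:

```python
import pandas as pd
import streamlit as st

from src.common import display_large_dataframe  # import path assumed for this sketch

# Illustrative data only; real callers pass e.g. the spectra overview table.
df = pd.DataFrame({
    "spectrum ID": range(1, 25_001),
    "RT": [i * 0.01 for i in range(25_000)],
    "MS level": [1] * 25_000,
    "max intensity m/z": [500.0] * 25_000,
    "precursor m/z": [None] * 25_000,
})

event = display_large_dataframe(df)
if event is not None and event.selection.rows:
    # Selection indices are positions within the currently displayed chunk, not the full DataFrame.
    st.write(f"Selected row {event.selection.rows[0]} of the current chunk")
```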

def show_table(df: pd.DataFrame, download_name: str = "") -> None:
"""
Expand Down Expand Up @@ -353,6 +415,22 @@ def reset_directory(path: Path) -> None:
shutil.rmtree(path)
path.mkdir(parents=True, exist_ok=True)

def get_dataframe_mem_useage(df):
    """
    Get the memory usage of a pandas DataFrame in megabytes.

    Args:
        df (pd.DataFrame): The DataFrame to calculate the memory usage for.

    Returns:
        float: The memory usage of the DataFrame in megabytes.
    """
    # Calculate the memory usage of the DataFrame in bytes
    memory_usage_bytes = df.memory_usage(deep=True).sum()
    # Convert bytes to megabytes
    memory_usage_mb = memory_usage_bytes / (1024 ** 2)
    return memory_usage_mb
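A quick illustration (not part of the PR) of why deep=True matters here: without it, pandas counts only the pointers for object/string columns, so the reported size would understate the real footprint of mixed-type tables like the spectra overview. The data below is illustrative:

```python
import pandas as pd

df = pd.DataFrame({"name": [f"spectrum_{i}" for i in range(100_000)]})

shallow_mb = df.memory_usage().sum() / (1024 ** 2)          # object pointers only
deep_mb = df.memory_usage(deep=True).sum() / (1024 ** 2)    # includes the string payloads

print(f"shallow: {shallow_mb:.2f} MB, deep: {deep_mb:.2f} MB")  # deep is noticeably larger
```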


# General warning/error messages
WARNINGS = {
src/view.py — 2 additions, 16 deletions

@@ -6,7 +6,7 @@
import streamlit as st
import pyopenms as poms
from .plotting.MSExperimentPlotter import plotMSExperiment
-from .common import show_fig
+from .common import show_fig, display_large_dataframe

from typing import Union

@@ -216,27 +216,13 @@ def view_peak_map():
    peak_map_3D = plotMSExperiment(df, plot3D=True, title="")
    st.pyplot(peak_map_3D, use_container_width=True)


@st.experimental_fragment
def view_spectrum():
    cols = st.columns([0.34, 0.66])
    with cols[0]:
        df = st.session_state.view_spectra.copy()
        df["spectrum ID"] = df.index + 1
-        event = st.dataframe(
-            df,
-            column_order=[
-                "spectrum ID",
-                "RT",
-                "MS level",
-                "max intensity m/z",
-                "precursor m/z",
-            ],
-            selection_mode="single-row",
-            on_select="rerun",
-            use_container_width=True,
-            hide_index=True,
-        )
+        event = display_large_dataframe(df)
        rows = event.selection.rows
    with cols[1]:
        if rows: