Skip to content

Commit

Permalink
Merge pull request #178 from Proteobench/issue40_CV
Browse files Browse the repository at this point in the history
added median CV, Q75 and Q90 to datapoint.result
  • Loading branch information
RobbinBouwmeester authored Dec 18, 2023
2 parents ac2b97c + f55f0a3 commit fd36c13
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 16 deletions.
6 changes: 2 additions & 4 deletions proteobench/modules/dda_quant/datapoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
import pandas as pd


def filter_df_numquant_weighted_sum(row, min_quant=3):
def filter_df_numquant_median_abs_epsilon(row, min_quant=3):
if isinstance(row, dict) and min_quant in row and isinstance(row[min_quant], dict):
return row[min_quant].get("weighted_sum")
return row[min_quant].get("median_abs_epsilon")
return None


Expand Down Expand Up @@ -41,8 +41,6 @@ class Datapoint:
allowed_miscleavages: int = 0
min_peptide_length: int = 0
max_peptide_length: int = 0
weighted_sum: int = 0
nr_prec: int = 0
is_temporary: bool = True
intermediate_hash: str = ""
results: dict = None
Expand Down
24 changes: 21 additions & 3 deletions proteobench/modules/dda_quant/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,28 @@ def get_metrics(df, min_nr_observed=1):
# take abs value of df["epsilon"]
# TODO use nr_missing to filter df before computing stats.
df_slice = df[df["nr_observed"] >= min_nr_observed]
weighted_sum = round(df_slice["epsilon"].abs().mean(), ndigits=3)
nr_prec = len(df_slice)

return {min_nr_observed: {"weighted_sum": weighted_sum, "nr_prec": nr_prec}}
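# median of absolute epsilon is unaffected by outliers
# NOTE(review): the assignment below calls .mean(), not .median() — confirm which is intended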
median_abs_epsilon = df_slice["epsilon"].abs().mean()
# variance affected by outliers
variance_epsilon = df_slice["epsilon"].var()
# TODO: find a more concise way to describe the distribution of CVs
cv_median = (df_slice["CV_A"].median() + df_slice["CV_B"].median()) / 2
cv_q75 = (df_slice["CV_A"].quantile(0.75) + df_slice["CV_B"].quantile(0.75)) / 2
cv_q90 = (df_slice["CV_A"].quantile(0.9) + df_slice["CV_B"].quantile(0.9)) / 2
cv_q95 = (df_slice["CV_A"].quantile(0.95) + df_slice["CV_B"].quantile(0.95)) / 2

return {
min_nr_observed: {
"median_abs_epsilon": median_abs_epsilon,
"variance_epsilon": variance_epsilon,
"nr_prec": nr_prec,
"CV_median": cv_median,
"CV_q90": cv_q90,
"CV_q75": cv_q75,
"CV_q95": cv_q95,
}
}

def generate_datapoint(
self,
Expand Down
2 changes: 1 addition & 1 deletion proteobench/modules/dda_quant/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def plot_metric(self, benchmark_metrics_df: pd.DataFrame) -> go.Figure:
fig = go.Figure(
data=[
go.Scatter(
x=benchmark_metrics_df["weighted_sum"],
x=benchmark_metrics_df["median_abs_epsilon"],
y=benchmark_metrics_df["nr_prec"],
mode="markers",
text=hover_texts,
Expand Down
2 changes: 0 additions & 2 deletions proteobench/modules/template/datapoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ class Datapoint:
missed_cleavages: int = 0
min_pep_length: int = 0
max_pep_length: int = 0
weighted_sum: int = 0
nr_prec: int = 0

def calculate_benchmarking_metric_1(self, intermediate_data):
"""Calculates the first benchmarking metric based on the intermediate data.
Expand Down
2 changes: 1 addition & 1 deletion test/test_module_dda_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def test_benchmarking(self):
TESTDATA_FILES["MaxQuant"], "MaxQuant", user_input, None
)
self.assertTrue(isinstance(all_datapoints, pd.DataFrame))
self.assertEqual(len(all_datapoints.results[1]), 6)
self.assertEqual(len(all_datapoints.results[len(all_datapoints.results) - 1]), 6)


class TestWrongFormatting(unittest.TestCase):
Expand Down
13 changes: 8 additions & 5 deletions webinterface/pages/DDA_Quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from streamlit_extras.let_it_rain import rain

from proteobench.modules.dda_quant.datapoint import (
filter_df_numquant_median_abs_epsilon,
filter_df_numquant_nr_prec,
filter_df_numquant_weighted_sum,
)
from proteobench.modules.dda_quant.module import Module
from proteobench.modules.dda_quant.parse_settings import (
Expand Down Expand Up @@ -198,7 +198,9 @@ def _main_page(self):
all_datapoints = st.session_state[ALL_DATAPOINTS]
all_datapoints = Module().obtain_all_data_point(all_datapoints)

all_datapoints["weighted_sum"] = all_datapoints.apply(filter_df_numquant_weighted_sum, min_quant=3)
all_datapoints["median_abs_epsilon"] = all_datapoints.apply(
filter_df_numquant_median_abs_epsilon, min_quant=3
)
all_datapoints["nr_prec"] = all_datapoints.apply(filter_df_numquant_nr_prec, min_quant=3)

fig2 = PlotDataPoint().plot_metric(all_datapoints)
Expand Down Expand Up @@ -246,8 +248,9 @@ def _run_proteobench(self):

def slider_callback(self):
min_quant = st.session_state[st.session_state["slider_id"]]
st.session_state[ALL_DATAPOINTS]["weighted_sum"] = [
filter_df_numquant_weighted_sum(v, min_quant=min_quant) for v in st.session_state[ALL_DATAPOINTS]["results"]
st.session_state[ALL_DATAPOINTS]["median_abs_epsilon"] = [
filter_df_numquant_median_abs_epsilon(v, min_quant=min_quant)
for v in st.session_state[ALL_DATAPOINTS]["results"]
]
st.session_state[ALL_DATAPOINTS]["nr_prec"] = [
filter_df_numquant_nr_prec(v, min_quant=min_quant) for v in st.session_state[ALL_DATAPOINTS]["results"]
Expand Down Expand Up @@ -331,7 +334,7 @@ def generate_results(

if recalculate:
all_datapoints["weighted_sum"] = [
filter_df_numquant_weighted_sum(v, min_quant=3) for v in all_datapoints["results"]
filter_df_numquant_median_abs_epsilon(v, min_quant=3) for v in all_datapoints["results"]
]
all_datapoints["nr_prec"] = [filter_df_numquant_nr_prec(v, min_quant=3) for v in all_datapoints["results"]]

Expand Down

0 comments on commit fd36c13

Please sign in to comment.