Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix: Fix model_selection_check() #4

Merged
merged 2 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions peak_performance/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -1409,7 +1409,7 @@ def selected_models_to_template(


def model_selection_check(
result_df: pandas.DataFrame, ic: str, elpd_threshold: Union[str, float] = 25
result_df: pandas.DataFrame, ic: str, elpd_threshold: Union[str, float] = 35
) -> str:
"""
During model selection, double peak models are sometimes incorrectly preferred due to their increased complexity.
Expand All @@ -1435,10 +1435,11 @@ def model_selection_check(
selected_model = str(result_df.index[0])
if "double" in selected_model:
df_single_peak_models = result_df[~result_df.index.str.contains("double")]
elpd_single = max(list(df_single_peak_models[f"elpd_{ic}"]))
elpd_double = max(list(result_df[f"elpd_{ic}"]))
if not elpd_double > elpd_single + elpd_threshold:
selected_model = str(df_single_peak_models.index[0])
if len(df_single_peak_models) > 0:
elpd_single = max(list(df_single_peak_models[f"elpd_{ic}"]))
elpd_double = max(list(result_df[f"elpd_{ic}"]))
if not elpd_double > elpd_single + elpd_threshold:
selected_model = str(df_single_peak_models.index[0])
return selected_model


Expand Down
9 changes: 8 additions & 1 deletion peak_performance/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,11 +636,18 @@ def test_model_selection_check():
assert selected_model == "normal"
# case 2: double peak exceeds elpd score difference threshold and is thus accepted
result_df = pandas.DataFrame(
{"elpd_loo": [50, 30, 10, -5], "ic": ["loo", "loo", "loo", "loo"]},
{"elpd_loo": [50, 30, 20, -5], "ic": ["loo", "loo", "loo", "loo"]},
index=["double_normal", "double_skew_normal", "normal", "skew_normal"],
)
selected_model = pl.model_selection_check(result_df, "loo", 25)
assert selected_model == "double_normal"
# case 3: single peak models were excluded
result_df = pandas.DataFrame(
{"elpd_loo": [50, 30], "ic": ["loo", "loo"]},
index=["double_normal", "double_skew_normal"],
)
selected_model = pl.model_selection_check(result_df, "loo", 25)
assert selected_model == "double_normal"
pass


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "peak_performance"
version = "0.6.4"
version = "0.6.5"
authors = [
{name = "Jochen Nießer", email = "j.niesser@fz-juelich.de"},
{name = "Michael Osthege", email = "m.osthege@fz-juelich.de"},
Expand Down
Loading