
Merge pull request #261 from Aarhus-Psychiatry-Research/HLasse/Add-wandb-eval-watcher

feat: add wandb eval watcher
MartinBernstorff authored Oct 20, 2022
2 parents 90cb270 + 19373de commit 7cffd2c
Showing 12 changed files with 474 additions and 252 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -21,7 +21,7 @@ python src/psycopt2d/train_model.py --config-name test_config.yaml +model=xgboos

To test new integrations with WandB:
```python
python src/psycopt2d/train_model.py +model=xgboost project.wandb_model="run" --config-name integration_testing.yaml
python src/psycopt2d/train_model.py +model=xgboost project.wandb_mode="run" --config-name integration_testing.yaml
```


2 changes: 1 addition & 1 deletion src/psycopt2d/config/sweeper/optuna_singlethread.yaml
@@ -7,5 +7,5 @@ hydra:
sweeper:
sampler:
seed: 123
n_trials: 1000
n_trials: 3
direction: maximize
19 changes: 19 additions & 0 deletions src/psycopt2d/dataclasses/configs.py
@@ -0,0 +1,19 @@
"""Dataclasses used in the project."""
from typing import Optional

import pandas as pd
from omegaconf import DictConfig
from pydantic import BaseModel

# pylint: disable=missing-class-docstring, too-few-public-methods


class ModelEvalData(BaseModel):
"""Dataclass for model evaluation data."""

class Config:
arbitrary_types_allowed = True

df: pd.DataFrame
cfg: DictConfig
feature_importance_dict: Optional[dict[str, float]] = None
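
ModelEvalData bundles everything needed to evaluate a saved run: the prediction dataframe, the hydra config, and optional feature importances. A minimal construction sketch, assuming illustrative column names and config values that are not taken from the repository:

```python
import pandas as pd
from omegaconf import OmegaConf

from psycopt2d.dataclasses.configs import ModelEvalData

# Illustrative inputs; in practice these come from a saved evaluation run on disk
df = pd.DataFrame({"outc_t2d": [0, 1], "y_hat_prob": [0.12, 0.87]})
cfg = OmegaConf.create({"data": {"id_col_name": "citizen_id"}})

eval_data = ModelEvalData(
    df=df,
    cfg=cfg,
    feature_importance_dict={"pred_hba1c": 0.7, "pred_age": 0.3},
)
```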
68 changes: 16 additions & 52 deletions src/psycopt2d/evaluate_saved_model_predictions.py
@@ -2,73 +2,37 @@
pickle file with model predictions and hydra config.
Possible extensions (JIT when needed):
- Load most recent pickle file from 'evaluation_results' folder
- Load most recent directory from 'evaluation_results'/Overtaci equivalent folder
- Evaluate all models in 'evaluation_results' folder
- CLI for evaluating a model
"""
from pathlib import Path

import pandas as pd
from omegaconf.dictconfig import DictConfig

from psycopt2d.utils import PROJECT_ROOT, read_pickle
from psycopt2d.utils import (
PROJECT_ROOT,
infer_outcome_col_name,
infer_y_hat_prob_col_name,
load_evaluation_data,
)
from psycopt2d.visualization import plot_auc_by_time_from_first_visit


def infer_outcome_col_name(df: pd.DataFrame, prefix: str = "outc_") -> str:
"""Infer the outcome column name from the dataframe."""
outcome_name = [c for c in df.columns if c.startswith(prefix)]
if len(outcome_name) == 1:
return outcome_name[0]
else:
raise ValueError("More than one outcome inferred")


def infer_predictor_col_names(df: pd.DataFrame, cfg: DictConfig) -> list[str]:
"""Get the predictors that are used in the model.
Args:
df (pd.Dataframe): Dataframe with model predictions
cfg (DictConfig): Config file
Returns:
list[str]: list of predictors
"""
return [c for c in df.columns if c.startswith(cfg.data.pred_col_name_prefix)]


def load_model_predictions_and_cfg(path: Path) -> tuple[pd.DataFrame, DictConfig]:
"""Load model predictions and config file from a pickle file.
Args:
path (Path): Path to pickle file
"""
obj = read_pickle(path)
return obj["df"], obj["cfg"]


if __name__ == "__main__":

eval_path = PROJECT_ROOT / "evaluation_results"
eval_df, cfg = load_model_predictions_and_cfg(
eval_path
# insert your own model path here
/ "eval_model_name-xgboost_require_imputation-True_args-n_estimators-100_tree_method-auto_2022_09_22_10_52.pkl",
)
# change to whatever model you wish to evaluate
eval_data = load_evaluation_data(eval_path / "2022_10_18_13_23_2h3cxref")

train_col_names = infer_predictor_col_names(eval_df, cfg)
y_col_name = infer_outcome_col_name(eval_df)
y_col_name = infer_outcome_col_name(eval_data.df)

Y_HAT_PROB_COL_NAME = "y_hat_prob" # change to 'y_hat_prob_oof' if using cv
y_hat_prob_name = infer_y_hat_prob_col_name(eval_data.df)

first_visit_timestamp = eval_df.groupby(cfg.data.id_col_name)[
cfg.data.pred_timestamp_col_name
first_visit_timestamp = eval_data.df.groupby(eval_data.cfg.data.id_col_name)[
eval_data.cfg.data.pred_timestamp_col_name
].transform("min")

# Do whatever extra evaluation you want to here
p = plot_auc_by_time_from_first_visit(
labels=eval_df[y_col_name],
y_hat_probs=eval_df[Y_HAT_PROB_COL_NAME],
labels=eval_data.df[y_col_name],
y_hat_probs=eval_data.df[y_hat_prob_name],
first_visit_timestamps=first_visit_timestamp,
prediction_timestamps=eval_df[cfg.data.pred_timestamp_col_name],
prediction_timestamps=eval_data.df[eval_data.cfg.data.pred_timestamp_col_name],
)
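
The script now relies on helpers from psycopt2d.utils that are not part of this diff. From the call sites, load_evaluation_data takes a run directory and returns a ModelEvalData, while infer_y_hat_prob_col_name plays the same role for predicted probabilities that the removed infer_outcome_col_name plays for the label. A sketch of that inference helper, assuming it mirrors the removed implementation; the real code in psycopt2d.utils may differ:

```python
import pandas as pd


def infer_y_hat_prob_col_name(df: pd.DataFrame, prefix: str = "y_hat_prob") -> str:
    """Infer the predicted-probability column name by prefix.

    Sketch mirroring the removed infer_outcome_col_name; the prefix and
    error handling in psycopt2d.utils may differ.
    """
    candidates = [c for c in df.columns if c.startswith(prefix)]
    if len(candidates) != 1:
        raise ValueError(f"Expected exactly one '{prefix}*' column, found {len(candidates)}")
    return candidates[0]
```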
69 changes: 13 additions & 56 deletions src/psycopt2d/evaluation.py
@@ -1,26 +1,20 @@
"""Functions for evaluating a model's predictions."""
from collections.abc import Iterable
from pathlib import Path
from typing import Optional, Union
from typing import Optional

import numpy as np
import pandas as pd
from omegaconf.dictconfig import DictConfig
from sklearn.metrics import recall_score, roc_auc_score
from sklearn.pipeline import Pipeline
from wandb.sdk.wandb_run import Run as wandb_run # pylint: disable=no-name-in-module
from wasabi import Printer

from psycopt2d.tables import generate_feature_importances_table
from psycopt2d.tables.performance_by_threshold import (
generate_performance_by_positive_rate_table,
)
from psycopt2d.utils import (
AUC_LOGGING_FILE_PATH,
PROJECT_ROOT,
positive_rate_to_pred_probs,
prediction_df_with_metadata_to_disk,
)
from psycopt2d.utils import PROJECT_ROOT, positive_rate_to_pred_probs
from psycopt2d.visualization import (
plot_auc_by_time_from_first_visit,
plot_feature_importances,
@@ -35,58 +29,37 @@

def log_feature_importances(
cfg: DictConfig,
pipe: Pipeline,
feature_names: Iterable[str],
feature_importance_dict: dict[str, float],
run: wandb_run,
save_path: Optional[Path] = None,
) -> dict[str, Path]:
"""Log feature importances to wandb."""
# Handle EBM differently as it autogenerates interaction terms
if cfg.model.model_name == "ebm":
feature_names = pipe["model"].feature_names

feature_importance_plot_path = plot_feature_importances(
column_names=feature_names,
feature_importances=pipe["model"].feature_importances_,
feature_names=feature_importance_dict.keys(),
feature_importances=feature_importance_dict.values(),
top_n_feature_importances=cfg.evaluation.top_n_feature_importances,
save_path=save_path,
)

# Log as table too for readability
feature_importances_table = generate_feature_importances_table(
feature_names=feature_names,
feature_importances=pipe["model"].feature_importances_,
feature_names=feature_importance_dict.keys(),
feature_importances=feature_importance_dict.values(),
)

run.log({"feature_importance_table": feature_importances_table})

return {"feature_importance": feature_importance_plot_path}


def log_auc_to_file(cfg: DictConfig, run: wandb_run, auc: Union[float, int]):
"""Log AUC to file."""
# log AUC and run ID to a file to find the best run later
# Only create the file if it doesn't exists (will be auto-deleted/moved after
# syncing). This is to avoid creating a new file every time the script is run
# e.g. during a hyperparameter search.
if cfg.project.wandb_mode in {"offline", "dryrun"}:
if not AUC_LOGGING_FILE_PATH.exists():
AUC_LOGGING_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
AUC_LOGGING_FILE_PATH.touch()
AUC_LOGGING_FILE_PATH.write_text("run_id,auc\n")
with open(AUC_LOGGING_FILE_PATH, "a", encoding="utf-8") as f:
f.write(f"{run.id},{auc}\n")


def evaluate_model(
cfg,
pipe: Pipeline,
eval_df: pd.DataFrame,
y_col_name: str,
train_col_names: Iterable[str],
y_hat_prob_col_name: str,
run: wandb_run,
):
feature_importance_dict: Optional[dict[str, float]],
) -> None:
"""Runs the evaluation suite on the model and logs to WandB.
At present, this includes:
1. AUC
@@ -99,12 +72,12 @@ def evaluate_model(
Args:
cfg (OmegaConf): The hydra config from the run
pipe (Pipeline): Pipeline including the model
eval_df (pd.DataFrame): Evaluation split
y_col_name (str): Label column name
train_col_names (Iterable[str]): Column names for all predictors
y_hat_prob_col_name (str): Column name containing pred_proba output
run (wandb_run): WandB run to log to.
feature_importance_dict (Optional[dict[str, float]]): Dict of feature
names and their importance. If None, will not log feature importance.
"""
msg = Printer(timestamp=True)

@@ -159,11 +132,8 @@ def evaluate_model(
)

msg.info(f"AUC: {auc}")
run.log({"roc_auc_unweighted": auc})
run.log({"1_minus_roc_auc_unweighted": 1 - auc})

log_auc_to_file(cfg, run=run, auc=auc)

# Tables
# Performance by threshold
performance_by_threshold_df = generate_performance_by_positive_rate_table(
@@ -183,26 +153,16 @@ def evaluate_model(
plots = {}

# Feature importance
if hasattr(pipe["model"], "feature_importances_"):
feature_names = train_col_names

if feature_importance_dict is not None:
feature_importances_plot_dict = log_feature_importances(
cfg=cfg,
pipe=pipe,
feature_names=train_col_names,
feature_importance_dict=feature_importance_dict,
run=run,
save_path=SAVE_DIR / "feature_importances.png",
)

plots.update(feature_importances_plot_dict)

# Log as table too for readability
feature_importances_table = generate_feature_importances_table(
feature_names=feature_names,
feature_importances=pipe["model"].feature_importances_,
)
run.log({"feature_importance_table": feature_importances_table})

# Add plots
plots.update(
{
@@ -249,7 +209,4 @@ def evaluate_model(
for chart_name, chart_path in plots.items():
log_image_to_wandb(chart_path=chart_path, chart_name=chart_name, run=run)

# Save results to disk
prediction_df_with_metadata_to_disk(df=eval_df, cfg=cfg, run=run)

msg.info("Finished model evaluation, logging charts to WandB")