
Merge pull request #261 from Aarhus-Psychiatry-Research/HLasse/Add-wandb-eval-watcher

feat: add wandb eval watcher
MartinBernstorff authored Oct 20, 2022
2 parents 90cb270 + 19373de commit 7cffd2c
Showing 12 changed files with 474 additions and 252 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -21,7 +21,7 @@ python src/psycopt2d/train_model.py --config-name test_config.yaml +model=xgboos

To test new integrations with WandB:
```python
python src/psycopt2d/train_model.py +model=xgboost project.wandb_model="run" --config-name integration_testing.yaml
python src/psycopt2d/train_model.py +model=xgboost project.wandb_mode="run" --config-name integration_testing.yaml
```


2 changes: 1 addition & 1 deletion src/psycopt2d/config/sweeper/optuna_singlethread.yaml
@@ -7,5 +7,5 @@ hydra:
sweeper:
sampler:
seed: 123
n_trials: 1000
n_trials: 3
direction: maximize
19 changes: 19 additions & 0 deletions src/psycopt2d/dataclasses/configs.py
@@ -0,0 +1,19 @@
"""Dataclasses used in the project."""
from typing import Optional

import pandas as pd
from omegaconf import DictConfig
from pydantic import BaseModel

# pylint: disable=missing-class-docstring, too-few-public-methods


class ModelEvalData(BaseModel):
"""Dataclass for model evaluation data."""

class Config:
arbitrary_types_allowed = True

df: pd.DataFrame
cfg: DictConfig
feature_importance_dict: Optional[dict[str, float]] = None
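
ModelEvalData bundles everything needed to evaluate a saved run: the prediction dataframe, the hydra config, and optional feature importances. A minimal construction sketch, assuming illustrative column names and config values that are not taken from the repository:

```python
import pandas as pd
from omegaconf import OmegaConf

from psycopt2d.dataclasses.configs import ModelEvalData

# Illustrative inputs; in practice these come from a saved evaluation run on disk
df = pd.DataFrame({"outc_t2d": [0, 1], "y_hat_prob": [0.12, 0.87]})
cfg = OmegaConf.create({"data": {"id_col_name": "citizen_id"}})

eval_data = ModelEvalData(
    df=df,
    cfg=cfg,
    feature_importance_dict={"pred_hba1c": 0.7, "pred_age": 0.3},
)
```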
68 changes: 16 additions & 52 deletions src/psycopt2d/evaluate_saved_model_predictions.py
@@ -2,73 +2,37 @@
pickle file with model predictions and hydra config.
Possible extensions (JIT when needed):
- Load most recent pickle file from 'evaluation_results' folder
- Load most recent directory from 'evaluation_results'/Overtaci equivalent folder
- Evaluate all models in 'evaluation_results' folder
- CLI for evaluating a model
"""
from pathlib import Path

import pandas as pd
from omegaconf.dictconfig import DictConfig

from psycopt2d.utils import PROJECT_ROOT, read_pickle
from psycopt2d.utils import (
PROJECT_ROOT,
infer_outcome_col_name,
infer_y_hat_prob_col_name,
load_evaluation_data,
)
from psycopt2d.visualization import plot_auc_by_time_from_first_visit


def infer_outcome_col_name(df: pd.DataFrame, prefix: str = "outc_") -> str:
"""Infer the outcome column name from the dataframe."""
outcome_name = [c for c in df.columns if c.startswith(prefix)]
if len(outcome_name) == 1:
return outcome_name[0]
else:
raise ValueError("More than one outcome inferred")


def infer_predictor_col_names(df: pd.DataFrame, cfg: DictConfig) -> list[str]:
"""Get the predictors that are used in the model.
Args:
df (pd.Dataframe): Dataframe with model predictions
cfg (DictConfig): Config file
Returns:
list[str]: list of predictors
"""
return [c for c in df.columns if c.startswith(cfg.data.pred_col_name_prefix)]


def load_model_predictions_and_cfg(path: Path) -> tuple[pd.DataFrame, DictConfig]:
"""Load model predictions and config file from a pickle file.
Args:
path (Path): Path to pickle file
"""
obj = read_pickle(path)
return obj["df"], obj["cfg"]


if __name__ == "__main__":

eval_path = PROJECT_ROOT / "evaluation_results"
eval_df, cfg = load_model_predictions_and_cfg(
eval_path
# insert your own model path here
/ "eval_model_name-xgboost_require_imputation-True_args-n_estimators-100_tree_method-auto_2022_09_22_10_52.pkl",
)
# change to whatever model you wish to evaluate
eval_data = load_evaluation_data(eval_path / "2022_10_18_13_23_2h3cxref")

train_col_names = infer_predictor_col_names(eval_df, cfg)
y_col_name = infer_outcome_col_name(eval_df)
y_col_name = infer_outcome_col_name(eval_data.df)

Y_HAT_PROB_COL_NAME = "y_hat_prob" # change to 'y_hat_prob_oof' if using cv
y_hat_prob_name = infer_y_hat_prob_col_name(eval_data.df)

first_visit_timestamp = eval_df.groupby(cfg.data.id_col_name)[
cfg.data.pred_timestamp_col_name
first_visit_timestamp = eval_data.df.groupby(eval_data.cfg.data.id_col_name)[
eval_data.cfg.data.pred_timestamp_col_name
].transform("min")

# Do whatever extra evaluation you want to here
p = plot_auc_by_time_from_first_visit(
labels=eval_df[y_col_name],
y_hat_probs=eval_df[Y_HAT_PROB_COL_NAME],
labels=eval_data.df[y_col_name],
y_hat_probs=eval_data.df[y_hat_prob_name],
first_visit_timestamps=first_visit_timestamp,
prediction_timestamps=eval_df[cfg.data.pred_timestamp_col_name],
prediction_timestamps=eval_data.df[eval_data.cfg.data.pred_timestamp_col_name],
)
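
The script now relies on helpers from psycopt2d.utils that are not part of this diff. From the call sites, load_evaluation_data takes a run directory and returns a ModelEvalData, while infer_y_hat_prob_col_name plays the same role for predicted probabilities that the removed infer_outcome_col_name plays for the label. A sketch of that inference helper, assuming it mirrors the removed implementation; the real code in psycopt2d.utils may differ:

```python
import pandas as pd


def infer_y_hat_prob_col_name(df: pd.DataFrame, prefix: str = "y_hat_prob") -> str:
    """Infer the predicted-probability column name by prefix.

    Sketch mirroring the removed infer_outcome_col_name; the prefix and
    error handling in psycopt2d.utils may differ.
    """
    candidates = [c for c in df.columns if c.startswith(prefix)]
    if len(candidates) != 1:
        raise ValueError(f"Expected exactly one '{prefix}*' column, found {len(candidates)}")
    return candidates[0]
```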
69 changes: 13 additions & 56 deletions src/psycopt2d/evaluation.py
@@ -1,26 +1,20 @@
"""Functions for evaluating a model's predictions."""
from collections.abc import Iterable
from pathlib import Path
from typing import Optional, Union
from typing import Optional

import numpy as np
import pandas as pd
from omegaconf.dictconfig import DictConfig
from sklearn.metrics import recall_score, roc_auc_score
from sklearn.pipeline import Pipeline
from wandb.sdk.wandb_run import Run as wandb_run # pylint: disable=no-name-in-module
from wasabi import Printer

from psycopt2d.tables import generate_feature_importances_table
from psycopt2d.tables.performance_by_threshold import (
generate_performance_by_positive_rate_table,
)
from psycopt2d.utils import (
AUC_LOGGING_FILE_PATH,
PROJECT_ROOT,
positive_rate_to_pred_probs,
prediction_df_with_metadata_to_disk,
)
from psycopt2d.utils import PROJECT_ROOT, positive_rate_to_pred_probs
from psycopt2d.visualization import (
plot_auc_by_time_from_first_visit,
plot_feature_importances,
@@ -35,58 +29,37 @@

def log_feature_importances(
cfg: DictConfig,
pipe: Pipeline,
feature_names: Iterable[str],
feature_importance_dict: dict[str, float],
run: wandb_run,
save_path: Optional[Path] = None,
) -> dict[str, Path]:
"""Log feature importances to wandb."""
# Handle EBM differently as it autogenerates interaction terms
if cfg.model.model_name == "ebm":
feature_names = pipe["model"].feature_names

feature_importance_plot_path = plot_feature_importances(
column_names=feature_names,
feature_importances=pipe["model"].feature_importances_,
feature_names=feature_importance_dict.keys(),
feature_importances=feature_importance_dict.values(),
top_n_feature_importances=cfg.evaluation.top_n_feature_importances,
save_path=save_path,
)

# Log as table too for readability
feature_importances_table = generate_feature_importances_table(
feature_names=feature_names,
feature_importances=pipe["model"].feature_importances_,
feature_names=feature_importance_dict.keys(),
feature_importances=feature_importance_dict.values(),
)

run.log({"feature_importance_table": feature_importances_table})

return {"feature_importance": feature_importance_plot_path}


def log_auc_to_file(cfg: DictConfig, run: wandb_run, auc: Union[float, int]):
"""Log AUC to file."""
# log AUC and run ID to a file to find the best run later
# Only create the file if it doesn't exists (will be auto-deleted/moved after
# syncing). This is to avoid creating a new file every time the script is run
# e.g. during a hyperparameter search.
if cfg.project.wandb_mode in {"offline", "dryrun"}:
if not AUC_LOGGING_FILE_PATH.exists():
AUC_LOGGING_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
AUC_LOGGING_FILE_PATH.touch()
AUC_LOGGING_FILE_PATH.write_text("run_id,auc\n")
with open(AUC_LOGGING_FILE_PATH, "a", encoding="utf-8") as f:
f.write(f"{run.id},{auc}\n")


def evaluate_model(
cfg,
pipe: Pipeline,
eval_df: pd.DataFrame,
y_col_name: str,
train_col_names: Iterable[str],
y_hat_prob_col_name: str,
run: wandb_run,
):
feature_importance_dict: Optional[dict[str, float]],
) -> None:
"""Runs the evaluation suite on the model and logs to WandB.
At present, this includes:
1. AUC
@@ -99,12 +72,12 @@ def evaluate_model(
Args:
cfg (OmegaConf): The hydra config from the run
pipe (Pipeline): Pipeline including the model
eval_df (pd.DataFrame): Evaluation split
y_col_name (str): Label column name
train_col_names (Iterable[str]): Column names for all predictors
y_hat_prob_col_name (str): Column name containing pred_proba output
run (wandb_run): WandB run to log to.
feature_importance_dict (Optional[dict[str, float]]): Dict of feature
names and their importance. If None, will not log feature importance.
"""
msg = Printer(timestamp=True)

@@ -159,11 +132,8 @@ def evaluate_model(
)

msg.info(f"AUC: {auc}")
run.log({"roc_auc_unweighted": auc})
run.log({"1_minus_roc_auc_unweighted": 1 - auc})

log_auc_to_file(cfg, run=run, auc=auc)

# Tables
# Performance by threshold
performance_by_threshold_df = generate_performance_by_positive_rate_table(
@@ -183,26 +153,16 @@ def evaluate_model(
plots = {}

# Feature importance
if hasattr(pipe["model"], "feature_importances_"):
feature_names = train_col_names

if feature_importance_dict is not None:
feature_importances_plot_dict = log_feature_importances(
cfg=cfg,
pipe=pipe,
feature_names=train_col_names,
feature_importance_dict=feature_importance_dict,
run=run,
save_path=SAVE_DIR / "feature_importances.png",
)

plots.update(feature_importances_plot_dict)

# Log as table too for readability
feature_importances_table = generate_feature_importances_table(
feature_names=feature_names,
feature_importances=pipe["model"].feature_importances_,
)
run.log({"feature_importance_table": feature_importances_table})

# Add plots
plots.update(
{
@@ -249,7 +209,4 @@ def evaluate_model(
for chart_name, chart_path in plots.items():
log_image_to_wandb(chart_path=chart_path, chart_name=chart_name, run=run)

# Save results to disk
prediction_df_with_metadata_to_disk(df=eval_df, cfg=cfg, run=run)

msg.info("Finished model evaluation, logging charts to WandB")