Skip to content

Commit

Permalink
feat: update cfg experiments (#909)
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinBernstorff authored May 15, 2024
1 parent a18f876 commit e37e6f1
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 19 deletions.
16 changes: 8 additions & 8 deletions psycop/projects/cvd/model_training/cvd_baseline.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

[logger.*.mlflow]
@loggers = "mlflow_logger"
experiment_name = "cvd_hyperparam_tuning_joblib"
experiment_name = "baseline_v2_cvd"
postpone_run_creation_to_first_log = True

[trainer]
Expand All @@ -21,7 +21,7 @@ group_col_name = "dw_ek_borger"

[trainer.training_data]
@data = "parquet_vertical_concatenator"
paths = ["E:/shared_resources/cvd/feature_set/flattened_datasets/cvd_lookbehind_experiments/train.parquet", "E:/shared_resources/cvd/feature_set/flattened_datasets/cvd_lookbehind_experiments/val.parquet"]
paths = ["E:/shared_resources/cvd/feature_set/flattened_datasets/train.parquet", "E:/shared_resources/cvd/feature_set/flattened_datasets/val.parquet"]
validate_on_init = False

#################
Expand All @@ -45,12 +45,12 @@ age_col_name = "pred_age_in_years"
[trainer.preprocessing_pipeline.*.layer_selector]
@preprocessing = "filter_columns_within_subset"
subset_rule = "pred_.+layer.+"
keep_matching = ".+_layer_(1|2|3).+"
keep_matching = ".+_layer_(1).+"

# [trainer.preprocessing_pipeline.*.aggregation_selector]
# @preprocessing = "filter_columns_within_subset"
# subset_rule = "pred_.+layer.+"
# keep_matching = ".+_(mean|max|min)_.+"
[trainer.preprocessing_pipeline.*.aggregation_selector]
@preprocessing = "filter_columns_within_subset"
subset_rule = "pred_.+layer.+"
keep_matching = ".+_(mean)_.+"

## Outcomes
[trainer.preprocessing_pipeline.*.outcome_selector]
Expand All @@ -75,7 +75,7 @@ column_expectations = [["outc_", 1], ["prediction_timestamp", 0]]
# Estimator #
#############
[trainer.task.task_pipe.sklearn_pipe.*.model]
@estimator_steps_suggesters = "lightgbm_suggester"
@estimator_steps = "xgboost"

########
# Task #
Expand Down
39 changes: 39 additions & 0 deletions psycop/projects/cvd/model_training/train_cvd_layers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import logging
from pathlib import Path

import confection

from psycop.common.model_training_v2.config.baseline_pipeline import train_baseline_model_from_cfg
from psycop.common.model_training_v2.config.populate_registry import populate_baseline_registry
from psycop.projects.cvd.model_training.populate_cvd_registry import populate_with_cvd_registry

# Aggregation tokens are underscore-delimited in the baseline cfg
# (``.+_(mean)_.+``). Keeping the delimiters here prevents a feature whose
# *name* merely contains "min"/"max" (e.g. "vitamin") from being kept.
ALL_AGGREGATIONS_PATTERN = ".+_(mean|min|max)_.+"


def layer_keep_pattern(layers):
    """Return the ``keep_matching`` regex that keeps the given layers.

    Args:
        layers: Iterable of layer labels as strings, e.g. ``["1", "2"]``.

    Returns:
        A regex of the form ``.+_layer_(1|2).+``.
    """
    return f".+_layer_({'|'.join(layers)}).+"


def main() -> None:
    """Train the CVD baseline once per cumulative layer set (1, 1-2, 1-3, 1-4).

    For the layer-1 configuration an additional model is trained with the
    aggregation selector widened to mean/min/max.
    """
    import coloredlogs

    coloredlogs.install(  # type: ignore
        level="INFO",
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y/%m/%d %H:%M:%S",
    )

    populate_baseline_registry()
    populate_with_cvd_registry()

    cfg_path = Path(__file__).parent / "cvd_baseline.cfg"

    for layer in range(1, 5):
        # Reload from disk on every iteration so that overrides applied in a
        # previous iteration never leak into the next experiment.
        cfg = confection.Config().from_disk(cfg_path)
        pipeline_steps = cfg["trainer"]["preprocessing_pipeline"]["*"]

        layers = [str(i) for i in range(1, layer + 1)]
        pipeline_steps["layer_selector"]["keep_matching"] = layer_keep_pattern(layers)

        logging.info("Training model with layers %s", layers)
        train_baseline_model_from_cfg(cfg=cfg)

        if layer == 1:
            # Extra run: same layer-1 features, but with all aggregations
            # instead of the cfg's mean-only default.
            pipeline_steps["aggregation_selector"]["keep_matching"] = ALL_AGGREGATIONS_PATTERN

            logging.info("Training model with %s", ALL_AGGREGATIONS_PATTERN)
            train_baseline_model_from_cfg(cfg=cfg)


if __name__ == "__main__":
    main()
26 changes: 26 additions & 0 deletions psycop/projects/cvd/model_training/train_cvd_lookbehinds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from pathlib import Path

import confection

from psycop.common.model_training_v2.config.baseline_pipeline import train_baseline_model_from_cfg
from psycop.common.model_training_v2.config.populate_registry import populate_baseline_registry
from psycop.projects.cvd.model_training.populate_cvd_registry import populate_with_cvd_registry

LOOKBEHIND_DATASET_DIR = (
    "E:/shared_resources/cvd/feature_set/flattened_datasets/cvd_lookbehind_experiments"
)

# Same splits the baseline cfg feeds to ``trainer.training_data``. The held-out
# test split is deliberately excluded so it cannot leak into training.
# NOTE(review): the original script used ("train", "test") - confirm that was
# not intentional.
TRAINING_SPLITS = ("train", "val")


def lookbehind_training_paths(splits=TRAINING_SPLITS):
    """Return the parquet paths of the lookbehind-experiment feature set.

    Args:
        splits: Names of the dataset splits to include.

    Returns:
        One ``<dataset dir>/<split>.parquet`` path string per split, in order.
    """
    return [f"{LOOKBEHIND_DATASET_DIR}/{split}.parquet" for split in splits]


def main() -> None:
    """Train the CVD baseline model on the lookbehind-experiment feature set."""
    import coloredlogs

    coloredlogs.install(  # type: ignore
        level="INFO",
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y/%m/%d %H:%M:%S",
    )

    populate_baseline_registry()
    populate_with_cvd_registry()

    cfg = confection.Config().from_disk(Path(__file__).parent / "cvd_baseline.cfg")
    # Only the data source differs from the baseline cfg; everything else
    # (preprocessing, estimator, logger) is taken from disk unchanged.
    cfg["trainer"]["training_data"]["paths"] = lookbehind_training_paths()
    train_baseline_model_from_cfg(cfg=cfg)


if __name__ == "__main__":
    main()
10 changes: 0 additions & 10 deletions psycop/projects/cvd/model_training/train_cvd_model.py

This file was deleted.

1 change: 0 additions & 1 deletion psycop/timeseriesflattener
Submodule timeseriesflattener deleted from 4b89ec

0 comments on commit e37e6f1

Please sign in to comment.