From 9ff6e91a9e8bd0f63249af9904cb06f7ff3ed1f6 Mon Sep 17 00:00:00 2001 From: bokajgd Date: Fri, 7 Oct 2022 13:48:01 +0200 Subject: [PATCH 1/4] pass empty list to cat_features Fixes #38 --- .../data_checks/flattened/data_integrity.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py index 38d0c24e..8fba8e1b 100644 --- a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py +++ b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py @@ -170,6 +170,7 @@ def check_train_data_integrity( df=train_predictors, index_name="dw_ek_borger", datetime_name="timestamp", + cat_features=[], ) # Running checks that do not require a label @@ -193,6 +194,7 @@ def check_train_data_integrity( index_name="dw_ek_borger", datetime_name="timestamp", label=train_outcomes_df[outcome_column], + cat_features=[], ) suite_results = label_checks.run(data_s) @@ -417,7 +419,7 @@ def save_feature_set_integrity_from_dir( # noqa pylint: disable=too-many-statem if not out_dir.exists(): out_dir.mkdir() - train_outcomes_df = load_split_outcomes( + train_outcomes_df = load_split_ƒoutcomes( feature_set_dir=feature_set_dir, split="train", nrows=n_rows, From 667a9053f89413ada54624ae19d0d7e880724573 Mon Sep 17 00:00:00 2001 From: bokajgd Date: Fri, 7 Oct 2022 14:05:48 +0200 Subject: [PATCH 2/4] fix: Resolves errors caused by auto cat features --- .../data_checks/flattened/data_integrity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py index 8fba8e1b..1603a3f8 100644 --- a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py +++ b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py @@ -1,4 +1,4 @@ -"""Code to generate data 
integrity and train/val/test drift reports.""" +"""Code to generate data integrity report and train/val/test drift reports.""" from pathlib import Path from typing import Any, Optional From f2447156beef5128819f97f7a9554d03d394e01a Mon Sep 17 00:00:00 2001 From: bokajgd Date: Fri, 7 Oct 2022 14:23:59 +0200 Subject: [PATCH 3/4] fix: auto inferred cat features error --- .../data_checks/flattened/data_integrity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py index 1603a3f8..908333b3 100644 --- a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py +++ b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py @@ -419,7 +419,7 @@ def save_feature_set_integrity_from_dir( # noqa pylint: disable=too-many-statem if not out_dir.exists(): out_dir.mkdir() - train_outcomes_df = load_split_ƒoutcomes( + train_outcomes_df = load_split_outcomes( feature_set_dir=feature_set_dir, split="train", nrows=n_rows, From ea0d946cbf658d8d7e22d45363f9dd7d5a7e3fff Mon Sep 17 00:00:00 2001 From: bokajgd Date: Fri, 7 Oct 2022 15:21:26 +0200 Subject: [PATCH 4/4] fix: auto inferred cat features --- .../data_checks/flattened/data_integrity.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py index 222902d8..02aa848f 100644 --- a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py +++ b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py @@ -283,6 +283,7 @@ def get_split_as_ds_dict( df=predictors, index_name="dw_ek_borger", datetime_name="timestamp", + cat_features=[], ) return {