From 0101accb995d060908b28f1338a313d82661683a Mon Sep 17 00:00:00 2001 From: Lasse Hansen Date: Wed, 5 Oct 2022 15:46:47 +0200 Subject: [PATCH 1/3] fix: hardcoded file suffix --- .../data_checks/flattened/data_integrity.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py index 9e767e1c..623a6731 100644 --- a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py +++ b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py @@ -401,6 +401,8 @@ def save_feature_set_integrity_from_dir( # noqa pylint: disable=too-many-statem out_dir (Optional[Path]): Path to the directory where the reports should be saved file_suffix (str, optional): Suffix of the files to load. Must be either "csv" or "parquet". """ + if file_suffix is None: + file_suffix = "parquet" if file_suffix not in ["parquet", "csv"]: raise ValueError( f"file_suffix must be either 'parquet' or 'csv', got {file_suffix}", @@ -421,7 +423,7 @@ def save_feature_set_integrity_from_dir( # noqa pylint: disable=too-many-statem feature_set_dir=feature_set_dir, split="train", nrows=n_rows, - file_suffix="parquet", + file_suffix=file_suffix, ) failed_checks = ( From b6f18a2a30dffdba5ee90ba2bf96792157159945 Mon Sep 17 00:00:00 2001 From: Martin Bernstorff Date: Wed, 5 Oct 2022 15:49:09 +0200 Subject: [PATCH 2/3] Update data_integrity.py --- .../data_checks/flattened/data_integrity.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py index 623a6731..da5ee630 100644 --- a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py +++ b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py @@ -385,7 +385,7 @@ def save_feature_set_integrity_from_dir( # noqa pylint: disable=too-many-statem n_rows: Optional[int] = None, split_names: Optional[list[str]] = None, out_dir: Optional[Path] = None, - file_suffix: Optional[str] = None, + file_suffix: str = "parquet", ) -> None: """Runs Deepcheck data integrity and train/val/test checks for a given directory containing train/val/test files. Splits indicates which data. @@ -401,8 +401,6 @@ def save_feature_set_integrity_from_dir( # noqa pylint: disable=too-many-statem out_dir (Optional[Path]): Path to the directory where the reports should be saved file_suffix (str, optional): Suffix of the files to load. Must be either "csv" or "parquet". """ - if file_suffix is None: - file_suffix = "parquet" if file_suffix not in ["parquet", "csv"]: raise ValueError( f"file_suffix must be either 'parquet' or 'csv', got {file_suffix}", From 8d0a28b89cea0f5afa8a233cc850f13085d26bc8 Mon Sep 17 00:00:00 2001 From: Lasse Hansen Date: Wed, 5 Oct 2022 15:54:56 +0200 Subject: [PATCH 3/3] chore: update docstring --- .../data_checks/flattened/data_integrity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py index da5ee630..38d0c24e 100644 --- a/src/psycop_feature_generation/data_checks/flattened/data_integrity.py +++ b/src/psycop_feature_generation/data_checks/flattened/data_integrity.py @@ -399,7 +399,7 @@ def save_feature_set_integrity_from_dir( # noqa pylint: disable=too-many-statem Should only be used for debugging. split_names (list[str]): list of splits to check (train, val, test) out_dir (Optional[Path]): Path to the directory where the reports should be saved - file_suffix (str, optional): Suffix of the files to load. Must be either "csv" or "parquet". + file_suffix (str, optional): Suffix of the files to load. Must be either "csv" or "parquet". Defaults to "parquet". """ if file_suffix not in ["parquet", "csv"]: raise ValueError(