From 04dfd7e7e038472cfd26f67c79a6b050cc13b15e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Gr=C3=B8hn?= Date: Thu, 17 Nov 2022 15:45:54 +0100 Subject: [PATCH 1/5] feat: at groupspec init, iterate over values_loader and check that they exist in the loader registry Fixes #87 --- .../timeseriesflattener/feature_spec_objects.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py b/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py index ba6c9348..968388f3 100644 --- a/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py +++ b/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py @@ -266,6 +266,20 @@ class MinGroupSpec(BaseModel): def __init__(self, **data): super().__init__(**data) + invalid_loaders = list() + + for loader in self.values_loader: + try: + loader in data_loaders.get_all() + except: + invalid_loaders.append(loader) + + if len(invalid_loaders) != 0: + nl = '\n' + raise ValueError(f"""Following loaders were not found in data_loaders registry: {nl}{nl.join(str(loader) for loader in invalid_loaders)}{nl}Available loaders: {nl}{nl.join(str(loader) for loader in data_loaders.get_all())}""") + + + if self.output_col_name_override: input_col_name = ( "value" From ba2d4c540f097c33ca5c29a0b72a908ad6dc04e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Gr=C3=B8hn?= Date: Thu, 17 Nov 2022 16:07:54 +0100 Subject: [PATCH 2/5] fix: find invalid loaders --- .../feature_spec_objects.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py b/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py index 968388f3..af111170 100644 --- a/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py +++ b/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py @@ -265,20 +265,12 @@ class MinGroupSpec(BaseModel): def __init__(self, **data): super().__init__(**data) - - invalid_loaders = list() - - for loader in self.values_loader: - try: - loader in data_loaders.get_all() - except: - invalid_loaders.append(loader) - + + # Check that all passed loaders are valid + invalid_loaders = list(set(self.values_loader) - set(data_loaders.get_all())) if len(invalid_loaders) != 0: - nl = '\n' - raise ValueError(f"""Following loaders were not found in data_loaders registry: {nl}{nl.join(str(loader) for loader in invalid_loaders)}{nl}Available loaders: {nl}{nl.join(str(loader) for loader in data_loaders.get_all())}""") - - + raise ValueError(f"""Following loaders were not found in data_loaders registry:{' '.join(str(loader) for loader in invalid_loaders)} + Available loaders: {' '.join(str(loader) for loader in data_loaders.get_all())}""") if self.output_col_name_override: input_col_name = ( From d92f7989af27a879fd090bed33ce5027e96e581b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Gr=C3=B8hn?= Date: Fri, 18 Nov 2022 10:05:44 +0100 Subject: [PATCH 3/5] fix: better valueerror message --- .../timeseriesflattener/feature_spec_objects.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py b/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py index af111170..827356f8 100644 --- a/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py +++ b/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py @@ -262,15 +262,16 @@ class MinGroupSpec(BaseModel): allowed_nan_value_prop: list[float] = [0.0] # If NaN is higher than this in the input dataframe during resolution, raise an error. - + def __init__(self, **data): super().__init__(**data) # Check that all passed loaders are valid invalid_loaders = list(set(self.values_loader) - set(data_loaders.get_all())) if len(invalid_loaders) != 0: - raise ValueError(f"""Following loaders were not found in data_loaders registry:{' '.join(str(loader) for loader in invalid_loaders)} - Available loaders: {' '.join(str(loader) for loader in data_loaders.get_all())}""") + raise ValueError(f"""Invalid loader(s) specified. Following loaders were not found in the data_loaders registry: + {' '.join(str(loader) for loader in invalid_loaders)} + Available loaders:{' '.join(str(loader) for loader in data_loaders.get_all())}""") if self.output_col_name_override: input_col_name = ( @@ -316,6 +317,7 @@ def create_specs_from_group( # Create all combinations of top level elements # For each attribute in the FeatureGroupSpec + feature_group_spec_dict = feature_group_spec.__dict__ permuted_dicts = create_feature_combinations_from_dict(d=feature_group_spec_dict) From 7b3b994cbe38df73a4149c4463b5f283ad297218 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Gr=C3=B8hn?= Date: Fri, 18 Nov 2022 11:41:07 +0100 Subject: [PATCH 4/5] fix: bettee valueerror message formatting --- .../timeseriesflattener/feature_spec_objects.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py b/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py index 827356f8..8b1996ae 100644 --- a/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py +++ b/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py @@ -262,16 +262,21 @@ class MinGroupSpec(BaseModel): allowed_nan_value_prop: list[float] = [0.0] # If NaN is higher than this in the input dataframe during resolution, raise an error. - + def __init__(self, **data): super().__init__(**data) - + # Check that all passed loaders are valid - invalid_loaders = list(set(self.values_loader) - set(data_loaders.get_all())) + invalid_loaders = list( + set(self.values_loader) - set(data_loaders.get_all().keys()) + ) if len(invalid_loaders) != 0: - raise ValueError(f"""Invalid loader(s) specified. Following loaders were not found in the data_loaders registry: - {' '.join(str(loader) for loader in invalid_loaders)} - Available loaders:{' '.join(str(loader) for loader in data_loaders.get_all())}""") + nl = "\n" # New line variable as f-string can't handle backslashes + raise ValueError( + f"""Invalid loader(s) specified.{nl}{nl}Following loaders were not found in the data_loaders registry:""" + f"""{nl}{nl.join(str(loader) for loader in invalid_loaders)}{nl}{nl}""" + f"""Available loaders:{nl}{nl.join(str(loader) for loader in data_loaders.get_all().keys())}""" + ) if self.output_col_name_override: input_col_name = ( From b7849911c85ca6ac5bd165b7a48ccce1a768f70b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Gr=C3=B8hn=20Damgaard?= <54889907+bokajgd@users.noreply.github.com> Date: Fri, 18 Nov 2022 12:57:11 +0100 Subject: [PATCH 5/5] fix: more explanation in error message Co-authored-by: Martin Bernstorff --- .../timeseriesflattener/feature_spec_objects.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py b/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py index 8b1996ae..ab61f8a1 100644 --- a/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py +++ b/src/psycop_feature_generation/timeseriesflattener/feature_spec_objects.py @@ -273,7 +273,8 @@ def __init__(self, **data): if len(invalid_loaders) != 0: nl = "\n" # New line variable as f-string can't handle backslashes raise ValueError( - f"""Invalid loader(s) specified.{nl}{nl}Following loaders were not found in the data_loaders registry:""" + f"""Some loader strings could not be resolved in the data_loaders catalogue. Did you make a typo? If you want to add your own loaders to the catalogue, see explosion / catalogue on GitHub for info. + {nl*2}Loaders that could not be resolved:""" f"""{nl}{nl.join(str(loader) for loader in invalid_loaders)}{nl}{nl}""" f"""Available loaders:{nl}{nl.join(str(loader) for loader in data_loaders.get_all().keys())}""" )