From 829cc865d8f4eda951903e6d1fd00fa44937ff39 Mon Sep 17 00:00:00 2001 From: Raphael Rivas Date: Wed, 21 Dec 2022 17:28:29 -0300 Subject: [PATCH 1/3] search shape_id (1 or more) in shapes --- mobilidade_rio/mobilidade_rio/pontos/views.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mobilidade_rio/mobilidade_rio/pontos/views.py b/mobilidade_rio/mobilidade_rio/pontos/views.py index f58b946..a5e1294 100644 --- a/mobilidade_rio/mobilidade_rio/pontos/views.py +++ b/mobilidade_rio/mobilidade_rio/pontos/views.py @@ -93,6 +93,17 @@ class ShapesViewSet(viewsets.ModelViewSet): queryset = Shapes.objects.all().order_by("shape_id") pagination_class = LargePagination + def get_queryset(self): + queryset = Shapes.objects.all().order_by("shape_id") + + # filter by shape_id + shape_id = self.request.query_params.get("shape_id") + if shape_id is not None: + shape_id = shape_id.split(",") + queryset = queryset.filter(shape_id__in=shape_id).order_by("shape_id") + + return queryset + class StopsViewSet(viewsets.ModelViewSet): From 72252ebfffaf67676186284de2c510b16330144b Mon Sep 17 00:00:00 2001 From: Raphael Rivas Date: Fri, 23 Dec 2022 12:35:28 -0300 Subject: [PATCH 2/3] add remove_cols_containing in settings.json --- scripts/populate_db/settings.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/populate_db/settings.json b/scripts/populate_db/settings.json index 66836df..1388f77 100644 --- a/scripts/populate_db/settings.json +++ b/scripts/populate_db/settings.json @@ -39,6 +39,13 @@ "trip_id" ] }, + "remove_cols_containing": { + "pontos_routes": { + "route_short_name": [ + "SN" + ] + } + }, + "flag_params": [ "empty_tables" ] From 194244786a7c8229d323e460aa2899664d21aab9 Mon Sep 17 00:00:00 2001 From: Raphael Rivas Date: Fri, 23 Dec 2022 13:15:53 -0300 Subject: [PATCH 3/3] filter substring and refactor function --- scripts/populate_db/populate_db.py | 53 +++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 9 deletions(-) diff --git 
a/scripts/populate_db/populate_db.py b/scripts/populate_db/populate_db.py index 368bac5..e309971 100644 --- a/scripts/populate_db/populate_db.py +++ b/scripts/populate_db/populate_db.py @@ -175,22 +175,48 @@ def validate_col_values( len_history = [len(data)] # remove duplicates - cols_duplicates = [] + remove_duplicate_cols_table_commas = [] + remove_duplicate_cols_table_no_commas = [] if table_name in remove_duplicate_cols: - cols_duplicates = validate_col_names(table_name, remove_duplicate_cols[table_name]) + # separate cols with commas, without and unique + for col in remove_duplicate_cols[table_name]: + if "," in col: + cols_1 = validate_col_names(table_name, col.split(",")) + remove_duplicate_cols_table_commas.append(cols_1) + else: + remove_duplicate_cols_table_no_commas.append(col) + remove_duplicate_cols_table_no_commas = validate_col_names( + table_name, remove_duplicate_cols_table_no_commas) + # validate cols - cols_validates = [] + validade_cols_table = [] if table_name in validate_cols: - cols_validates = validate_col_names(table_name, validate_cols[table_name]) + validade_cols_table = validate_col_names( + table_name, validate_cols[table_name]) + + # remove_cols_containing + remove_cols_containing = [] + remove_cols_containing_table = [] + if 'remove_cols_containing' in settings: + remove_cols_containing = settings['remove_cols_containing'] + if table_name in remove_cols_containing: + remove_cols_containing_table = remove_cols_containing[table_name] + # validate cols for each table + remove_cols_containing_table = validate_col_names( + table_name, remove_cols_containing_table) + # enforce types cols_set_types = [] if 'enforce_type_cols' in settings and table_name in settings['enforce_type_cols']: - cols_set_types = validate_col_names(table_name, settings['enforce_type_cols'][table_name]) + cols_set_types = validate_col_names( + table_name, settings['enforce_type_cols'][table_name]) - # cols_validates = validate_col_names(table_name, 
validate_cols[table_name]) - if table_name in validate_cols: + # run validate cols + if (table_name in validate_cols + or table_name in remove_duplicate_cols + or table_name in remove_cols_containing): for col in cols: - if col in cols_validates: + if col in validade_cols_table: # ? if value ends with _1, split and remove _1 data[col] = data[col].str.split("_1").str[0] data = data.copy() @@ -200,18 +226,27 @@ def validate_col_values( data = data[~data[col].str.contains("_")].copy() # remove duplicates - if remove_duplicates and col in cols_duplicates: + if remove_duplicates and col in remove_duplicate_cols_table_no_commas: data = data.drop_duplicates(subset=[col]).copy() # convert to type if col in cols_set_types: if cols_set_types[col] == "int": data[col] = data[col].astype("Int64") + + # remove cols containing + if col in remove_cols_containing_table: + print(f"Removing_cols_containing: {col}") + # drop rows if col value contains part of substring + for substring in remove_cols_containing_table[col]: + data = data[~data[col].str.contains(substring)].copy() + len_history.append(len(data)) # print("CVC hist", len_history) return convert_to_type(data, data_type, ret_type, table_name + ".txt") + def upload_data(_app: str, _model: str): """ Upload data to Postgres