From 83eb2031808b20bbe5b64ce9de289e7c520f0c58 Mon Sep 17 00:00:00 2001
From: Sandrro
Date: Fri, 22 Nov 2024 15:13:09 +0300
Subject: [PATCH] fix(AreaMatcher): changed osm to optional

---
 sloyka/src/utils/data_processing/area_matcher.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/sloyka/src/utils/data_processing/area_matcher.py b/sloyka/src/utils/data_processing/area_matcher.py
index ae38b8e..19b33fe 100644
--- a/sloyka/src/utils/data_processing/area_matcher.py
+++ b/sloyka/src/utils/data_processing/area_matcher.py
@@ -137,15 +137,20 @@ def match_group_to_area(self, group_name, df_areas):
                 max_partial_ratio = partial_ratio
                 max_token_sort_ratio = token_sort_ratio
                 best_match = row["area_name"]
-                admin_level = row["admin_level"]
 
-        return best_match, admin_level
+        return best_match
 
-    def run(self, df, place_name):
+    def run(self, df, from_osm: bool = True, areas = None, place_name: str = None):
         df['processed_group_name'] = df.group_name.map(lambda x: self.preprocess_group_name(x))
-        df_areas = self.get_osm_areas(place_name)
+        print('processed group names')
+        if from_osm:
+            df_areas = self.get_osm_areas(place_name)
+        else:
+            df_areas = areas
         df_areas = self.preprocess_area_names(df_areas)
-        df[["best_match", "admin_level"]] = df.apply(
+        print('processed area names')
+        df["best_match"] = df.apply(
             lambda row: pd.Series(self.match_group_to_area(row["processed_group_name"], df_areas)), axis=1
         )
+        print('found matches')
         return df
\ No newline at end of file