Optimized filter mappings

Changed the `t2t._filter_mappings` function to select rows with a vectorized boolean mask on the "Mapping Score" column instead of looping over rows with `iterrows`, making it faster.
ccb-hms · Feb 29, 2024 · d8c7132 · d8c7132
1 parent 07f2298
commit d8c7132
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 6 deletions.
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -26,7 +26,6 @@ jobs:
     - name: show python path
       run: |
         python -c "import sys; print('\n'.join(sys.path))"
-        echo $PYTHONPATH
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip

diff --git a/text2term/t2t.py b/text2term/t2t.py
@@ -265,13 +265,11 @@ def _add_tags_to_df(df, tags):
 
 
 def _filter_mappings(mappings_df, min_score):
-    new_df = pd.DataFrame(columns=mappings_df.columns)
-    for index, row in mappings_df.iterrows():
-        if row['Mapping Score'] >= min_score:
-            new_df.loc[len(new_df.index)] = row
+    if mappings_df.empty:
+        return mappings_df
+    new_df = mappings_df.loc[mappings_df["Mapping Score"] >= min_score]
     return new_df
 
-
 def _add_unmapped_terms(mappings_df, tags, source_terms, source_terms_ids):
     if mappings_df.size == 0:
         mapped = []