From d5122a583c4a9bd33e9840bcdd77945bff8ffa30 Mon Sep 17 00:00:00 2001
From: Rafael Goncalves <rsgoncalves@gmx.com>
Date: Sat, 20 Jul 2024 18:12:54 -0400
Subject: [PATCH] Add BioPortal API key parameter + Minor fixes (closes #61)

---
 setup.py              |  2 +-
 test/simple_tests.py  | 17 ++++++++++++++++-
 text2term/__main__.py |  7 +++++--
 text2term/config.py   |  2 +-
 text2term/t2t.py      | 17 ++++++++++++-----
 5 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/setup.py b/setup.py
index d6d71c5..7455ca3 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,7 @@
     long_description=long_description,
     long_description_content_type='text/markdown',
     author='Center for Computational Biomedicine, Harvard Medical School',
-    author_email='rafael_goncalves@hms.harvard.edu',
+    author_email='rsgoncalves@gmx.com',
     classifiers=[
         'Development Status :: 4 - Beta',
         'License :: OSI Approved :: MIT License',
diff --git a/test/simple_tests.py b/test/simple_tests.py
index 305281e..25dfb60 100644
--- a/test/simple_tests.py
+++ b/test/simple_tests.py
@@ -145,11 +145,19 @@ def test_mapping_zooma_ontologies(self):
         assert df_zooma[self.MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any()
         assert df_zooma[self.MAPPED_TERM_CURIE_COLUMN].str.contains("NCIT:").any()
 
+    def test_mapping_bioportal_ontologies_no_apikey(self):
+        # Without an API key the BioPortal mapper cannot run, so map_terms is expected to return an empty data frame
+        print("Test mapping a list of terms using the BioPortal Annotator mapper without an API key...")
+        df_bioportal = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT",
+                                           mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY)
+        assert df_bioportal.empty
+
     def test_mapping_bioportal_ontologies(self):
         # Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper
         print("Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper...")
         df_bioportal = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT",
-                                           mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY)
+                                           mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY,
+                                           bioportal_apikey="8f0cbe43-2906-431a-9572-8600d3f4266e")
         print(f"{df_bioportal}\n")
         assert df_bioportal.size > 0
         assert df_bioportal[self.MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any()
@@ -207,6 +215,13 @@ def test_mapping_with_min_score_filter(self):
                                        term_type=OntologyTermType.ANY, min_score=min_score)
         assert (df_leven[self.MAPPING_SCORE_COLUMN] >= min_score).all()
 
+    def test_mapping_with_min_score_filter_empty_results(self):
+        self.ensure_cache_exists("EFO", self.EFO_URL)
+        print("Test mapping to EFO using TFIDF similarity metric and min_score filter that results in no mappings...")
+        no_mappings_df = text2term.map_terms(["carbon monoxide"], target_ontology="EFO", mapper=Mapper.TFIDF,
+                                             min_score=0.99, term_type=OntologyTermType.ANY, use_cache=True)
+        assert no_mappings_df.empty is True  # a 0.99 threshold is expected to reject every candidate mapping
+
     def test_include_unmapped_terms(self):
         self.ensure_cache_exists("EFO", self.EFO_URL)
         df = text2term.map_terms(["asthma", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF,
diff --git a/text2term/__main__.py b/text2term/__main__.py
index ed94223..54dacc6 100644
--- a/text2term/__main__.py
+++ b/text2term/__main__.py
@@ -7,7 +7,7 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='A tool for mapping free-text descriptions of (biomedical) '
-                                                 'entities to controlled terms in an ontology')
+                                                 'entities to ontology terms')
     parser.add_argument("-s", "--source", required=True, type=str,
                         help="Input file containing 'source' terms to map to ontology terms: list of terms or CSV file")
     parser.add_argument("-t", "--target", required=True, type=str,
@@ -42,6 +42,8 @@
                         help="Define whether to map to ontology classes, properties, or both")
     parser.add_argument('-u', "--incl_unmapped", required=False, default=False, action="store_true",
                         help="Include all unmapped terms in the output")
+    parser.add_argument('-bp', "--bioportal_apikey", required=False, type=str, default="",
+                        help="BioPortal API Key to use along with the BioPortal mapper option")
 
     arguments = parser.parse_args()
     if not os.path.exists(arguments.source):
@@ -63,4 +65,5 @@
               excl_deprecated=arguments.excl_deprecated, mapper=mapper, max_mappings=arguments.top_mappings,
               min_score=arguments.min_score, base_iris=iris, save_graphs=arguments.save_term_graphs,
               save_mappings=True, separator=arguments.separator, use_cache=cache_exists(target),
-              term_type=arguments.term_type, incl_unmapped=arguments.incl_unmapped)
+              term_type=arguments.term_type, incl_unmapped=arguments.incl_unmapped,
+              bioportal_apikey=arguments.bioportal_apikey)
diff --git a/text2term/config.py b/text2term/config.py
index 773464b..f045f1a 100644
--- a/text2term/config.py
+++ b/text2term/config.py
@@ -1 +1 @@
-VERSION = "4.2.0"
+VERSION = "4.2.1"
diff --git a/text2term/t2t.py b/text2term/t2t.py
index a2e27a4..def86a8 100644
--- a/text2term/t2t.py
+++ b/text2term/t2t.py
@@ -31,7 +31,7 @@
 def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_deprecated=False, max_mappings=3,
               min_score=0.3, mapper=Mapper.TFIDF, output_file='', save_graphs=False, save_mappings=False,
               source_terms_ids=(), separator=',', use_cache=False, term_type=OntologyTermType.CLASS,
-              incl_unmapped=False):
+              incl_unmapped=False, bioportal_apikey=""):
     """
     Maps the terms in the given list to the specified target ontology.
 
@@ -75,6 +75,8 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_
         The type(s) of ontology terms to map to, which can be 'class' or 'property' or 'any'
     incl_unmapped : bool
         Include unmapped terms in the output data frame
+    bioportal_apikey : str
+        BioPortal API Key to use along with the BioPortal mapper option
 
     Returns
     ----------
@@ -101,8 +103,9 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_
     # Run the mapper
     LOGGER.info(f"Mapping {len(source_terms)} source terms to {target_ontology}")
     mappings_df = _do_mapping(source_terms, source_terms_ids, target_terms, mapper, max_mappings, min_score, tags,
-                              incl_unmapped)
-    mappings_df["Mapping Score"] = mappings_df["Mapping Score"].astype(float).round(decimals=3)
+                              incl_unmapped, bioportal_apikey)
+    if not mappings_df.empty:
+        mappings_df["Mapping Score"] = mappings_df["Mapping Score"].astype(float).round(decimals=3)
     if save_mappings:
         _save_mappings(mappings_df, output_file, min_score, mapper, target_ontology, base_iris,
                        excl_deprecated, max_mappings, term_type, source_terms, incl_unmapped)
@@ -194,7 +197,8 @@ def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_typ
     return onto_terms
 
 
-def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappings, min_score, tags, incl_unmapped):
+def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappings, min_score, tags, incl_unmapped,
+                bioportal_apikey):
     to_map, tags = _process_tags(source_terms, tags)
     start = time.time()
     if mapper == Mapper.TFIDF:
@@ -204,7 +208,10 @@ def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappi
         term_mapper = ZoomaMapper()
         mappings_df = term_mapper.map(to_map, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
     elif mapper == Mapper.BIOPORTAL:
-        term_mapper = BioPortalAnnotatorMapper("8f0cbe43-2906-431a-9572-8600d3f4266e")
+        if bioportal_apikey == "":
+            LOGGER.error("A BioPortal API Key must be specified via the parameter `bioportal_apikey`")
+            return pd.DataFrame()
+        term_mapper = BioPortalAnnotatorMapper(bioportal_apikey)
         mappings_df = term_mapper.map(to_map, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
     elif mapper in {Mapper.LEVENSHTEIN, Mapper.JARO, Mapper.JARO_WINKLER, Mapper.INDEL, Mapper.FUZZY, Mapper.JACCARD}:
         term_mapper = SyntacticMapper(ontology_terms)