Skip to content

Commit

Permalink
Merge pull request #62 from ccb-hms/development
Browse files (browse the repository at this point in the history)
Add BioPortal API key parameter + Minor fixes (closes #61)
  • Loading branch information
rsgoncalves authored Jul 21, 2024
2 parents 35b0046 + d5122a5 commit 058c6bc
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 10 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
long_description=long_description,
long_description_content_type='text/markdown',
author='Center for Computational Biomedicine, Harvard Medical School',
author_email='rafael_goncalves@hms.harvard.edu',
author_email='rsgoncalves@gmx.com',
classifiers=[
'Development Status :: 4 - Beta',
'License :: OSI Approved :: MIT License',
Expand Down
17 changes: 16 additions & 1 deletion test/simple_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,19 @@ def test_mapping_zooma_ontologies(self):
assert df_zooma[self.MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any()
assert df_zooma[self.MAPPED_TERM_CURIE_COLUMN].str.contains("NCIT:").any()

def test_mapping_bioportal_ontologies_no_apikey(self):
    """Check that the BioPortal Annotator mapper returns no mappings when no API key is given.

    ``map_terms`` is called without the ``bioportal_apikey`` argument, and the returned data frame is
    expected to be empty.
    """
    # The message names the "no API key" scenario so this test's output can be told apart from the
    # output of the test that does supply a key.
    print("Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper "
          "without an API key...")
    df_bioportal = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT",
                                       mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY)
    # No key was supplied, so no mappings should come back.
    assert df_bioportal.empty

def test_mapping_bioportal_ontologies(self):
# Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper
print("Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper...")
df_bioportal = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT",
mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY)
mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY,
bioportal_apikey="8f0cbe43-2906-431a-9572-8600d3f4266e")
print(f"{df_bioportal}\n")
assert df_bioportal.size > 0
assert df_bioportal[self.MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any()
Expand Down Expand Up @@ -207,6 +215,13 @@ def test_mapping_with_min_score_filter(self):
term_type=OntologyTermType.ANY, min_score=min_score)
assert (df_leven[self.MAPPING_SCORE_COLUMN] >= min_score).all()

def test_mapping_with_min_score_filter_empty_results(self):
    """Map a single term to EFO with the TFIDF mapper and a min_score of 0.99, expecting an empty result."""
    self.ensure_cache_exists("EFO", self.EFO_URL)
    print("Test mapping to EFO using TFIDF similarity metric and min_score filter that results in no mappings...")
    # Collect the mapping options in one place; the threshold is high enough that the test expects
    # every candidate mapping to be filtered out.
    mapping_options = dict(target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF,
                           term_type=OntologyTermType.ANY, min_score=0.99)
    filtered_mappings = text2term.map_terms(["carbon monoxide"], **mapping_options)
    assert filtered_mappings.empty is True

def test_include_unmapped_terms(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
df = text2term.map_terms(["asthma", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF,
Expand Down
7 changes: 5 additions & 2 deletions text2term/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='A tool for mapping free-text descriptions of (biomedical) '
'entities to controlled terms in an ontology')
'entities to ontology terms')
parser.add_argument("-s", "--source", required=True, type=str,
help="Input file containing 'source' terms to map to ontology terms: list of terms or CSV file")
parser.add_argument("-t", "--target", required=True, type=str,
Expand Down Expand Up @@ -42,6 +42,8 @@
help="Define whether to map to ontology classes, properties, or both")
parser.add_argument('-u', "--incl_unmapped", required=False, default=False, action="store_true",
help="Include all unmapped terms in the output")
parser.add_argument('-bp', "--bioportal_apikey", required=False, type=str, default="",
help="BioPortal API Key to use along with the BioPortal mapper option")

arguments = parser.parse_args()
if not os.path.exists(arguments.source):
Expand All @@ -63,4 +65,5 @@
excl_deprecated=arguments.excl_deprecated, mapper=mapper, max_mappings=arguments.top_mappings,
min_score=arguments.min_score, base_iris=iris, save_graphs=arguments.save_term_graphs,
save_mappings=True, separator=arguments.separator, use_cache=cache_exists(target),
term_type=arguments.term_type, incl_unmapped=arguments.incl_unmapped)
term_type=arguments.term_type, incl_unmapped=arguments.incl_unmapped,
bioportal_apikey=arguments.bioportal_apikey)
2 changes: 1 addition & 1 deletion text2term/config.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
VERSION = "4.2.0"
VERSION = "4.2.1"
17 changes: 12 additions & 5 deletions text2term/t2t.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_deprecated=False, max_mappings=3,
min_score=0.3, mapper=Mapper.TFIDF, output_file='', save_graphs=False, save_mappings=False,
source_terms_ids=(), separator=',', use_cache=False, term_type=OntologyTermType.CLASS,
incl_unmapped=False):
incl_unmapped=False, bioportal_apikey=""):
"""
Maps the terms in the given list to the specified target ontology.
Expand Down Expand Up @@ -75,6 +75,8 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_
The type(s) of ontology terms to map to, which can be 'class' or 'property' or 'any'
incl_unmapped : bool
Include unmapped terms in the output data frame
bioportal_apikey : str
BioPortal API Key to use along with the BioPortal mapper option
Returns
----------
Expand All @@ -101,8 +103,9 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_
# Run the mapper
LOGGER.info(f"Mapping {len(source_terms)} source terms to {target_ontology}")
mappings_df = _do_mapping(source_terms, source_terms_ids, target_terms, mapper, max_mappings, min_score, tags,
incl_unmapped)
mappings_df["Mapping Score"] = mappings_df["Mapping Score"].astype(float).round(decimals=3)
incl_unmapped, bioportal_apikey)
if not mappings_df.empty:
mappings_df["Mapping Score"] = mappings_df["Mapping Score"].astype(float).round(decimals=3)
if save_mappings:
_save_mappings(mappings_df, output_file, min_score, mapper, target_ontology, base_iris,
excl_deprecated, max_mappings, term_type, source_terms, incl_unmapped)
Expand Down Expand Up @@ -194,7 +197,8 @@ def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_typ
return onto_terms


def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappings, min_score, tags, incl_unmapped):
def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappings, min_score, tags, incl_unmapped,
bioportal_apikey):
to_map, tags = _process_tags(source_terms, tags)
start = time.time()
if mapper == Mapper.TFIDF:
Expand All @@ -204,7 +208,10 @@ def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappi
term_mapper = ZoomaMapper()
mappings_df = term_mapper.map(to_map, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
elif mapper == Mapper.BIOPORTAL:
term_mapper = BioPortalAnnotatorMapper("8f0cbe43-2906-431a-9572-8600d3f4266e")
if bioportal_apikey == "":
LOGGER.error("A BioPortal API Key must be specified via the parameter `bioportal_apikey`")
return pd.DataFrame()
term_mapper = BioPortalAnnotatorMapper(bioportal_apikey)
mappings_df = term_mapper.map(to_map, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
elif mapper in {Mapper.LEVENSHTEIN, Mapper.JARO, Mapper.JARO_WINKLER, Mapper.INDEL, Mapper.FUZZY, Mapper.JACCARD}:
term_mapper = SyntacticMapper(ontology_terms)
Expand Down

0 comments on commit 058c6bc

Please sign in to comment.