Skip to content

Commit

Permalink
ENH map_to_aro now returns None if no match
Browse files Browse the repository at this point in the history
Handle mapping not in ARO in map_to_aro function & simplify map_to_aro() code

The previous `map_to_aro` code also dealt with multiple ARO mappings, which is no longer an issue because the problems with gene clusters, reverse complements, and CDSs being treated as contigs have been solved for all databases (except megares, which is currently being looked into).

Also removes an unnecessary warning in map_to_aro and documents the return of an empty list if the ARO is invalid in map_to_aro.
  • Loading branch information
Vedanth-Ramji authored May 9, 2024
1 parent e9d5a31 commit 1b2e192
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
12 changes: 6 additions & 6 deletions argnorm/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ def map_to_aro(gene, database):
Returns:
ARO[result] (pronto.term.Term): A pronto term with the ARO number of input gene. ARO number can be accessed using 'id' attribute and gene name can be accessed using 'name' attribute.
If the ARO mapping doesn't exist, None is returned.
"""

if database not in ['ncbi', 'deeparg', 'resfinder', 'sarg', 'megares', 'argannot']:
Expand All @@ -62,10 +64,8 @@ def map_to_aro(gene, database):
except KeyError:
raise Exception(f'{gene} is not in {database} database')
else:
# Dealing with duplicated genes in ARO mapping table.
# Getting only one ARO number
if pd.isna(result):
return None

ARO = pronto.Ontology.from_obo_library('aro.obo')
if type(result) != str:
return ARO[list(set(result))[0]]
else:
return ARO[result]
return ARO.get(result)
6 changes: 4 additions & 2 deletions tests/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@ def test_map_to_aro():
["(AGly)AAC(6')-Isa:NG_047311:101-574:474", 'argannot'],
["MEG_21|Drugs|Aminoglycosides|Aminoglycoside_N-acetyltransferases|AAC3", 'megares'],
["1028085756|WP_063844287.1|1|1|cpt|cpt|phosphotransferase|2|CHLORAMPHENICOL|PHENICOL|chloramphenicol_phosphotransferase_CPT", 'ncbi'],
["gb|AAG57600.1|ARO:3000318|mphB", "sarg"]
["gb|AAG57600.1|ARO:3000318|mphB", "sarg"],
["(Phe)cpt_strepv:U09991:AAB36569:1412-1948:537", "argannot"]
]

ARO = pronto.Ontology.from_obo_library('aro.obo')
expected_output = [
ARO.get_term('ARO:3002563'),
ARO.get_term('ARO:3004623'),
ARO.get_term('ARO:3000249'),
ARO.get_term('ARO:3000318')
ARO.get_term('ARO:3000318'),
None
]

for t, e in zip(test_cases, expected_output):
Expand Down

0 comments on commit 1b2e192

Please sign in to comment.