Skip to content

Commit

Permalink
updated function docs
Browse files Browse the repository at this point in the history
  • Loading branch information
nick-youngblut committed Jul 18, 2023
1 parent 862a2bf commit 586361d
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 137 deletions.
27 changes: 20 additions & 7 deletions bin/gtdb_to_taxdump.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import gtdb2td

# argparse
desc = 'Converting GTDB taxonomy to NCBI taxdump format'
desc = 'Convert GTDB taxonomy to NCBI taxdump format'
epi = """DESCRIPTION:
Convert Genome Taxonomy Database (GTDB) taxonomy files
to NCBI taxdump format (names.dmp & nodes.dmp).
Expand All @@ -28,7 +28,7 @@
The input table format should be >=2 columns,
(Column1 = accession, Column2 = gtdb_taxonomy),
and no header
and no header.
The *.dmp files are written to `--outdir`.
A tab-delim table of taxID info is written to STDOUT.
Expand Down Expand Up @@ -58,9 +58,14 @@
logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.DEBUG)


def load_gtdb_tax(infile, graph):
def load_gtdb_tax(infile: str, graph) -> None:
"""
loading gtdb taxonomy & adding to DAG
Load the GTDB taxonomy & add it to the DAG.
Params:
infile: input file url or path
graph: graph object
Returns:
None
"""
# url or file download/open
try:
Expand Down Expand Up @@ -102,14 +107,22 @@ def load_gtdb_tax(infile, graph):
except AttributeError:
pass

def write_blank_dmp(outfile: str, outdir: str = None) -> None:
    """
    Write a blank taxdump file (a single line containing only `#`).
    Params:
      outfile: filename
      outdir: output directory; if None, `outfile` is used as given
              (i.e., relative to the current dir)
    Returns:
      None
    """
    # Only prefix the directory when one was explicitly provided
    if outdir is not None:
        outfile = os.path.join(outdir, outfile)
    with open(outfile, 'w') as outF:
        outF.write('#\n')
    logging.info(f'File written: {outfile}')

def main(args):
def main(args: dict) -> None:
# creating DAG
graph = gtdb2td.Graph()
graph.add_vertex('root')
Expand Down
34 changes: 24 additions & 10 deletions bin/lineage2taxid.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,15 @@ class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
parser.add_argument('--version', action='version', version=__version__)

# functions
def lineage2taxid(lineage, G):
def lineage2taxid(lineage: str, G) -> list:
"""
Convert taxonomy lineage string to list of taxids
Params:
lineage: taxonomy lineage string
G: graph object containing NCBI taxonomy
Return:
List of taxids (or [NA,NA] if no taxid could be mapped)
"""
lineage = lineage.split(';')
for cls in lineage[::-1]:
cls = cls.lower()
Expand All @@ -79,12 +87,20 @@ def lineage2taxid(lineage, G):
logging.warning(msg.format(';'.join(lineage)))
return ['NA', 'NA']

def parse_lineage_table(table_file, lineage_column, G,
taxid_column, taxid_rank_column):
def parse_lineage_table(table_file: str, lineage_column: str, G,
taxid_column: str, taxid_rank_column: str) -> None:
"""
Parsing lineage and finding taxid
Parse lineages and find the taxid for each.
Params:
table_file: input table file
lineage_column: column containing lineages
G: graph containing NCBI taxonomy
taxid_column: column name to write taxids
taxid_rank_column: column name to write taxid ranks
Return:
None
"""
logging.info('Parsing file: {}'.format(table_file))
logging.info(f'Parsing file: {table_file}')
header = {}
with gtdb2td.Utils.Open(table_file) as inF:
for i,line in enumerate(inF):
Expand All @@ -95,8 +111,7 @@ def parse_lineage_table(table_file, lineage_column, G,
try:
_ = header[lineage_column]
except KeyError:
msg = 'Cannot find column: {}'
raise KeyError(msg.format(lineage_column))
raise KeyError(f'Cannot find column: {lineage_column}')
print('\t'.join(line + [taxid_column, taxid_rank_column]))
continue
# body
Expand All @@ -105,10 +120,9 @@ def parse_lineage_table(table_file, lineage_column, G,
print('\t'.join(line + [str(taxid), str(rank)]))
# status
if i > 0 and (i+1) % 100 == 0:
logging.info(' Records processed: {}'.format(i+1))
logging.info(f' Records processed: {i+1}')

## main interface
def main(args):
def main(args: dict) -> None:
"""
Main interface
"""
Expand Down
Loading

0 comments on commit 586361d

Please sign in to comment.