Skip to content

Commit

Permalink
Expanded error messages
Browse files (browse the repository at this point in the history)
  • Loading branch information
nick-youngblut committed Jul 18, 2023
1 parent 1017ce4 commit ff91657
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 15 deletions.
10 changes: 1 addition & 9 deletions bin/gtdb_to_diamond.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,15 @@
from __future__ import print_function
## batteries
import os
import re
import sys
import gzip
import glob
import shutil
import argparse
import logging
import urllib.request
import codecs
import tarfile
from collections import OrderedDict
## package
from bin import __version__
import gtdb2td

# argparse
desc = 'Converting GTDB taxonomy to input for "diamond makedb --taxonmap"'
desc = 'Convert GTDB taxonomy to input for "diamond makedb --taxonmap"'
epi = """DESCRIPTION:
Convert Genome Taxonomy Database (GTDB) representative genome
gene amino acid sequences to the input files required for
Expand Down
11 changes: 5 additions & 6 deletions bin/ncbi-gtdb_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,7 @@ def load_gtdb_metadata(infile: str, G, completeness: float,
continue
line = line.split('\t')
if len(line) < 2:
msg = 'Line{} does not contain >=2 columns'
raise ValueError(msg.format(i+1))
raise ValueError(f'Line{i+1} does not contain >=2 columns')
# header
if i == 0:
header = {x:ii for ii,x in enumerate(line)}
Expand All @@ -249,22 +248,22 @@ def load_gtdb_metadata(infile: str, G, completeness: float,
try:
X = line[header['ncbi_taxonomy']]
except KeyError:
raise KeyError('Cannot find "ncbi_taxonomy"')
raise KeyError(f'Cannot find the "ncbi_taxonomy" column in {infile}')
if X == 'none':
stats['no ncbi tax'] += 1
continue
# filtering by checkM stats
try:
X = line[header['checkm_completeness']]
except KeyError:
raise KeyError('Cannot find "checkm_completeness"')
raise KeyError(f'Cannot find the "checkm_completeness" column in {infile}')
if float(X) < completeness:
stats['completeness'] += 1
continue
try:
X = line[header['checkm_contamination']]
except KeyError:
raise KeyError('Cannot find "checkm_contamination"')
raise KeyError(f'Cannot find the "checkm_contamination" column in {infile}')
if float(X) >= contamination:
stats['contamination'] += 1
continue
Expand Down Expand Up @@ -501,7 +500,7 @@ def query_tax(tax_queries: str, G, tax: str, lca_frac: float=1.0,
for i,q in enumerate(queries):
q_batch[i % procs].append(q)
queries = None
logging.info(f' No. of batches: {len(q_batch))}')
logging.info(f' No. of batches: {len(q_batch)}')
logging.info(f' Queries per batch: {len(q_batch[0])}')
# query graphs
logging.info('Querying taxonomies...')
Expand Down

0 comments on commit ff91657

Please sign in to comment.