Skip to content

Commit

Permalink
tarfile handling
Browse files Browse the repository at this point in the history
  • Loading branch information
rvosa committed Aug 16, 2024
1 parent 72e2d35 commit 9503066
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
4 changes: 3 additions & 1 deletion barcode_validator/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import time
import pandas as pd
import tarfile
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Blast import NCBIWWW, NCBIXML
Expand All @@ -20,7 +21,8 @@ def read_bold_taxonomy(spreadsheet):

def read_ncbi_taxonomy(tarfile):
    """Read the NCBI taxonomy dump and parse it with ``NCBIParser``.

    :param tarfile: Location of the gzip-compressed NCBI taxonomy archive
        (e.g. ``taxdump.tar.gz``), passed straight to ``tarfile.open``.
    :return: The result of ``NCBIParser(...).parse()`` for the opened archive.
    """
    # The parameter is named ``tarfile`` (kept for caller compatibility), which
    # hides the stdlib module of the same name inside this function. Import the
    # module under an alias so ``.open`` is resolved on the module rather than
    # on the path argument.
    import tarfile as tarfile_module

    logging.info("Reading NCBI taxonomy")
    # Close the archive once parsing is done instead of leaking the handle.
    # NOTE(review): assumes parse() reads everything it needs before
    # returning -- confirm against NCBIParser.
    with tarfile_module.open(tarfile, "r:gz") as tar:
        return NCBIParser(tar).parse()


def run_seqid(sequence, ncbi_tree):
Expand Down
4 changes: 2 additions & 2 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ word_size: 28
BLASTDB_LMDB_MAP_SIZE: 180000000000 # MaaS 37, 180GB

# Location of the NCBI taxonomy dump. This must be the tar file that contains the nodes.dmp and names.dmp files.
# When downloaded, the tar file is gzip compressed. This needs to be uncompressed before use, e.g. with `gunzip`.
ncbi_taxonomy: /home/rutger.vos/data/ncbi/taxdump/taxdump.tar
# The archive is read in its gzip-compressed form, as downloaded from http://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz
ncbi_taxonomy: /home/rutger.vos/data/ncbi/taxdump/taxdump.tar.gz

# Configuration for logging. The verbosity level specified here is overridden by the value provided on the command
# line with the -v/-verbosity argument. The log file is written to the current working directory.
Expand Down

0 comments on commit 9503066

Please sign in to comment.