# config.yaml

#-- I/O --#
# File listing samples and associated data
samples_file: tests/samples/samples_n10.txt

# Column names in the samples table
samples_col: ncbi_organism_name
fasta_file_path_col: fasta_file_path
taxID_col: ncbi_species_taxid
taxonomy_col: ncbi_taxonomy

# Output location
output_dir: tests/output_n10/

# Temporary file directory (your username will be added automatically)
tmp_dir: /ebio/abt3_scratch/

#-- databases to create --#
# Replace "Create" with "Skip" to skip creation of any of these
# Note that bracken relies on the kraken2 database
databases:
  kraken2: Create
  bracken: Create
  humann2_bowtie2: Create
  humann2_diamond: Create

# output database name
db_name: GTDB-custom

#-- keep intermediate files required for re-creating DBs (e.g., w/ more genomes) --#
# If true, the intermediate files are saved to `output_dir`
# Otherwise, the intermediate files are stored temporarily in the temporary directory (see `tmp_dir`)
keep_intermediate: true
# NOTE(review): presumably toggles Snakemake's `ancient()` handling of inputs — confirm in the Snakefile
use_ancient: true

#-- custom NCBI taxdump files, if any (otherwise leave as Skip) --#
names_dmp: Skip
nodes_dmp: Skip

#-- software parameters --#
# `vsearch_per_genome` = per-genome gene clustering
# `vsearch_all` = all genes clustered (including `humann2_nuc_seqs` & `humann2_prot_seqs`)
# NOTE(review): the option strings below are presumably passed to the named
# tool's command line — confirm in the Snakefile
params:
  bracken_build_kmer: 35
  bracken_build_read_lens: [100, 150]
  prodigal: ''
  diamond_db: /ebio/abt3_projects2/databases_no-backup/humann2/uniref50/uniref50_annotated.1.1.dmnd
  diamond_db_to_mem: true
  diamond: '--evalue 1e-3 --sensitive --max-target-seqs 20 --block-size 3 --index-chunks 2'
  vsearch_per_genome: '--id 0.97 --strand both --qmask none --fasta_width 0'
  # Currently skipped; commented-out alternative value:
  #   --id 1.0 --strand both --qmask none --fasta_width 0
  vsearch_all: Skip

#-- If adding genes to humann2 database --#
# If you have nucleotide and/or protein gene sequences formatted for humann2,
# provide the file paths to the fasta files below (gzip'ed)
humann2_nuc_seqs: Skip
humann2_prot_seqs: Skip

#-- snakemake pipeline --#
# NOTE(review): both paths are relative; presumably resolved against the
# directory snakemake is launched from — confirm
pipeline:
  snakemake_folder: ./
  script_folder: ./bin/scripts/