-
Notifications
You must be signed in to change notification settings - Fork 4
/
config.yaml
63 lines (53 loc) · 1.96 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#-- I/O --#
# Path to the file listing the samples and their associated data
samples_file: tests/samples/samples_n10.txt
# Names of the columns in the samples table
# (each value below is the column header to look up)
samples_col: 'ncbi_organism_name'
fasta_file_path_col: 'fasta_file_path'
taxID_col: 'ncbi_species_taxid'
taxonomy_col: 'ncbi_taxonomy'
# Output location
output_dir: tests/output_n10/
# Temporary file directory (your username will be added automatically)
# NOTE(review): this looks like a site-specific absolute path -- adjust for your system
tmp_dir: /ebio/abt3_scratch/
#-- databases to create --#
# Replace "Create" with "Skip" to skip creation of any of these.
# Note that bracken relies on the kraken2 database.
# The entries must be indented under `databases:`; at column 0 they would be
# parsed as unrelated top-level keys and `databases` itself would be null.
databases:
  kraken2: Create
  bracken: Create
  humann2_bowtie2: Create
  humann2_diamond: Create
# Name of the output database
db_name: GTDB-custom
#-- keep intermediate files required for re-creating DBs (e.g., w/ more genomes) --#
# If "True", the intermediate files are saved to `output_dir`
# Else, the intermediate files are only stored temporarily
# (presumably under `tmp_dir`; this note originally said `temp_folder`,
# which is not a key in this file -- TODO confirm)
keep_intermediate: True
# NOTE(review): `use_ancient` is not described in this file; presumably it
# toggles Snakemake's ancient() input flag -- confirm against the Snakefile
use_ancient: True
#-- custom NCBI taxdump files (or just Skip) --#
# Set each entry to a custom taxdump file, or leave it as Skip (as here)
names_dmp: Skip
nodes_dmp: Skip
#-- software parameters --#
# `vsearch_per_genome` = per-genome gene clustering
# `vsearch_all` = all genes clustered (including `humann2_nuc_seqs` & `humann2_prot_seqs`)
# All parameters must be indented under `params:`; at column 0 they would be
# parsed as top-level keys and `params` itself would be null.
params:
  bracken_build_kmer: 35
  bracken_build_read_lens:
    - 100
    - 150
  # presumably extra command-line options for prodigal (none set) -- confirm
  prodigal: ""
  # NOTE(review): site-specific absolute path -- adjust for your system
  diamond_db: /ebio/abt3_projects2/databases_no-backup/humann2/uniref50/uniref50_annotated.1.1.dmnd
  diamond_db_to_mem: True
  # Option strings are quoted so they are always read as plain strings
  diamond: '--evalue 1e-3 --sensitive --max-target-seqs 20 --block-size 3 --index-chunks 2'
  vsearch_per_genome: '--id 0.97 --strand both --qmask none --fasta_width 0'
  # Currently skipped; example options to use instead of Skip:
  #   --id 1.0 --strand both --qmask none --fasta_width 0
  vsearch_all: Skip
#-- If adding genes to humann2 database --#
# If you have nucleotide and/or protein gene sequences formatted for humann2,
# provide the file paths to the gzip'ed fasta files below
# (both are set to Skip here)
humann2_nuc_seqs: Skip
humann2_prot_seqs: Skip
#-- snakemake pipeline --#
# Both folder settings must be indented under `pipeline:`; at column 0 they
# would be parsed as top-level keys and `pipeline` itself would be null.
pipeline:
  snakemake_folder: ./
  script_folder: ./bin/scripts/