-
Notifications
You must be signed in to change notification settings - Fork 3
/
config.yaml
66 lines (48 loc) · 2.2 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Number of cores snakemake may use for parallelization. This setting is based on the architecture of the
# metal-as-a-service node netdc-bms-c11g.maas, which has 56 cores. Adjust this setting as needed. For example,
# on most laptops a reasonable value might be 4 or 8.
cpu_cores: 56
# Used for scatter/gather processing: the number of families within the taxon defined in fasta_filter that
# have records for the specified marker. In practice, this means that the input BCDM TSV file has to have
# exactly this many distinct, non-empty values in the `family` column among the rows where the column named
# by fasta_filter.filter_level has the value fasta_filter.filter_name (e.g. where `order` is `Odonata`).
# TODO: derive this automatically from the data. At present, the user has to calculate it from the input
# file, e.g. by first making the sqlite database, or by grepping the TSV file.
nfamilies: 36
# Number of outgroups to include in each family-level analysis. Minimum is 2.
outgroups: 3
# Logging verbosity; see the level names in `logging` from the stdlib
log_level: 'INFO'
# Marker to use: either COI-5P or matK_rbcL
marker: COI-5P
# Substitution model in RAxML CLI syntax
model: GTR+G
# Minimum length to include (this value fits COI-5P)
minlength: 600
# Minimum number of sequences per alignment
minseq: 3
# Which exemplars to pick: tallest, shortest, or median
exemplars: tallest
# How to rescale the subtrees: exemplars (i.e. subtree depth ratio) or outgroup (crown-to-crown distance ratio)
scaling: exemplars
# Maximum number of sequences per alignment
maxseq: 12000
# Datatype for the MACSE alignment: nucleotide (NT) or amino acid (AA)
datatype: NT
# Choose which records from the database the pipeline uses. filter_level takes exactly one level and
# filter_name takes exactly one name.
# Filter levels: class, order, family, genus, all (no filter)
fasta_filter:
  filter_level: order
  filter_name: Odonata
# NOTE(review): the purpose of `name` is not documented here; presumably a workflow/environment label - confirm
name: phylogeny
# NOTE(review): presumably the name (or path prefix) of the BLAST database used by the pipeline - confirm
blastdb: blastdb
# Dependencies in conda environment-file style: the nested `pip` list hands `-r requirements.txt` to pip.
# The requirements entry must be nested under `pip`; at the same level as `- pip:` it becomes a plain string
# item of `dependencies` and `pip` is left null.
dependencies:
  - pip:
      - -r requirements.txt
# Locations of input resources (under resources/) and output directories (under results/).
# All entries must be nested under `file_names`; at column 0 they become unrelated top-level keys and
# `file_names` itself is null.
file_names:
  # Input TSV file (see the BCDM TSV notes at `nfamilies`)
  bold_tsv: resources/BOLD_Public.05-Apr-2024-curated.tsv
  # NOTE(review): presumably the OpenTree labelled supertree - confirm how it is consumed
  open_tre: resources/opentree14.9_tree/labelled_supertree/labelled_supertree.tre
  # NOTE(review): HMM file name matches the default `marker` (COI-5P); presumably must be changed together - confirm
  hmm: resources/hmm/COI-5P.hmm
  fasta_dir: results/fasta/family
  blast_dir: results/blast