-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile_phylogeography
116 lines (102 loc) · 4.25 KB
/
Snakefile_phylogeography
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
# To run locally:
# snakemake --snakefile Snakefile_phylogeography --keep-going --cores 4 --use-singularity --singularity-prefix ~/.singularity --singularity-args "--home ~"
# To visualise the pipeline
# snakemake --snakefile Snakefile_phylogeography --dag | dot -Tsvg > pipeline_phylogeography.svg
# The aggregate rule `all` is declared local, i.e. Snakemake runs it on the
# submitting host instead of dispatching it as a cluster job.
localrules: all
# Project root: the "folder" config entry if given, otherwise the parent of
# the working directory; always resolved to an absolute path.
folder = os.path.abspath(config.get("folder", '..'))
# All inputs and outputs of the rules below live under <folder>/data.
data_dir = os.path.join(folder, 'data')
# Dating method labels; they appear in the time-tree file names and are
# expanded into the final targets of rule `all`.
dating = ['treetime', 'lsd2']
# Ensure a ./logs directory exists (relative to the working directory).
# NOTE(review): presumably used by the cluster submission command - confirm.
os.makedirs('logs', exist_ok=True)
rule all:
    # Final targets of the workflow: PastML HTML visualisations of the
    # country reconstruction on the best tree.
    input:
        # Compressed maps focused on Vietnam, one per dating method.
        expand(
            os.path.join(data_dir, 'acr', 'compressed.country_Vietnam.best_tree.pos.{dating}.html'),
            dating=dating
        ),
        # Full (uncompressed) tree visualisations, one per dating method.
        expand(
            os.path.join(data_dir, 'acr', 'full.country.best_tree.pos.{dating}.html'),
            dating=dating
        ),
        # The same two visualisations for four subsampled replicates (0..3)
        # of the lsd2-dated tree.
        expand(
            os.path.join(data_dir, 'acr', 'compressed.country_Vietnam.best_tree.pos.lsd2.subsampled_{i}.html'),
            i=range(4)
        ),
        expand(
            os.path.join(data_dir, 'acr', 'full.country.best_tree.pos.lsd2.subsampled_{i}.html'),
            i=range(4)
        ),
rule pastml_col:
    '''
    Ancestral character reconstruction (ACR) with PastML.

    Reconstructs the metadata column {col} (country, region or intregion)
    on the time tree {tree}.nexus, using the annotations in
    metadata.combined.tab. PastML writes its results into a per-column,
    per-tree working directory; the declared outputs are the files expected
    there. The parameter file name presupposes the MPPA method with the F81
    model (NOTE(review): these are not set on the command line, so this
    relies on PastML's defaults - confirm when changing the container).
    '''
    input:
        tree = os.path.join(data_dir, 'timetrees', '{tree}.nexus'),
        data = os.path.join(data_dir, 'metadata.combined.tab'),
    output:
        # The constraint on {col} is given on its first occurrence and
        # applies to the remaining outputs as well.
        data = os.path.join(data_dir, 'acr', 'pastml', '{col,country|region|intregion}', '{tree}', 'combined_ancestral_states.tab'),
        pars = os.path.join(data_dir, 'acr', 'pastml', '{col}', '{tree}', 'params.character_{col}.method_MPPA.model_F81.tab'),
        tree = os.path.join(data_dir, 'acr', 'pastml', '{col}', '{tree}', 'named.tree_{tree}.nwk'),
    params:
        mem = 4000,
        name = 'acr_{col}.{tree}',
        # Directory PastML writes into; the parent of all three outputs.
        wd = os.path.join(data_dir, 'acr', 'pastml', '{col}', '{tree}')
    singularity: "docker://evolbioinfo/pastml:v1.9.30"
    threads: 2
    shell:
        # The command is echoed first so that it shows up in the job log.
        """
        echo "pastml --tree {input.tree} --data {input.data} --columns "{wildcards.col}" -v --work_dir "{params.wd}" --smoothing --resolve_polytomies "
        pastml --tree {input.tree} --data {input.data} --columns "{wildcards.col}" -v --work_dir "{params.wd}" \
        --smoothing --resolve_polytomies
        """
rule pastml_vis_tree:
    '''
    Full-tree visualisation with PastML.

    Takes the named tree produced by the ACR step for column {col} and
    renders it as an uncompressed HTML map. The states are read from the
    tree annotations (--prediction_method COPY) rather than re-estimated.
    PastML's working directory is temporary and removed afterwards.
    '''
    input:
        tree = os.path.join(data_dir, 'acr', 'pastml', '{col}', '{tree}', 'named.tree_{tree}.nwk'),
    output:
        map = os.path.join(data_dir, 'acr', 'full.{col,country|region|intregion}.{tree}.html'),
    params:
        mem = 4000,
        name = 'vist_{col}.{tree}',
        # Scratch directory, distinct from the ACR one so that the cleanup
        # below does not touch the reconstruction results.
        wd = os.path.join(data_dir, 'acr', 'pastml', 'vist_{col}', '{tree}'),
        qos = 'fast'
    singularity: "docker://evolbioinfo/pastml:v1.9.30"
    threads: 2
    shell:
        """
        pastml --tree {input.tree} -v --work_dir "{params.wd}" --html "{output.map}" \
        --columns "{wildcards.col}" --prediction_method COPY
        rm -rf "{params.wd}"
        """
rule pastml_vis_focused:
    '''
    Compressed PastML visualisation focused on one country.

    Takes the named tree from the country ACR and renders a compressed HTML
    map with the focus set to the state {country} (a capitalised,
    letters-only name). States are copied from the tree annotations
    (--prediction_method COPY); --tip_size_threshold is fixed at 15.
    PastML's working directory is temporary and removed afterwards.
    '''
    input:
        tree = os.path.join(data_dir, 'acr', 'pastml', 'country', '{tree}', 'named.tree_{tree}.nwk'),
    output:
        map = os.path.join(data_dir, 'acr', 'compressed.country_{country,[A-Z][a-zA-Z]+}.{tree}.html'),
    params:
        mem = 4000,
        name = 'vis_country_{country}.{tree}',
        # Scratch directory, separate from the ACR results.
        wd = os.path.join(data_dir, 'acr', 'pastml', 'vis_{country}', '{tree}'),
        qos = 'fast'
    # NOTE(review): this tag differs from the v1.9.30 image used by the other
    # PastML rules in this file - confirm whether the pin is intentional.
    singularity: "docker://evolbioinfo/pastml:v1.9.29.9"
    threads: 2
    shell:
        """
        pastml --tree {input.tree} -v --work_dir "{params.wd}" --html_compressed "{output.map}" \
        --columns country --tip_size_threshold 15 --prediction_method COPY \
        --focus "{wildcards.country}"
        rm -rf "{params.wd}"
        """
rule subsample:
    '''
    Produces a subsampled replicate of a dated best tree.

    Runs py/subsample.py (path relative to the working directory) on
    best_tree.pos.{dating}.nexus with the combined metadata table and a
    fixed --threshold of 15. The replicate index {i} only distinguishes the
    output files; it is not passed to the script.
    '''
    input:
        tree = os.path.join(data_dir, 'timetrees', 'best_tree.pos.{dating}.nexus'),
        tab = os.path.join(data_dir, 'metadata.combined.tab'),
    output:
        tree = os.path.join(data_dir, 'timetrees', 'best_tree.pos.{dating}.subsampled_{i}.nexus'),
    params:
        mem = 4000,
        name = 'subsample',
        qos = 'fast'
    singularity: "docker://evolbioinfo/pastml:v1.9.30"
    threads: 2
    shell:
        """
        python3 py/subsample.py --in_tree {input.tree} --out_tree {output.tree} \
        --tab {input.tab} --threshold 15
        """