forked from jvfe/dada2_latch
-
Notifications
You must be signed in to change notification settings - Fork 1
/
dada2.R
117 lines (84 loc) · 2.83 KB
/
dada2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
library(dada2)
library(phyloseq)
args <- commandArgs(trailingOnly = TRUE)
read_dir1 <- args[1]
read_dir2 <- args[2]
output_dir <- args[3]
taxonomy_ref_fasta <- args[4]
minLen <- as.numeric(args[5])
maxN <- as.numeric(args[6])
minQ <- as.numeric(args[7])
maxEE <- as.numeric(args[8])
truncQ <- as.numeric(args[9])
trimLeft <- as.numeric(args[10])
trimRight <- as.numeric(args[11])
species_assignment_fasta <- args[12]
fnFs <-
sort(list.files(read_dir1, full.names = TRUE))
fnRs <-
sort(list.files(read_dir2, full.names = TRUE))
sample.names <- tools::file_path_sans_ext(basename(fnFs))
##### Filter and Trim
filtFs <-
file.path("/root", "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
filtRs <-
file.path("/root", "filtered", paste0(sample.names, "_R_filt.fastq.gz"))
names(filtFs) <- sample.names
names(filtRs) <- sample.names
message("--- Filtering and Trimming Data ---")
out <- filterAndTrim(
fnFs,
filtFs,
fnRs,
filtRs,
minLen = minLen,
maxN = maxN,
minQ = minQ,
maxEE = maxEE,
truncQ = truncQ,
trimLeft = trimLeft,
trimRight = trimRight,
rm.phix = TRUE,
compress = TRUE,
multithread = TRUE
)
##### Dereplicate
message("--- Dereplicating FASTQ data ---")
derepF1 <- derepFastq(filtFs, verbose = TRUE)
derepR1 <- derepFastq(filtRs, verbose = TRUE)
##### Learn Errors
message("--- Learning Error Rates from the Dataset ---")
errF <- learnErrors(derepF1, multithread = TRUE)
errR <- learnErrors(derepR1, multithread = TRUE)
errF_plot <- plotErrors(errF, nominalQ=TRUE)
errR_plot <- plotErrors(errR, nominalQ=TRUE)
ggplot2::ggsave(paste0(output_dir, "/error_rates_forward.pdf"), errF_plot)
ggplot2::ggsave(paste0(output_dir, "/error_rates_reverse.pdf"), errR_plot)
##### Infer ASVs
message("--- Inferring sample composition ---")
dadaFs <- dada(derepF1, err = errF, multithread = TRUE)
dadaRs <- dada(derepR1, err = errR, multithread = TRUE)
##### Merge Pairs and Remove Chimeras
message("--- Merging read pairs and removing chimeras ---")
mergers <-
mergePairs(dadaFs, derepF1, dadaRs, derepR1, verbose = TRUE)
seqtab <- makeSequenceTable(mergers)
seqtab.nochim <-
removeBimeraDenovo(seqtab,
method = "consensus",
multithread = TRUE,
verbose = TRUE)
write.csv(as.data.frame(seqtab.nochim),
paste0(output_dir, "/asv_table.csv"),
col.names = NA)
###### Assign taxonomy
message("--- Assigning taxonomy to ASVs ---")
taxa <-
assignTaxonomy(seqtab.nochim, taxonomy_ref_fasta, multithread = TRUE)
if (is.na(species_assignment_fasta) == FALSE) {
taxa <- addSpecies(taxa, species_assignment_fasta)
}
write.csv(taxa, paste0(output_dir, "/species_table.csv"), col.names = NA)
ps <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE),
tax_table(as.matrix(taxa)))
save(ps, file = paste0(output_dir, "/phyloseq_object.RData"))