-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBox_plots_pairwise and group comparison.R
381 lines (283 loc) · 14.4 KB
/
Box_plots_pairwise and group comparison.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
##Plotting box plots for phyla at four different time points by RCT
##Using the rarified RM phyloseq file for plotting phlya trend
psnoncontam_tru_final_rare_rm
##Agglomerate at phyla level and extract the OTU table
phylum_data_total_rm <- tax_glom(psnoncontam_tru_final_rare_rm,taxrank = "Phylum")
phylum_data_total_rm
#Relative abundances
#phylum_data_total_rm.rel <- transform(phylum_data_total_rm, "compositional")
#phylum_data_total_rm.rel
#using CLR transformed abundances to plot
#Using CLR transformed abundances here
#Write the phyloseq object as a CSV table with ASVs and counts
phylum_data_total_rm
write_phyloseq(phylum_data_total_rm)
#Read the CSV file
abund_table <- read.csv("phylum_rm_rare_clr_new.csv", row.names=1, check.names= FALSE) #Added header "Features" to the first clumn of ASVs
abund_table_t<-t(abund_table)
#View(abund_table_t)
#install.packages("zCompositions")
#library (zCompositions)
abund_table_r <- t(cmultRepl((abund_table_t), method="CZM", output="prop")) #prop calculates the imputed proportion is similar to "counts" and then taking proportions in the previous version; CZM: count zero multiplicative
#Check the before and after files
#View(abund_table)
#View(abund_table_r)
#Apply CLR transformation
abund_clr <- t(apply(abund_table_r, 2, function(x){log(x) - mean (log(x))}))
#View(abund_clr)
#Replacing original counts with CLR transformed values
phylum_data_rm_zclr <- phylum_data_total_rm
otu_table(phylum_data_rm_zclr) <- otu_table(abund_clr, taxa_are_rows = F)
phylum_data_rm_zclr
#replacing ASVs with short string
taxa_names(phylum_data_rm_zclr) <- paste0("SV", seq(ntaxa(phylum_data_rm_zclr)))
#View(tax_table(phylum_data_rm_zclr))
#Extracting short strings and their respective taxonomy
phylum_list<- tax_table(phylum_data_rm_zclr)@.Data
write.csv(phylum_list, "phylum_total_list.csv")
#extracting as csv and replacing string names with phyla names, transposed and added metadata
write_phyloseq(phylum_data_rm_zclr, type= 'all')
#transposed, added additional metadata and importing the file
phylum_zibr_rm <- read.table("otu_table_phyla_rm_clr_new.csv", header = TRUE, sep= ",", check.names = F)
phylum_zibr_rm
#Melting the data frame to use it for boxplot and statistics
trans_phylum_zibr_rm <- melt(phylum_zibr_rm)
trans_phylum_zibr_rm
#Changing the column name from 'variable' to 'Phylum'
colnames(trans_phylum_zibr_rm)[4] <- "Phylum"
trans_phylum_zibr_rm
#Rstatix
stat.test.phylum <- trans_phylum_zibr_rm %>%
group_by(Phylum, TimePoint) %>%
wilcox_test(value ~ Group) %>%
adjust_pvalue(method = "fdr") %>%
add_significance()
stat.test.phylum
#Adding XY position
stat.test.phylum <- stat.test.phylum %>% add_xy_position(x = "TimePoint")
stat.test.phylum
#Box plot
bxp.phylum.zibr <- ggboxplot(data= trans_phylum_zibr_rm, x = "TimePoint", y= "value", facet.by = "Phylum", xlab = "Time Points", ylab="Relative abundance", fill = "Group", width = 0.8, outlier.shape= NA)
#modifying the plot to use individual y-scale for each facet
bxp.phylum.zibr <- facet(p = bxp.phylum.zibr,facet.by = "Phylum",scales = "free_y") + scale_fill_manual(values = c("#80D7DD", "#F3DB80")) +
geom_point(aes(color = Group), position = position_dodge(0.7)) + stat_summary(
fun = median,
geom = 'line',
aes(group = Group, colour = Group),
position = position_dodge(width = 0.7) #this has to be added
) +
scale_color_manual(values = c("#354360", "#7D6608")) + xlab("Time points") +
ylab("Abundance (CLR)")
bxp.phylum.zibr + theme_bw() + theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) + stat_pvalue_manual(stat.test.phylum,label= "p.adj.signif", tip.length=0.02, hide.ns = T)
####Box plots for genera at four time points by RCT groups
psnoncontam_tru_final_rare_rm
##Agglomerate at phyla level and extract the OTU table
genus_data_total_rm <- tax_glom(psnoncontam_tru_final_rare_rm,taxrank = "Genus")
genus_data_total_rm
#Relative abundances
#genus_data_total_rm.rel <- transform(genus_data_total_rm, "compositional")
#genus_data_total_rm.rel
#Using CLR abundances for plot
#Using CLR transformed abundances here
#Write the phyloseq object as a CSV table with ASVs and counts
genus_data_total_rm
write_phyloseq(genus_data_total_rm)
#Read the CSV file
abund_table <- read.csv("genus_rm_rare_clr_new.csv", row.names=1, check.names= FALSE) #Added header "Features" to the first clumn of ASVs
abund_table_t<-t(abund_table)
#View(abund_table_t)
#install.packages("zCompositions")
#library (zCompositions)
abund_table_r <- t(cmultRepl((abund_table_t), method="CZM", output="prop")) #prop calculates the imputed proportion is similar to "counts" and then taking proportions in the previous version; CZM: count zero multiplicative
#Check the before and after files
#View(abund_table)
#View(abund_table_r)
#Apply CLR transformation
abund_clr <- t(apply(abund_table_r, 2, function(x){log(x) - mean (log(x))}))
#View(abund_clr)
#Replacing original counts with CLR transformed values
genus_data_rm_zclr <- genus_data_total_rm
otu_table(genus_data_rm_zclr) <- otu_table(abund_clr, taxa_are_rows = F)
genus_data_rm_zclr
#replacing ASVs with short string
taxa_names(genus_data_rm_zclr) <- paste0("SV", seq(ntaxa(genus_data_rm_zclr)))
#View(tax_table(genus_data_rm_zclr))
#Extracting short strings and their respective taxonomy
genus_list<- tax_table(genus_data_rm_zclr)@.Data
write.csv(genus_list, "genus_total_list.csv")
#extracting as csv and replacing string names with phyla names, transposed and added metadata
write_phyloseq(genus_data_rm_zclr, type= 'all')
#transposed, added additional metadata and importing the file
genus_zibr_rm <- read.table("otu_table_genus_rm_clr_new.csv", header = TRUE, sep= ",", check.names = F)
genus_zibr_rm
#Melting the data frame to use it for boxplot and statistics
trans_genus_zibr_rm <- melt(genus_zibr_rm)
trans_genus_zibr_rm
#Changing the column name from 'variable' to 'Genus'
colnames(trans_genus_zibr_rm)[4] <- "Genus"
trans_genus_zibr_rm
#Rstatix
stat.test.genus <- trans_genus_zibr_rm %>%
group_by(Genus, TimePoint) %>%
wilcox_test(value ~ Group) %>%
adjust_pvalue(method = "fdr") %>%
add_significance()
stat.test.genus
#Exporting the post-hoc analysis as csv file
#write.csv(stat.test.genus, "genus_summary_posthoc_statistics_rct.csv")
#Adding XY position
stat.test.genus <- stat.test.genus %>% add_xy_position(x = "TimePoint")
stat.test.genus
#Box plot
bxp.genus.zibr <- ggboxplot(data= trans_genus_zibr_rm, x = "TimePoint", y= "value", facet.by = "Genus", xlab = "Time Points", ylab="Abundance (CLR)", fill = "Group", width = 0.8, outlier.shape= NA)
#modifying the plot to use individual y-scale for each facet
bxp.genus.zibr <- facet(p = bxp.genus.zibr,facet.by = "Genus",scales = "free_y") + scale_fill_manual(values = c("#80D7DD", "#F3DB80")) +
geom_point(aes(color = Group), position = position_dodge(0.7)) + stat_summary(
fun = median,
geom = 'line',
aes(group = Group, colour = Group),
position = position_dodge(width = 0.7) #this has to be added
) +
scale_color_manual(values = c("#354360", "#7D6608")) + xlab("Time points") +
ylab("Abundance (CLR)")
bxp.genus.zibr + theme_bw() + theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) + stat_pvalue_manual(stat.test.genus,label= "p.adj.signif", tip.length=0.02, hide.ns = T)
####Box plots for taxa at four time points by MOM groups
##At phylum level
#psnoncontam_tru_final_rare_rm
##Agglomerate at phyla level and extract the OTU table
#phylum_data_total_rm <- tax_glom(psnoncontam_tru_final_rare_rm,taxrank = "Phylum")
#phylum_data_total_rm
#Relative abundances
#phylum_data_total_rm.rel <- transform(phylum_data_total_rm, "compositional")
#phylum_data_total_rm.rel
#using CLR transformed abundances to plot
#Using CLR transformed abundances here
#Write the phyloseq object as a CSV table with ASVs and counts
#phylum_data_total_rm
#write_phyloseq(phylum_data_total_rm)
#Read the CSV file
#abund_table <- read.csv("phylum_rm_rare_clr.csv", row.names=1, check.names= FALSE) #Added header "Features" to the first clumn of ASVs
#abund_table_t<-t(abund_table)
#View(abund_table_t)
#install.packages("zCompositions")
#library (zCompositions)
#abund_table_r <- t(cmultRepl((abund_table_t), method="CZM", output="prop")) #prop calculates the imputed proportion is similar to "counts" and then taking proportions in the previous version; CZM: count zero multiplicative
#Check the before and after files
#View(abund_table)
#View(abund_table_r)
#Apply CLR transformation
#abund_clr <- t(apply(abund_table_r, 2, function(x){log(x) - mean (log(x))}))
#View(abund_clr)
#Replacing original counts with CLR transformed values
#phylum_data_rm_zclr <- phylum_data_total_rm
#otu_table(phylum_data_rm_zclr) <- otu_table(abund_clr, taxa_are_rows = F)
#phylum_data_rm_zclr
#replacing ASVs with short string
#taxa_names(phylum_data_rm_zclr) <- paste0("SV", seq(ntaxa(phylum_data_rm_zclr)))
#View(tax_table(phylum_data_rm_zclr))
#Extracting short strings and their respective taxonomy
#phylum_list<- tax_table(phylum_data_rm_zclr)@.Data
#write.csv(phylum_list, "phylum_total_list.csv")
#extracting as csv and replacing string names with phyla names, transposed and added metadata
write_phyloseq(phylum_data_rm_zclr, type= 'all')
#transposed, added additional metadata and importing the file
phylum_zibr_rm_MOM <- read.table("otu_table_phyla_rm_MOM_clr_new.csv", header = TRUE, sep= ",", check.names = F)
phylum_zibr_rm_MOM
#Melting the data frame to use it for boxplot and statistics
trans_phylum_zibr_rm_MOM <- melt(phylum_zibr_rm_MOM)
trans_phylum_zibr_rm_MOM
#Changing the column name from 'variable' to 'Phylum'
colnames(trans_phylum_zibr_rm_MOM)[4] <- "Phylum"
trans_phylum_zibr_rm_MOM
#Rstatix
stat.test.phylum.MOM <- trans_phylum_zibr_rm_MOM %>%
group_by(Phylum, TimePoint) %>%
wilcox_test(value ~ MOM_Group) %>%
adjust_pvalue(method = "fdr") %>%
add_significance()
stat.test.phylum.MOM
#Adding XY position
stat.test.phylum.MOM <- stat.test.phylum.MOM %>% add_xy_position(x = "TimePoint")
stat.test.phylum.MOM
#Box plot
bxp.phylum.zibr.MOM <- ggboxplot(data= trans_phylum_zibr_rm_MOM, x = "TimePoint", y= "value", facet.by = "Phylum", xlab = "Time Points", ylab="Relative abundance", fill = "MOM_Group", width = 0.8, outlier.shape= NA)
#modifying the plot to use individual y-scale for each facet
bxp.phylum.zibr.MOM <- facet(p = bxp.phylum.zibr.MOM,facet.by = "Phylum",scales = "free_y") + scale_fill_manual(values = c("#E56878", "#D4DFE6")) +
geom_point(aes(color = MOM_Group), position = position_dodge(0.7)) + stat_summary(
fun = median,
geom = 'line',
aes(group = MOM_Group, colour = MOM_Group),
position = position_dodge(width = 0.7) #this has to be added
) +
scale_color_manual(values=c("#DC354B", "#A9BFCC")) + xlab("Time points") +
ylab("Abundance (CLR)")
bxp.phylum.zibr.MOM + theme_bw() + theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) + stat_pvalue_manual(stat.test.phylum.MOM,label= "p.adj.signif", tip.length=0.02, hide.ns = T)
###For genus level
psnoncontam_tru_final_rare_rm
##Agglomerate at phyla level and extract the OTU table
genus_data_total_rm <- tax_glom(psnoncontam_tru_final_rare_rm,taxrank = "Genus")
genus_data_total_rm
write_phyloseq(genus_data_total_rm)
#Read the CSV file
abund_table <- read.csv("otu_table_genus_rm_clr_april_2021.csv", row.names=1, check.names= FALSE) #Added header "Features" to the first clumn of ASVs
abund_table_t<-t(abund_table)
#View(abund_table_t)
#install.packages("zCompositions")
#library (zCompositions)
abund_table_r <- t(cmultRepl((abund_table_t), method="CZM", output="prop")) #prop calculates the imputed proportion is similar to "counts" and then taking proportions in the previous version; CZM: count zero multiplicative
#Check the before and after files
#View(abund_table)
#View(abund_table_r)
#Apply CLR transformation
abund_clr <- t(apply(abund_table_r, 2, function(x){log(x) - mean (log(x))}))
#View(abund_clr)
#Replacing original counts with CLR transformed values
genus_data_rm_zclr <- genus_data_total_rm
otu_table(genus_data_rm_zclr) <- otu_table(abund_clr, taxa_are_rows = F)
genus_data_rm_zclr
#replacing ASVs with short string
#taxa_names(genus_data_rm_zclr) <- paste0("SV", seq(ntaxa(genus_data_rm_zclr)))
#View(tax_table(genus_data_rm_zclr))
#Extracting short strings and their respective taxonomy
#genus_list<- tax_table(genus_data_rm_zclr)@.Data
#write.csv(genus_list, "genus_total_list.csv")
#extracting as csv and replacing string names with phyla names, transposed and added metadata
write_phyloseq(genus_data_rm_zclr, type= 'all')
#transposed, added additional metadata and importing the file
genus_zibr_rm.MOM <- read.table("otu_table_genus_rm_MOM_clr_new.csv", header = TRUE, sep= ",", check.names = F)
genus_zibr_rm.MOM
#Melting the data frame to use it for boxplot and statistics
trans_genus_zibr_rm.MOM <- melt(genus_zibr_rm.MOM)
trans_genus_zibr_rm.MOM
#Changing the column name from 'variable' to 'Genus'
colnames(trans_genus_zibr_rm.MOM)[4] <- "Genus"
trans_genus_zibr_rm.MOM
#Rstatix
stat.test.genus.MOM <- trans_genus_zibr_rm.MOM %>%
group_by(Genus, TimePoint) %>%
wilcox_test(value ~ MOM_Group) %>%
adjust_pvalue(method = "fdr") %>%
add_significance()
stat.test.genus.MOM
#Exporting this post-hoc analysis data as a csv file
#write.csv(stat.test.genus.MOM, "genus_summary_posthoc_statistics_MOM.csv")
#Adding XY position
stat.test.genus.MOM <- stat.test.genus.MOM %>% add_xy_position(x = "TimePoint")
stat.test.genus.MOM
#Box plot
bxp.genus.zibr <- ggboxplot(data= trans_genus_zibr_rm.MOM, x = "TimePoint", y= "value", facet.by = "Genus", xlab = "Time Points", ylab="Abundance (CLR)", fill = "MOM_Group", width = 0.8, outlier.shape= NA)
#modifying the plot to use individual y-scale for each facet
bxp.genus.zibr <- facet(p = bxp.genus.zibr,facet.by = "Genus",scales = "free_y") + scale_fill_manual(values = c("#E56878", "#D4DFE6")) +
geom_point(aes(color = MOM_Group), position = position_dodge(0.7)) + stat_summary(
fun = median,
geom = 'line',
aes(group = MOM_Group, colour = MOM_Group),
position = position_dodge(width = 0.7) #this has to be added
) +
scale_color_manual(values=c("#DC354B", "#A9BFCC")) + xlab("Time points") +
ylab("Abundance (CLR)")
bxp.genus.zibr + theme_bw() + theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) + stat_pvalue_manual(stat.test.genus.MOM,label= "p.adj.signif", tip.length=0.02, hide.ns = T)