forked from danieltorsil/Pachnoda-marginata-gut-microbiome
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathR-commands_functional-analysis-assemblies-MAGs.Rmd
66 lines (52 loc) · 3.19 KB
/
R-commands_functional-analysis-assemblies-MAGs.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
---
title: "Commands for search genes involved in functions of interest in assemblies and MAGs"
output: html_notebook
---
# MAGs - presence/absence EC numbers
```{r}
# load package
library(pheatmap)
library(dplyr)
data <- read.csv2('functional-search/kofamscan_summary_MAGs_filtered.csv', sep = ',')
MAGs = data$MAG
Phylum = data$phylum
df <- data.frame(Phylum, MAGs)
df_unique <- unique(df)
rownames(df_unique) <- df_unique$MAGs
df_unique$samples <- NULL
matriz_conteo <- as.matrix(table(data$MAGs, data$EC))
matriz_conteo_modificada <- matriz_conteo[rowSums(matriz_conteo) != 0, ]
ann_colors = list(
Phylum = c(Bacteroidota = "#a34040", Desulfobacterota = "#de1818", Desulfobacterota_I = "#ebb0b0", Firmicutes = "#8c5b2d", Firmicutes_A = "#473f38", Firmicutes_C = "#ff7b00", Actinobacteriota = "#b8a70d", Planctomycetota = "#b0d45d", Fibrobacterota = "#85ab7b", Patescibacteria = "#5f8385", Proteobacteria = "#676ab5", Fusobacteriota = "#9967b5", Verrucomicrobiota = "#d13d73", Thermoplasmatota = "#75141a")
)
col1 = colorRampPalette(c("#E2ECE5", "#73aabf"))(111) #set the order of greys
col1[c(1)] = c("white")
matriz_binaria <- ifelse(matriz_conteo_modificada > 0, 1, 0)
order_EC = c("3.2.1.4", "3.2.1.21", "2.7.7.4", "2.7.1.25", "1.8.4.8", "1.8.1.2", "1.8.7.1", "1.8.99.2", "1.8.99.5", "1.2.7.12", "1.5.98.2", "1.8.98.1", "2.8.4.1", "2.7.2.1", "2.3.1.8", "6.2.1.1", "2.3.1.169", "2.1.1.245", "1.2.7.4", "2.1.1.90", "2.1.1.246", "2.1.1.248")
matriz_binaria <- matriz_binaria[, order_EC]
MAGs_plot = pheatmap(matriz_binaria, annotation_row = df_unique, cluster_rows = T, cluster_cols = F, show_rownames =T, annotation_colors = ann_colors, color = col1, angle_col = 45, legend = T)
```
# assemblies - presence/absence EC numbers
```{r}
library(pheatmap)
library(dplyr)
data <- read.csv2('functional-search/kofamscan_summary_assemblies_filtered.csv', sep = ',')
library(ggplot2) # plotting
library(GDAdata) # data (SpeedSki)
library(stringr)
library(scales)
order_activity = c("cellulose", "assimilatory", "dissimilatory", "hydrogenotrophic", "acetoclastic", "methylotrophic")
data$activity <- factor(data$activity, levels = order_activity)
order_EC = c("3.2.1.4", "3.2.1.91", "3.2.1.21", "2.7.7.4", "2.7.1.25", "1.8.4.8", "1.8.1.2", "1.8.7.1", "1.8.99.2", "1.8.99.5", "1.2.7.12", "2.3.1.101", "3.5.4.27", "1.5.98.1", "1.5.98.2", "2.1.1.86", "1.8.98.1", "2.8.4.1", "2.3.1.8", "2.7.2.1", "6.2.1.1", "2.3.1.169", "2.1.1.245", "1.2.7.4", "2.1.1.90", "2.1.1.246", "2.1.1.248", "2.1.1.249", "2.1.1.247")
data$EC <- factor(data$EC_proteina, levels = order_EC)
p <- ggplot(data, aes(assembly, EC)) +
geom_bin2d(aes(fill = after_stat(count > 0)), bins = 30)
p = p + theme(axis.text.x = element_text(angle = 0, size=15, hjust = 0.5, vjust = 0.5), axis.text.y = element_text(size=12), panel.background = element_rect(fill = "white", colour = "grey"), axis.title.y = element_text(size = 15))
p = p + facet_grid(activity~., scales = "free", space = "free") +
scale_fill_manual(values = "#73aabf") +
scale_x_discrete(labels = label_wrap(40)) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank())+
scale_y_discrete(limits=rev)
assemblies_plot = p + geom_tile(color = "white", size = 0.5, linetype = 1, fill = NA)
```