-
Notifications
You must be signed in to change notification settings - Fork 0
/
CUPSAT Whole Predictions.R
133 lines (113 loc) · 5.72 KB
/
CUPSAT Whole Predictions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
library(ggplot2)
library(tidyverse)
### Residue change
# Read in data from CUPSAT predictions: https://cupsat.brenda-enzymes.org/
df <- read.csv("~/CUPSAT predictions.csv", sep = ",", header = TRUE)

# Drop every row whose Chain value is "C" (rows are removed, not the column)
df <- df %>%
  filter(Chain != "C")

# Store the amino acid as a factor so ggplot treats it as a categorical
# variable, and make sure the residue number is numeric so it can be
# range-filtered later in the script
df$Amino.acid <- as.factor(df$Amino.acid)
df$Residue.ID <- as.numeric(df$Residue.ID)

### Normalize the predictions over the whole (filtered) data set
# Mean and standard deviation of the predicted DDG values across all remaining
# rows. na.rm = TRUE keeps a single missing prediction from turning the mean,
# the standard deviation and therefore every z-score into NA.
mean_delta_G <- mean(df$Predicted.DDG..kcal.mol., na.rm = TRUE)
std_dev_delta_G <- sd(df$Predicted.DDG..kcal.mol., na.rm = TRUE)

# Z-score of each prediction: (value - mean) / standard deviation
df <- df %>%
  mutate(
    normalized_delta_G =
      (Predicted.DDG..kcal.mol. - mean_delta_G) / std_dev_delta_G
  )
# Plot the z-scores of the whole data set as box plots, coloured by amino acid
# NOTE(review): Residue.ID is numeric here, so ggplot groups the boxes by
# Amino.acid only rather than drawing one box per residue -- confirm that this
# is the intended figure.
plot <- ggplot(
  df,
  aes(x = Residue.ID, y = normalized_delta_G, color = Amino.acid)
) +
  # `height` is not a geom_boxplot() parameter (ggplot2 warned about it and
  # ignored it), so only the box width and transparency are set
  geom_boxplot(width = 0.3, alpha = 0.5) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 12),
    axis.title = element_text(size = 14),
    axis.text.y = element_text(size = 12)
  ) +
  labs(x = "Residue ID", y = "Whole Z-Score", color = "Amino Acid Change")

# Explicit print() so the figure is also shown when the file is run via source()
print(plot)

# Save the plot
ggsave("~/CUPSAT Predictions Whole AGO3.png", plot, width = 10, height = 10, dpi = 300)
### P2
# Keep residues 224-226 (residue 225 and its two direct neighbours), flag the
# LEU entry at residue 225 once, and derive its colour and size from that flag
df_subset <- df %>%
  filter(between(Residue.ID, 224, 226)) %>%
  mutate(
    is_target = Amino.acid == "LEU" & Residue.ID == 225,
    color = ifelse(is_target, "red", "green"),
    point_size = ifelse(is_target, 1.5, 1),
    # Converted last so the numeric comparisons above still work; a factor
    # gives a discrete x axis with one slot per residue
    Residue.ID = as.factor(Residue.ID)
  )

# Box plots of the z-scores for the three residues; the flagged entry is drawn
# in red, everything else in green
# NOTE(review): `size` controls the box line thickness here; newer ggplot2
# releases prefer the `linewidth` aesthetic for that -- confirm against the
# installed version.
plot2 <- ggplot(
  df_subset,
  aes(x = Residue.ID, y = normalized_delta_G, color = color, size = point_size)
) +
  # `height` is not a geom_boxplot() parameter and was ignored with a warning;
  # the colour mapping is inherited from ggplot() above
  geom_boxplot(width = 0.3, alpha = 0.8) +
  # Shade the second x position (1.5 to 2.5), which is residue 225 as long as
  # all three residues are present in the data
  annotate(
    "rect",
    xmin = 1.5, xmax = 2.5, ymin = -2, ymax = 2,
    alpha = 0.1, fill = "blue"
  ) +
  scale_color_identity() +
  scale_size_identity() +
  # Zoom to [-2, 2] with the coordinate system instead of scale limits: scale
  # limits discard out-of-range observations before the box statistics are
  # computed, which would distort the boxes
  coord_cartesian(ylim = c(-2, 2)) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 25),
    axis.title.x = element_text(size = 30),
    axis.title.y = element_blank(), # Remove y-axis title if you are putting graphs on same axis
    axis.text.y = element_text(size = 25)
  ) +
  labs(x = "Residue", y = "Whole Z-Score")

# Explicit print() so the figure is also shown when the file is run via source()
print(plot2)

# Save the plot
ggsave("~/CUPSAT Whole 225.png", plot2, width = 10, height = 10, dpi = 300)
### P1
# Keep residues 506-508 (residue 507 and its two direct neighbours), flag the
# TRP entry at residue 507 once, and derive its colour and size from that flag
df_subset <- df %>%
  filter(between(Residue.ID, 506, 508)) %>%
  mutate(
    is_target = Amino.acid == "TRP" & Residue.ID == 507,
    color = ifelse(is_target, "red", "green"),
    point_size = ifelse(is_target, 1.5, 1),
    # Converted last so the numeric comparisons above still work; a factor
    # gives a discrete x axis with one slot per residue
    Residue.ID = as.factor(Residue.ID)
  )

# Box plots of the z-scores for the three residues; the flagged entry is drawn
# in red, everything else in green
# NOTE(review): `size` controls the box line thickness here; newer ggplot2
# releases prefer the `linewidth` aesthetic for that -- confirm against the
# installed version.
plot3 <- ggplot(
  df_subset,
  aes(x = Residue.ID, y = normalized_delta_G, color = color, size = point_size)
) +
  # `height` is not a geom_boxplot() parameter and was ignored with a warning;
  # the colour mapping is inherited from ggplot() above
  geom_boxplot(width = 0.3, alpha = 0.8) +
  # Shade the second x position (1.5 to 2.5), which is residue 507 as long as
  # all three residues are present in the data
  annotate(
    "rect",
    xmin = 1.5, xmax = 2.5, ymin = -2, ymax = 2,
    alpha = 0.1, fill = "blue"
  ) +
  scale_color_identity() +
  scale_size_identity() +
  # Zoom to [-2, 2] with the coordinate system instead of scale limits: scale
  # limits discard out-of-range observations before the box statistics are
  # computed, which would distort the boxes
  coord_cartesian(ylim = c(-2, 2)) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 25),
    axis.title.x = element_text(size = 30),
    axis.title.y = element_blank(), # Remove y-axis title if you are putting graphs on same axis
    axis.text.y = element_text(size = 25)
  ) +
  labs(x = "Residue", y = "Whole Z-Score")

# Explicit print() so the figure is also shown when the file is run via source()
print(plot3)

# Save the plot
ggsave("~/CUPSAT Whole 507.png", plot3, width = 10, height = 10, dpi = 300)
### E638A
# Keep residues 637-639 (residue 638 and its two direct neighbours), flag the
# ALA entry at residue 638 once, and derive its colour and size from that flag
df_subset <- df %>%
  filter(between(Residue.ID, 637, 639)) %>%
  mutate(
    is_target = Amino.acid == "ALA" & Residue.ID == 638,
    color = ifelse(is_target, "red", "green"),
    point_size = ifelse(is_target, 1.5, 1),
    # Converted last so the numeric comparisons above still work; a factor
    # gives a discrete x axis with one slot per residue
    Residue.ID = as.factor(Residue.ID)
  )

# Box plots of the z-scores for the three residues; the flagged entry is drawn
# in red, everything else in green. Unlike the P2 and P1 figures this one keeps
# its y-axis title.
# NOTE(review): `size` controls the box line thickness here; newer ggplot2
# releases prefer the `linewidth` aesthetic for that -- confirm against the
# installed version.
plot4 <- ggplot(
  df_subset,
  aes(x = Residue.ID, y = normalized_delta_G, color = color, size = point_size)
) +
  # `height` is not a geom_boxplot() parameter and was ignored with a warning;
  # the colour mapping is inherited from ggplot() above
  geom_boxplot(width = 0.3, alpha = 0.8) +
  # Shade the second x position (1.5 to 2.5), which is residue 638 as long as
  # all three residues are present in the data
  annotate(
    "rect",
    xmin = 1.5, xmax = 2.5, ymin = -2, ymax = 2,
    alpha = 0.1, fill = "blue"
  ) +
  scale_color_identity() +
  scale_size_identity() +
  # Zoom to [-2, 2] with the coordinate system instead of scale limits: scale
  # limits discard out-of-range observations before the box statistics are
  # computed, which would distort the boxes
  coord_cartesian(ylim = c(-2, 2)) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 25),
    axis.title = element_text(size = 30),
    axis.text.y = element_text(size = 25)
  ) +
  labs(x = "Residue", y = "Whole Z-Score")

# Explicit print() so the figure is also shown when the file is run via source()
print(plot4)

# Save the plot
ggsave("~/CUPSAT Whole 638.png", plot4, width = 10, height = 10, dpi = 300)