-
Notifications
You must be signed in to change notification settings - Fork 0
/
cluster_graphs.R
131 lines (109 loc) · 3.69 KB
/
cluster_graphs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
library(ggplot2)
library(cowplot)
library(tidyverse)
# Read the two CSV tables from the gifrop_out directory of the test_data4 run.
# `cii` is used below through its island_ID, genes, num_genes, island_type,
# RESISTANCE and primary/secondary cluster columns; `pgff` through its Gene
# column.
cii <- read_csv(
  file = '/home/Julian.Trachsel/Documents/gifrop/test_data4/gifrop_out/clustered_island_info.csv'
)
pgff <- read_csv(
  file = '/home/Julian.Trachsel/Documents/gifrop/test_data4/gifrop_out/islands_pangenome_gff.csv'
)
# Overview plot: one point per row of `cii`, natural-log gene count on x,
# primary cluster (as a discrete factor) on y, point fill by island type.
ggplot(
  data = cii,
  mapping = aes(
    x = log(num_genes),
    y = factor(primary_cluster),
    fill = island_type
  )
) +
  geom_point(shape = 21, color = 'white', size = 3) +
  theme_cowplot() +
  theme(panel.grid.major = element_line(color = 'grey'))
# Number of resistance entries versus island size.
# RESISTANCE is split on a literal '|' (fixed = TRUE, so it is not treated as
# a regex alternation); rows where RESISTANCE is NA are dropped before counting.
cii %>%
  filter(!is.na(RESISTANCE)) %>%
  mutate(
    num_res = map_dbl(
      .x = strsplit(RESISTANCE, split = '|', fixed = TRUE),
      .f = length
    )
  ) %>%
  ggplot(aes(x = num_res, y = num_genes, color = island_type)) +
  geom_point() +
  # xlim() removes (with a warning) any row whose num_res falls outside 0-7.
  xlim(0, 7)
# Keep only the rows assigned to primary cluster 8.
cii8 <- filter(cii, primary_cluster == 8)

# What is the 'core genome' of these islands?
# Same scatter as the primary-cluster overview, but with the secondary
# cluster on the y axis.
ggplot(
  data = cii8,
  mapping = aes(
    x = log(num_genes),
    y = factor(secondary_cluster),
    fill = island_type
  )
) +
  geom_point(shape = 21, color = 'white', size = 3) +
  theme_cowplot() +
  theme(panel.grid.major = element_line(color = 'grey'))
# Presence/absence table for the cluster-8 islands: one row per island_ID,
# one column per gene name found in the '|'-delimited `genes` field,
# 1 = present, 0 = absent.
cii8_PA <-
  cii8 %>%
  select(island_ID, genes) %>%
  separate_rows(genes, sep = '\\|') %>%
  # A gene listed more than once for the same island would make
  # pivot_wider() emit list-columns (with a warning) instead of 0/1 values,
  # so collapse repeated island/gene pairs first.
  distinct() %>%
  mutate(present = 1) %>%
  pivot_wider(names_from = genes, values_from = present, values_fill = 0)

# Same data as a numeric matrix with the island IDs as row names.
cii8_matrix <-
  cii8_PA %>%
  column_to_rownames(var = 'island_ID') %>%
  as.matrix()
#
#
#' Build an edge list linking islands that share genes.
#'
#' Keeps the rows of `clustered_island_info` whose `clust_level` column matches
#' one of `cluster_vec`, splits each island's '|'-delimited `genes` string, and
#' compares every ordered pair of distinct islands.
#'
#' @param clustered_island_info Data frame with at least the columns
#'   `island_ID`, `genes` and the column named by `clust_level`.
#' @param clust_level Name of the cluster column to filter on,
#'   e.g. 'primary_cluster'.
#' @param cluster_vec Cluster values to keep.
#' @return A tibble with one row per ordered island pair that shares at least
#'   one gene: `from`, `to`, `in_common` (the shared genes joined by '|', in
#'   the order they occur in `from`) and `num_in_common` (double). Both
#'   directions of a pair are reported (a -> b and b -> a).
generate_edge_lists <- function(clustered_island_info, clust_level, cluster_vec){
  cluster_col <- clustered_island_info[[clust_level]]
  if (is.null(cluster_col)) {
    stop(
      "Column '", clust_level, "' not found in `clustered_island_info`.",
      call. = FALSE
    )
  }

  # Filter the table that was passed in. (This used to read the global `cii`,
  # so the `clustered_island_info` argument was silently ignored.)
  cii_red <- clustered_island_info[cluster_col %in% cluster_vec, , drop = FALSE]

  # One row per island, with a list-column holding its gene names.
  island_genes <-
    cii_red %>%
    select(island_ID, genes) %>%
    separate_rows(genes, sep = '\\|') %>%
    group_by(island_ID) %>%
    nest() %>%
    ungroup() %>%
    mutate(data = map(data, pull))

  # Give the two sides of the cross join distinct column names up front
  # instead of relying on positional `...1` / `...3` name repair.
  from_side <- rename(island_genes, from = island_ID, from_genes = data)
  to_side <- rename(island_genes, to = island_ID, to_genes = data)

  expand_grid(from_side, to_side) %>%
    filter(from != to) %>%
    mutate(
      # Compute the shared genes once and derive both summaries from it.
      shared = map2(from_genes, to_genes, ~ .x[.x %in% .y]),
      in_common = map_chr(shared, paste, collapse = '|'),
      num_in_common = map_dbl(shared, length)
    ) %>%
    select(from, to, in_common, num_in_common) %>%
    filter(num_in_common > 0)
}
#
# Example run: edge list for the islands whose primary_cluster is 8.
generate_edge_lists(
  clustered_island_info = cii,
  clust_level = 'primary_cluster',
  cluster_vec = 8
)
#
# test <- cii8 %>%
# select(island_ID, genes) %>%
# separate_rows(genes, sep='\\|') %>%
# group_by(island_ID) %>%
# nest() %>%
# mutate(data=map(data, pull))
#
# EDGE_LIST <- expand_grid(test, test,.name_repair ='universal') %>%
# filter(island_ID...1 != island_ID...3) %>%
# mutate(in_common=map2_chr(.x = data...2, .y =data...4 , .f = ~ paste(.x[.x %in% .y] , collapse = '|')),
# num_in_common=map2_dbl(.x = data...2, .y =data...4 , .f = ~ length(.x[.x %in% .y]))) %>%
# select(-c(data...2, data...4)) %>%
# transmute(from=island_ID...1,
# to=island_ID...3,
# in_common=in_common,
# num_in_common=num_in_common) %>%
# filter(num_in_common > 0)
#
# library(igraph)
#
#
# graph_from_edgelist(EDGE_LIST)
#
# cii8$flank_loc_tags
# cii8$seqid_len
#
# cii8$location_predict <- ifelse(cii8$flank_loc_tags == 'none|none', 'plas', 'chrom')
#
# cii8$island_type
#
#
# ###
#
# expand_grid(x = 1:3, y = 1:2)
# expand_grid(l1 = letters, l2 = LETTERS)
#
# # Can also expand data frames
# expand_grid(df = data.frame(x = 1:2, y = c(2, 1)), z = 1:3)
# # And matrices
# expand_grid(x1 = matrix(1:4, nrow = 2), x2 = matrix(5:8, nrow = 2))
#
#
# max(colSums(cii8_matrix))
#
# cii8_matrix[,which(colSums(cii8_matrix) == 6)]
#
#
#
# Show the rows of `pgff` for a handful of individual Gene values.
filter(pgff, Gene == 'group_5474')
filter(pgff, Gene == 'group_171')
filter(pgff, Gene == 'group_792')
filter(pgff, Gene == 'group_65')