-
Notifications
You must be signed in to change notification settings - Fork 0
/
Cas_SNP_baseline_OR.R
129 lines (86 loc) · 3.78 KB
/
Cas_SNP_baseline_OR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
## OR for Cas annotation and functiona annotations
flb <- list.files('/home/ranran/ldsc/baseline_v2.2')
flb <- flb[grep('annot.gz', flb)]
baseline <- c()
for (i in 1:length(flb)) {
tmp <- read.table(gzfile(paste0('/home/ranran/ldsc/baseline_v2.2/', flb[i])), header = T, stringsAsFactors = F)
baseline <- rbind(baseline, tmp)
cat(i, ' ')
}
## get interger baseline
cla <- c()
for (i in 6:ncol(baseline)) {
tmp <- class(baseline[,i])
cla <- c(cla, tmp)
}
idx <- which(cla == 'integer')
base <- baseline[,-c(1:5)]
base <- base[,idx]
base <- base[,-82]
## remove flanking annotation
idx <- grep('flanking.500', colnames(base))
base <- base[,-idx]
## remove MAFbin annotation
idx <- grep('MAFbin', colnames(base))
base <- base[,-idx]
dir <- list.dirs('/opt/working/projects/prj_026_CRISPR/Rev2203/Cas/')
#dir <- dir[grep('top10', dir, fixed = T)]
dir <- dir[grep('annotation', dir, fixed = T)]
pams <- unlist(strsplit(dir, '/opt/working/projects/prj_026_CRISPR/Rev2203/Cas//'))[seq(2, 2*length(dir),2)]
pams <- unlist(strsplit(pams, '/annotation'))
#pams[length(pams)] <- 'GC_content'
OR <- matrix(NA, ncol = ncol(base), nrow = length(dir))
#P <- matrix(NA, ncol = ncol(base), nrow = length(dir))
for (i in 1:length(dir)) {
fl <- list.files(dir[i])
fl <- fl[grep('annot.gz', fl)]
dd <- c()
for (j in 1:length(fl)) {
tmp <- read.table(paste0(dir[i], '/', fl[j]), header = T, stringsAsFactors = F)
dd <- rbind(dd, tmp)
cat(j, ' ')
}
for (j in 1:ncol(base)) {
OR[i,j] <- fisher.test(table(base[, j], dd[,1]))$estimate
#P[i,j] <- fisher.test(table(base[, j], dd[,1]))$p
cat(j, ' ')
}
cat(i, '\n')
}
colnames(OR) <- colnames(base)
rownames(OR) <- pams
saveRDS(OR, file = '/opt/working/projects/prj_026_CRISPR/Rev2203/Cas_SNP_baseline_OR.rds')
OR <- readRDS('~/Documents/Projects/NGG/Revision2203-debug/Cas_SNP_baseline_OR.rds')
new_names <- c('Coding (UCSC)' ,'Conserved (LindbladToh)' ,'CTCF (Hoffman)' ,'DGF (ENCODE)' ,'DHS Peak (Trynka)' ,'DHS (Trynka)'
,'Enhancer (Andersson)' ,'Enhancer (Hoffman)' ,'Fetal DHS (Trynka)' ,'H3K27ac (Hnisz)' ,'H3K27ac (PGC2)' ,'H3K4me1 Peaks (Trynka)'
,'H3K4me1 (Trynka)' ,'H3K4me3 Peaks (Trynka)' ,'H3K4me3 (Trynka)' ,'H3K9ac Peaks (Trynka)' ,'H3K9ac (Trynka)' ,'Intron (UCSC)'
,'Promoter Flanking (Hoffman)' ,'Promoter (UCSC)' ,'Repressed (Hoffman)' ,'H3K27ac (Hnisz)' ,'TFBS (ENCODE)' ,'Transcribed (Hoffman)' ,'TSS (Hoffman)'
,'3\' UTR (UCSC)' ,'5\' UTR (UCSC)' ,'Weak Enhancer (Hoffman)' ,'GERP RS > 4' ,'synonymous' ,'non_synonymous' ,'Conserved (Vertebrate)'
,'Conserved (Mammal)' ,'Conserved (Primate)' ,'BivFlnk' ,'Promoter (Villar)' ,'Enhancer (Villar)' ,'Ancient Promoter' ,'Ancient Enhancer' ,'Promoter of ExAC genes')
colnames(OR) <- new_names
## determine Cas order
cas_info <- read.csv('~/Documents/Projects/NGG/Revision-1/CRISPR_PAM_full_table2201.csv', header = T, stringsAsFactors = F)
cas <- unique(cas_info$Cas)
or <- OR[cas,]
colMain <- colorRamp2(c(-4, 0, 2), c("#BC3C29", "white", "#0072B5"))
M = 0.7
pdf('~/Documents/Projects/NGG/Revision2203-debug/Cas_annotation_OR_2203.pdf',16,8)
Heatmap(as.matrix(log(or)), name = 'Enrichment', col = colMain,
show_heatmap_legend = T,
heatmap_legend_param = list(
title = "log(OR)", at = c(-4, -2, 0, 2)#,
#labels = c("neg_two", "zero", "pos_two")
),
height = unit(21*M, 'cm'), width = unit(40*M, 'cm'),
#cell_fun = Pval_fun,
row_order = 1:21, column_order = 1:40,
#row_split = row_splits,
#column_split = col_splits,
#column_title = NULL,
#row_title_gp = gpar(fontsize = 13, col = trait_cate_col), row_title_rot = 90, row_title_side = 'right',
#bottom_annotation = featr_anoot,
row_names_gp = gpar(fontsize = 12), #col = cas_col),
row_names_side = 'left', column_names_side = 'top',
column_names_gp = gpar(fontsize = 12), column_names_rot = 60
)
dev.off()