-
Notifications
You must be signed in to change notification settings - Fork 1
/
7 - Module exploration - Visant outputs of module connections.R
234 lines (186 loc) · 8.05 KB
/
7 - Module exploration - Visant outputs of module connections.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
####
##### This is a script to create files for Visant visulisation of modules
### as well as outputting into, say, Cytoscape
setwd("~/Bioinformatics Work/Meth & RNA/Meta-analysis WGCNA")
library(WGCNA)
library(plyr)
library(flashClust)
library(ggplot2)
library(gplots)
library(lattice)
library(extrafont)
loadfonts()
##############
load(file = "MetaAnalysis_trimmed_input.RData")
load(file = "Modules_DS0.RData")
colorsA1 = names(table(modules1))
### add in the rownames for the ME_ data frames
rownames(ME_1A) <- colnames(datExpr1)
rownames(ME_2A) <- colnames(datExpr2)
## first gotta do the functions urg'
# ===================================================
#The function TOMdist1 computes a dissimilarity
# based on the topological overlap matrix (Ravasz et al)
# Input: an Adjacency matrix with entries in [0,1]
if(exists("TOMdist1")) rm(TOMdist1);
TOMdist1=function(adjmat1, maxADJ=FALSE) {
diag(adjmat1)=0;
adjmat1[is.na(adjmat1)]=0;
maxh1=max(as.dist(adjmat1) ); minh1=min(as.dist(adjmat1) );
if (maxh1>1 | minh1 < 0 ) {print(paste("ERROR: the adjacency matrix contains entries that are larger than 1 or smaller than 0!!!, max=",maxh1,", min=",minh1)) } else {
if ( max(c(as.dist(abs(adjmat1-t(adjmat1)))))>10^(-12) ) {print("ERROR: non-symmetric adjacency matrix!!!") } else {
adjmat1= (adjmat1+ t(adjmat1) )/2
kk=apply(adjmat1,2,sum)
maxADJconst=1
if (maxADJ==TRUE) maxADJconst=max(c(as.dist(adjmat1 )))
Dhelp1=matrix(kk,ncol=length(kk),nrow=length(kk))
denomTOM= pmin(as.dist(Dhelp1),as.dist(t(Dhelp1))) +as.dist(maxADJconst-adjmat1);
gc();gc();
numTOM=as.dist(adjmat1 %*% adjmat1 +adjmat1);
#TOMmatrix=numTOM/denomTOM
# this turns the TOM matrix into a dissimilarity
out1=1-as.matrix(numTOM/denomTOM)
diag(out1)=1
# setting the diagonal to 1 is unconventional (it should be 0)
# but it leads to nicer looking TOM plots...
out1
}}}
visantPrepOverall <- function(colorFinal2, moduleColor, datExprrest, genes, numInt, power, signed=FALSE)
{
## This file returns the overall strongest connections within a module for one group of subjects
## USER inputs
# colorFinal2 = vector with module association for each probe
# moduleColor = color of the module... should be a member of colorFinal2
# datExprrest = expression data for the genes corresponding to cIndex
# genes = list of genes that correspond to the probes
# numInt = number of interactions to output to the visant file
cIndex = (colorFinal2==moduleColor)
datExpr=datExprrest[,cIndex]
if (signed){AdjMat =((1+cor(datExpr, use="p"))/2)^power
} else {AdjMat =abs(cor(datExpr, use="p"))^power}
diag(AdjMat)=0
Degree <- apply(AdjMat,1,sum)
Degree = Degree/max(Degree)
meanExpr=apply(datExpr,2,mean)
ProbeSet=colnames(datExpr)
GeneSet=genes[cIndex];
Module=rep(moduleColor,length(meanExpr))
## This file summarizes intramodular connectivity and expression for each gene in each group:
fn=paste(moduleColor,"_connectivityOverall.csv",sep="")
datConn = cbind(ProbeSet,GeneSet,meanExpr,Degree,Module)
datConn = datConn[order(Degree, decreasing=TRUE),]
write.table(datConn,file=fn,sep=",",
row.names=F, col.names= c("probes","genes","meanExpr","kin","module"))
write(paste(fn, "written."),"")
## TOM matrix
distTOM <- TOMdist1(AdjMat)
simTOM = 1-distTOM
diag(simTOM)=0
simTOMcutoff = simTOM
## Correlation matrix
Pearson = cor(datExpr ,use="p")
diag(Pearson)=0
## Dynamically determine the appropriate cutoff
cutoff = 0.24
len = 10000
dir = "increase"
loops = 0
split = 0.01
numInt = numInt+100
while(len>100){
loops = loops+1
if (dir == "increase") { cutoff = cutoff+split; }
if (dir == "decrease") { cutoff = cutoff-split; }
indices = (simTOMcutoff>cutoff)
len = sum(sum(indices))
if (len < numInt) {dir = "decrease";}
if (len >=numInt) {dir = "increase";}
len = abs(len-numInt)
if (loops>500){ len=0;}
if ((loops%%100)==0){ split = split/2; }
}
write(c(loops,cutoff,len),"")
## Output using cutoffs:
indices = (simTOMcutoff[1,]>cutoff)
datout=cbind(
rep(GeneSet[1], length(ProbeSet[indices])),
GeneSet[indices],rep(0, length(ProbeSet[indices])),
rep("M1002", length(ProbeSet[indices])),
simTOM[1,][indices], Pearson[1,][indices])
colnames(datout) = c("gene1","gene2","zero","color","TO","Correlation")
for(i in seq(2,length(ProbeSet),by=1)){
indices = (simTOMcutoff[i,]>cutoff)
datout=rbind(datout,cbind(
rep(GeneSet[i], length(ProbeSet[indices])),
GeneSet[indices],rep(0, length(ProbeSet[indices])),
rep("M1002", length(ProbeSet[indices])),
simTOM[i,][indices], Pearson[i,][indices]))
}
datout = datout[order(datout[,5],decreasing=TRUE),]
fn = paste(moduleColor,"_visantOverall.csv",sep="")
write.table(datout,file=fn,sep=",",row.names=F, col.names=c("gene1","gene2",
"zero","color","TO","Correlation"))
write(paste(fn, "written."),"")
}
visantPrep <- function(colorFinal2, moduleColor, ciIndex, msIndex, datExprrest, genes, cutoff, power=power, signed=FALSE)
{
## This file compares two groups of subjects and returns the connections that are differential between the two groups
## USER inputs
# colorFinal2 = vector with module association for each probe
# moduleColor = color of the module... should be a member of colorFinal2
# ciIndex = index of control subjects
# msIndex = index of AD subjects
# datExprrest = expression data for the genes corresponding to cIndex
# genes = list of genes that correspond to the probes
# cutoff = NOT A CUTOFF!!! NUMBER OF INTEGERS TO INCLUDE IN THE ANALYSIS!!!
# power = power of the network
# signed = is this a signed network TRUE/FALSE
cIndex = (colorFinal2==moduleColor)
datExprCI=datExprrest[ciIndex,cIndex]
datExprMS=datExprrest[msIndex,cIndex]
if (signed){AdjMatCI =((1+cor(datExprCI, use="p"))/2)^power
} else {AdjMatCI =abs(cor(datExprCI, use="p"))^power}
if (signed){AdjMatMS =((1+cor(datExprMS, use="p"))/2)^power
} else {AdjMatMS =abs(cor(datExprMS, use="p"))^power}
diag(AdjMatCI)=0
diag(AdjMatMS)=0
DegreeCI <- apply(AdjMatCI,1,sum)
DegreeMS <- apply(AdjMatMS,1,sum)
DegreeCI = DegreeCI/max(DegreeCI)
DegreeMS = DegreeMS/max(DegreeMS)
meanExprCI=apply(datExprCI,2,mean)
meanExprMS=apply(datExprMS,2,mean)
ProbeSet1=colnames(datExprMS)
GeneSet1=genes[cIndex];
Module=rep(moduleColor,length(meanExprMS))
if (cutoff>=0){
## This file summarizes intramodular connectivity and expression for each gene in each group:
write.table( cbind(ProbeSet1,GeneSet1,meanExprCI,meanExprMS,DegreeCI,DegreeMS,Module),
file= paste(moduleColor,"_connectivity.csv",sep=""),sep=",",row.names=F,
col.names= c("Probes","genes","meanExprCI","meanExprMS","kin_CI","kin_MS","Module"))
## Output correlation between kIn_CI and kIn_MS
write("kIn_CI vs. kIN_MS correlation","")
corr = cor(DegreeCI, DegreeMS, method="s")
write(corr,"")
}
}
################################################################
for(co in colorsA1[colorsA1!="grey"])
visantPrepOverall(modules1, co, t(datExpr1),
rownames(datExpr1), 500, softPower, TRUE)
for(co in colorsA1[colorsA1!="grey"])
visantPrepOverall(modules1, co, t(datExpr2),
rownames(datExpr2), 500, softPower, TRUE)
#### HUB genes specific to one network?
datExprA12g = t(cbind(datExpr1,datExpr2))
i1 = 1:dim(datExpr1)[[2]];
i2 = (1:dim(datExpr2)[[2]])+length(i1)
for (co in colorsA1[colorsA1!="grey"])
visantPrep(modules1, co, i1, i2, datExprA12g,
rownames(datExpr1), 500, softPower, TRUE)
#####
### genes specific to TCGA can be found:
### in connectivity files, those with high kin_CI and low kin_MS are only hubs in TCGA
#### in viasnt files, genes with many connections ot TO-Ratio are hubs only in TCGA
### These last visant files need to be modified- the "0" and the colour columns
### are missing, and the columns A,C, E, F, H, and I are extraneous