-
Notifications
You must be signed in to change notification settings - Fork 2
/
limma & limma voom
114 lines (62 loc) · 2.9 KB
/
limma & limma voom
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
######################################################################
################### Then for limma/voom #################
######################################################################
source("http://bioconductor.org/biocLite.R")
biocLite("Limma")
#DGE for microarray by limma
library('ggplot2')
library('limma')
library(xlsx)
library(dplyr)
#读入
rawexprSet=read.table("./GSE190465_series_matrix.txt",sep="\t",header=T)
#数据清洗
dim(rawexprSet)
colnames(rawexprSet)
rawexprSet<-rawexprSet%>%select("ID_REF" , "GSM5724115" ,
"GSM5724116" ,"GSM5724117", "GSM5724118" ,
"GSM5724119", "GSM5724120")
avereps(rawexprSet[,-1],ID=rawexprSet$ID_REF) #重复序列取均值,前半部分为只保留矩阵,后半部分设id(基因名)列为被查找的重复列
datagene<-rawexprSet[!duplicated(rawexprSet$ID_REF),] #合并删除重复序列
row.names(datagene)<-datagene[,1]
datagene<-datagene[,-1]
rawexprSet<-datagene
exprSet=log2(rawexprSet)#log2转换
exprSet=rawexprSet#如果已经是log2数据,运行这一步
exprSet[1:5,1:5]
## 画箱线图,比较数据分布情况
n.sample=ncol(rawexprSet)
cols <- rainbow(n.sample)
#par(mfrow=c(2,2)) 一张图上四个小表格
boxplot(data.frame(exprSet),col=cols)
ggsave("boxplot1.pdf", width = 8, height = 16)
dev.off()
#读取样本分类信息
colnames(rawexprSet)
group <- read.csv("datTraits.csv",header=TRUE,row.names=1,check.names = FALSE)
design <- model.matrix(~0+factor(group$treat_type))#把group设置成一个model matrix#
colnames(design)=levels(factor(group$treat_type))
rownames(design)=colnames(exprSet)
design
fit <- lmFit(exprSet,design)##线性拟合
cont.matrix<-makeContrasts(Ischemia-Shame,levels = design)
fit2=contrasts.fit(fit,cont.matrix)##用对比模型进行差值计算
fit2 <- eBayes(fit2) ##贝叶斯检验
#eBayes() with trend=TRUE
tempOutput = topTable(fit2,coef=1,n=Inf,adjust="BH",sort.by="B",resort.by="M")
nrDEG = na.omit(tempOutput)
write.csv(nrDEG, "limmaOut.csv")
#筛选有显著差异的基因 adj.P.Val意思是矫正后P值
foldChange=1 #fold change=1意思是差异是两倍
pvalue =0.05 #0.05
diff <- nrDEG
diffSig = diff[(diff$P.Val < pvalue & (diff$logFC>foldChange | diff$logFC<(-foldChange))),]
#write.table(diffSig, file="diffSig.xls",sep="\t",quote=F)
write.csv(diffSig, "diffSig.csv")
#把上调和下调分别输入up和down两个文件
diffUp = diff[(diff$P.Val < pvalue & (diff$logFC>foldChange)),]#foldchange>0是上调,foldchange<0是下调#
#write.table(diffUp, file="up.xls",sep="\t",quote=F)#把上调和下调分别输入up和down两个文件#
write.csv(diffUp, "diffUp.csv")
diffDown = diff[(diff$P.Val< pvalue & (diff$logFC<(-foldChange))),]
#write.table(diffDown, file="down.xls",sep="\t",quote=F)
write.csv(diffDown, "diffDown.csv")