# R语言GEO数据处理(六): GEO data processing in R, part 6
# 5. Differential expression analysis ----------------------------------------
# Fits a limma linear model of the expression matrix exp2 against group_list
# and collects the full result table in deg.
library(limma)
# Design matrix with an intercept plus the group_list term(s).
design <- model.matrix(~group_list)
# Per-row linear fit followed by empirical Bayes moderation.
fit <- eBayes(lmFit(exp2, design))
# coef = 2 selects the second design column; number = Inf keeps every row.
# NOTE(review): which group contrast column 2 represents depends on how
# group_list is encoded upstream -- confirm its levels/order.
deg <- topTable(fit, coef = 2, number = Inf)
colnames(deg)
# Add a probe_id column by copying the row names of deg into a regular column.
# The row names must be taken from deg itself: deg$probe_id does not exist yet,
# so rownames(deg$probe_id) is NULL and would add no column at all, which makes
# the join below fail.
library(dplyr)
deg <- mutate(deg, probe_id = rownames(deg))
head(deg)
# Add the symbol column by joining with ids on probe_id, then de-duplicate.
# NOTE(review): assumes ids (defined elsewhere) has probe_id and symbol
# columns -- confirm against the step that builds it.
deg <- inner_join(deg, ids, by = "probe_id")
head(deg)
# Keep only the first row seen for each symbol, in deg's current row order.
deg <- deg[!duplicated(deg$symbol), ]
# Flag up- and down-regulated genes.
# The thresholds are named logFC_t / P.Value_t because those are the names
# written to mydata.Rdata by save() at the end of this section.
logFC_t <- 1 # logFC is on the log2 scale: |logFC| > 1 is a > 2-fold change
P.Value_t <- 0.01 # P values below 0.01 are treated as significant
k1 <- (deg$P.Value < P.Value_t) & (deg$logFC < -logFC_t) # down-regulated
k2 <- (deg$P.Value < P.Value_t) & (deg$logFC > logFC_t) # up-regulated
change <- ifelse(k1, "down", ifelse(k2, "up", "not"))
deg <- mutate(deg, change = change)
head(deg)
table(deg$change)
# Add an ENTREZID column, used later for enrichment analysis.
library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)
# Map gene symbols to Entrez IDs with the human annotation database.
s2e <- bitr(deg$symbol,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Hs.eg.db
)
# Inner join: rows whose symbol has no mapping in s2e are dropped.
deg <- inner_join(deg, s2e, by = c("symbol" = "SYMBOL"))
head(deg)
# Persist the annotated table and the objects needed by later steps.
write.csv(deg, file = "mydata.csv", row.names = TRUE)
save(group_list, deg, logFC_t, P.Value_t, file = "mydata.Rdata")