R语言绘制带有显著性字母标记的柱状图
新
年
快
乐
Tao Wen
2019年1月6日
想想看,人生不觉得过了好多,事情还是需要简单的做。
library(tidyverse)
library(agricolae)
library(car)
library(reshape2)
需求
在很多时候,我们的需求其实很简单,做一个数据描述,方差分析和基本图形的展示; 在做方差分析之前,我们需要对数据进行正太检验,方差齐性检验。方差分析之后,我们做多重比较。
我们做正态检测
####数据导入
setwd("F:/#黄老师组会/第四次周日交流R语言/R语言入门")
data_wt = read.table("R语言入门.txt", header = T, row.names= 1, sep="\t");head(data_wt,n = 10L)## group Tn## CK1 CK 1.10## CK2 CK 1.11## CK3 CK 1.20## CK4 CK 1.02## CK5 CK NA## BOF1 BOF 2.00## BOF2 BOF 2.10## BOF3 BOF 2.01## BOF4 BOF 2.03## BOF5 BOF 3.03
##数据整理
colnames(data_wt) = c("group","TN")head(data_wt)## group TN## CK1 CK 1.10## CK2 CK 1.11## CK3 CK 1.20## CK4 CK 1.02## CK5 CK NA## BOF1 BOF 2.00
#data_wt$TN = as.numeric(data_wt$TN)#####数据数据分析与转换+可视化########是否符合正态分布#######
qqPlot(lm(data_wt$TN ~ data_wt$group, data=data_wt), simulate=TRUE, main="Q-Q Plot", lables=FALSE)
## BOF1 BOF5 ## 6 10
##正态检验
shapiro.test(data_wt$TN)## ## Shapiro-Wilk normality test## ## data: data_wt$TN## W = 0.85895, p-value = 0.09338
#无论Bartlett检验还是Levene检验,两者的P值都大于0.05,#因此接受原假设:样本之间的方差是相同的。因此可以接着做方差分析了。# Bartlett检验
bartlett.test(data_wt$TN ~ data_wt$group, data=data_wt)## ## Bartlett test of homogeneity of variances## ## data: data_wt$TN by data_wt$group## Bartlett's K-squared = 6.1377, df = 1, p-value = 0.01323
# Levene检验,对原始数据的正态性不敏感
leveneTest(data_wt$TN ~ data_wt$group, data=data_wt)## Levene's Test for Homogeneity of Variance (center = median)## Df F value Pr(>F)## group 1 0.6354 0.4516## 7
#######方差检验model<-aov(TN ~ group, data=data_wt)
#进行多重比较,不矫正P值
out <- LSD.test(model,"group", p.adj="none")#结果显示:标记字母法out$group## TN groups## BOF 2.2340 a## CK 1.1075 b
grou<- group_by(data_wt,group)bar_data <- summarise(grou,sd(TN,na.rm = T))bar_data2 = merge(bar_data ,out$group,by.x="group",by.y = "row.names",all = F)#colnames(bar_data2) = c("group", "SD","TN","label");bar_data2
## group SD TN label## 1 BOF 0.4466878 2.2340 a## 2 CK 0.0736546 1.1075 b
######画一张图看看
library("ggplot2")a = max(bar_data2$TN)*1.5mi=c("#1B9E77" ,"#D95F02", "#7570B3","#E7298A")p = ggplot(bar_data2 , aes(x = group, y = TN)) + geom_bar(stat = "identity", width = 0.4,position = "dodge",colour="black",fill="#E6AB02") + geom_errorbar(aes(ymin=TN - SD, ymax=TN + SD), colour="black",width=0.1,size=1)+ geom_text(aes(label = label ,y= TN + SD, x = group),vjust = -0.3,size = 6)+ scale_y_continuous(expand = c(0,0),limits = c(0,a))+ labs(x="TN of all group", y="group", title = "")p
p=p+theme_bw()+ geom_hline(aes(yintercept=mean(TN + SD)), colour="black", linetype=2) + geom_vline(aes(xintercept=0), colour="black", linetype="dashed") + scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+ theme( panel.grid.major=element_blank(), panel.grid.minor=element_blank(), plot.title = element_text(vjust = -8.5,hjust = 0.1), axis.title.y =element_text(size = 20,face = "bold",colour = "black"), axis.title.x =element_text(size = 24,face = "bold",colour = "black"), axis.text = element_text(size = 20,face = "bold"), axis.text.x = element_text(colour = "black",size = 14), axis.text.y = element_text(colour = "black",size = 14), legend.text = element_text(size = 15,face = "bold") #legend.position = "none"#是否删除图例 ) p
整个过程流程化,我们编写一个脚本
我们编号之后,可以在几秒钟得到结果。
setwd("F:/#文涛分析技术/a2R语言技术/R语言脚本学习合集/理化数据得到及其基本统计分析")
# 读入实验设计
data_wt = read.table("理化数据示范.txt", header=T, row.names= 1, sep="\t");head(data_wt)## group TN TP TK SOC TOC## CK1 CK 1.10 11.0 5.50 12 120## CK2 CK 1.11 11.1 5.55 13 130## CK3 CK 1.20 12.0 6.00 11 110## CK4 CK 1.02 10.2 5.10 14 NA## CK5 CK NA 20.2 10.10 15 NA## BOF1 BOF 2.00 20.0 10.00 5 50
head(data_wt)
## group TN TP TK SOC TOC## CK1 CK 1.10 11.0 5.50 12 120## CK2 CK 1.11 11.1 5.55 13 130## CK3 CK 1.20 12.0 6.00 11 110## CK4 CK 1.02 10.2 5.10 14 NA## CK5 CK NA 20.2 10.10 15 NA## BOF1 BOF 2.00 20.0 10.00 5 50
FileName <- paste("all_Q_Q_正态分布图", ".pdf", sep = "")pdf(file=FileName)for (i in 2:ncol(data_wt)) { opar = par(no.readonly = T) par(mfrow = c(3,2)) for (i in 2:ncol(data_wt)) { data_i = data_wt[i] ee <-as.matrix(data_i) dd <- as.vector(ee) aoc = bartlett.test(dd ~ group, data=data_wt) b = round(aoc[[3]],3) name_i = colnames(data_wt[i]) p = paste("p: ",b,name_i, sep = "");p qqPlot(lm(dd ~ group, data = data_wt), simulate=TRUE, main="Q-Q Plot", lables=F,ylab= p) } par(opar)}dev.off()
## png ## 2
for (i in 2:ncol(data_wt)) { data_i = data_wt[i] ee <-as.matrix(data_i) dd <- as.vector(ee) name_i = colnames(data_wt[i]) model<-aov(dd ~ group, data=data_wt)#方差分析 out <- LSD.test(model,"group", p.adj="none")#进行多重比较,不矫正P值 aa = out$group#结果显示:标记字母法 aa$group = row.names(aa) a = max(aa$dd)*1.2 aa wen1 = as.data.frame(tapply(dd,data_wt$group,mean,na.rm=TRUE)) wen2 = as.data.frame(tapply(dd,data_wt$group,sd,na.rm=TRUE)) went = cbind(wen1,wen2) wentao = merge(aa,went, by="row.names",all=F) colnames(wentao) = c(colnames(wentao[1:4]),"mean" ,"SD") aa = mutate(wentao, ymin = mean - SD, ymax = mean + SD) mi=c("#1B9E77" ,"#D95F02", "#7570B3","#E7298A") p = ggplot(aa , aes(x = group, y = dd)) + geom_bar(stat = "identity", width = 0.4,position = "dodge",colour="black",fill="#E6AB02") + geom_text(aes(label = groups,y=ymax, x = group,vjust = -0.3,size = 6))+ geom_errorbar(aes(ymin=ymin, ymax=ymax), colour="black",width=0.1,size = 1)+ scale_y_continuous(expand = c(0,0),limits = c(0,a))+ labs(x=paste(name_i,"of all group", sep = "_"), y="group", title = "") p=p+theme_bw()+ geom_hline(aes(yintercept=mean(dd)), colour="black", linetype=2) + geom_vline(aes(xintercept=0), colour="black", linetype="dashed") + scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+ theme( panel.grid.major=element_blank(), panel.grid.minor=element_blank(), plot.title = element_text(vjust = -8.5,hjust = 0.1), axis.title.y =element_text(size = 20,face = "bold",colour = "black"), axis.title.x =element_text(size = 24,face = "bold",colour = "black"), axis.text = element_text(size = 20,face = "bold"), axis.text.x = element_text(colour = "black",size = 14), axis.text.y = element_text(colour = "black",size = 14), legend.text = element_text(size = 15,face = "bold"), legend.position = "none"#是否删除图例 ) p FileName <- paste(name_i," 字母带bar值柱状图", ".pdf", sep = "_") ggsave(FileName, p, width = 8, height = 8) }dev.off()
## null device ## 1
p
##其他方法# Levene检验,对原始数据的正态性不敏感
leveneTest(TN ~ group, data=data_wt)## Levene's Test for Homogeneity of Variance (center = median)## Df F value Pr(>F)## group 3 0.5092 0.6823## 14
#无论Bartlett检验还是Levene检验,两者的P值都大于0.05,因此接受原假设:样本之间的方差是相同的。因此可以接着做方差分析了。#方差分析,这里缺失值自动忽略,不需处理
model<-aov(TN ~ group, data=data_wt)#进行多重比较,不矫正P值out <- LSD.test(model,"group", p.adj="none")#结果显示:标记字母法out$group## TN groups## OF 3.3400 a## BOF 2.2340 b## CF 1.9750 b## CK 1.1075 c