关于微生物门类堆叠柱状图,你知道的并不够
写在前面
无论是堆叠柱状图,还是近年来会扩展的冲击图。基本都只能对门水平物种多样性进行可视化。然而即使是门水平,也不一定是全部的样本都适合使用堆叠柱状图可视化。
尤其是土壤等复杂的微生物群落的环境,往往门水平的物种数量也在50个左右,或者有七八十个也是常见的。而堆叠柱状图一般可以展示的也就10个左右。过多无论是图例,还是颜色,都无法进行很好的安排和调配。
所以我们一般情况下展示的,都是主要的门类,也确实10个左右的门类基本代表了大部分的微生物。但在分析过程中却存在较大问题。
当我们选择这些主要的门类的时候存在两种选择:
对全部门类做相对丰度转化,并提取主要的门类物种展示,但必然堆叠柱状图的纵坐标达不到100%。
这样做丰度评估当然是十分准确的,但却不好看。
还有一个选择,我们对全部otu表格标准化后,提取主要的微生物门类,得到门类信息的sub表格后,再次标准化,然后就可以达到100%效果的y轴。
但是这样对主要微生物门类的丰度存在不同程度的变形,也就是说门类丰度展现的不够准确了。
所以不能做删减微生物门类,只能通过合并低丰度微生物门类达到效果,方可准确无误展示丰度
实战
第一种方式 y周丰度参差不齐
第一种方式直接参考这里
微生信生物新年放大招:一条代码完成堆叠柱状图-冲击图的操作-终结版
第二种方式 重新标准化 Y轴统一100%
我们是对于分析时认真的,所以也构建了一个函数,用于操作,参见文章末尾。
选择Top10:也就是相对丰度排名前十位的门水平物种展示,其余全部合并并作为others。所以一共展示了11种。
ps = readRDS("../data//ps_liu.rds")
result = barMainplot(ps = ps,j = "Phylum",rep = 6,axis_ord = NULL,label = FALSE ,sd = FALSE,Top = 10)
result[[1]]
result[[2]]
欢迎加入微生信生物
快来微生信生物
函数
barMainplot = function(otu = NULL,tax = NULL,map = NULL,ps = NULL,j = "Phylum",group = "Group",rep = 6,axis_ord = NULL,label = TRUE ,sd = FALSE,Top = 10,tran = TRUE){
# path = "./barplot/"
# dir.create(path)
library(phyloseq)
library(tidyverse)
library(vegan)
library(reshape2)
library("plyr")
library(ggalluvial)
library(ggplot2)
if (is.null(axis_ord)) {
axis_order = NA
}else{
axis_order = strsplit(basename(axis_ord), "-")[[1]]
}
if (is.null(otu)&is.null(tax)&is.null(map)) {
ps = ps
map = as.data.frame(sample_data(ps))
map = map[, group]
colnames(map) = "Group"
map$Group = as.factor(map$Group)
sample_data(ps) = map
map = NULL
}
if (!is.null(otu)|!is.null(tax)|!is.null(map) ) {
#导入otu表格
# otu = read.delim(otu,row.names = 1)
# head(otu)
otu = as.matrix(otu)
str(otu)
#导入注释文件
# tax = read.delim(tax,row.names = 1)
# head(tax)
tax = as.matrix(tax)
# taxa_names(tax)
#导入分组文件
# map = read.delim(map,row.names = 1)
# head(map)
colnames(map) = gsub(group,"AA", colnames(map))
map$Group = map$AA
map$Group = as.factor(map$Group )
map$Group
# #导入进化树
# tree = read.tree("./otus.tree")
# tree
ps <- phyloseq(otu_table(otu, taxa_are_rows=TRUE),
sample_data(map) ,
tax_table(tax)
# phy_tree(tree)
)
}
ps
# map = as.data.frame(sample_data(ps))
# head(map)
# colnames(map) = gsub(group,"AA", colnames(map))
#
# map$Group = map$AA
# map$Group = as.factor(map$Group )
# sample_data(ps) = map
ps1 = ps
i = ps1
colnames(tax_table(ps1))
psdata = i %>%
tax_glom(taxrank = j)
psdata
if (tran == TRUE) {
psdata = psdata%>%
transform_sample_counts(function(x) {x/sum(x)} )
}
vegan_otu <- function(physeq){
OTU <- otu_table(physeq)
if(taxa_are_rows(OTU)){
OTU <- t(OTU)
}
return(as(OTU,"matrix"))
}
otu = otu_table(psdata)
tax = tax_table(psdata)
head(tax)
for (i in 1:dim(tax)[1]) {
if (row.names(tax)[i] %in% names(sort(rowSums(otu), decreasing = TRUE)[1:Top])) {
tax[i,j] =tax[i,j]
} else {
tax[i,j]= "others"
}
}
tax_table(psdata)= tax
# #-----排序需要转化
# tax = tax_table(psdata )
# colnames(tax) = gsub(j,"Phylum",colnames(tax))
# tax_table(psdata) = tax
##这里我们过滤一定阈值的otu,会出现最后堆叠柱状图总体丰度高于100%的情况,这是合理的
###########绘制不同分类等级的柱状图
Taxonomies <- psdata %>%# Transform to rel. abundance
psmelt()
# %>% # Melt to long format
# # filter(Abundance >= k) %>% # Filter out low abundance taxa
# arrange(Phylum)
head(Taxonomies)
# dim(Taxonomies)
# 这里我们看到有很过属,因此颜色上就会出现不能很好区分的现象
colbar <- dim(unique(dplyr::select(Taxonomies, one_of(j))))[1]
colors = colorRampPalette(c( "#CBD588", "#599861", "orange","#DA5724", "#508578", "#CD9BCD",
"#AD6F3B", "#673770","#D14285", "#652926", "#C84248",
"#8569D5", "#5E738F","#D1A33D", "#8A7C64","black"))(colbar)
Taxonomies$Abundance = Taxonomies$Abundance * 100
# Taxonomies$Abundance = Taxonomies$Abundance/sum(Taxonomies$Abundance)
Taxonomies$Abundance = Taxonomies$Abundance/rep
head(Taxonomies)
#按照分组求均值
colnames(Taxonomies) <- gsub(j,"aa",colnames(Taxonomies))
by_cyl <- group_by(Taxonomies, Group,aa)
zhnagxu2 = dplyr :: summarise(by_cyl, sum(Abundance), sd(Abundance))
head(zhnagxu2)
##确定因子,这里通过求和按照从小到大的顺序得到因子
##长变宽
head(Taxonomies)
# Taxonomies2 = dcast(Taxonomies,aa ~ Sample,value.var = "Abundance")
# head(Taxonomies2)
# Taxonomies2[is.na(Taxonomies2)] <- 0
# aa = Taxonomies2
# # head(aa)
#
# n = ncol(aa)
# #增加一行,为整列的均值,计算每一列的均值,2就是表示列
# aa[n+1]=apply(aa[,c(2:ncol(aa))],1,sum)
# colnames(aa)[n+1] <- c("allsum")
# # str(aa)
# bb<- arrange(aa, allsum)
# # head(bb)
# bb = bb[c(1,ncol(bb))]
# cc<- arrange(bb, desc(allsum))
# head(cc)
iris_groups<- group_by(Taxonomies, aa)
cc<- dplyr::summarise(iris_groups, sum(Abundance))
head(cc)
colnames(cc)= c("aa","allsum")
cc<- arrange(cc, desc(allsum))
head(cc)
##使用这个属的因子对下面数据进行排序
head(zhnagxu2)
colnames(zhnagxu2) <- c("group","aa","Abundance","sd")
zhnagxu2$aa = factor(zhnagxu2$aa,order = T,levels = cc$aa)
zhnagxu3 = plyr::arrange(zhnagxu2,desc(aa))
head(zhnagxu3)
##制作标签坐标,标签位于顶端
# Taxonomies_x = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance))
# head(Taxonomies_x )
#标签位于中部
# Taxonomies_x1 = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance) - 0.5*Abundance)
Taxonomies_x = ddply(zhnagxu3,"group", transform, label_sd = cumsum(Abundance), label_y = cumsum(Abundance) - 0.5*Abundance)
# Taxonomies_x$label_y =
head(Taxonomies_x,6 )
Taxonomies_x$label = Taxonomies_x$aa
#使用循环将堆叠柱状图柱子比较窄的别写标签,仅仅宽柱子写上标签
for(i in 1:nrow(Taxonomies_x)){
if(Taxonomies_x[i,3] > 3){
Taxonomies_x[i,5] = Taxonomies_x[i,5]
}else{
Taxonomies_x[i,5] = NA
}
}
#----开始设置颜色映射数量,就是图例显示的数量。
#假如仅仅设置平均丰度排名前10位的展示标签,其他标记为others
# Top = 10
# i = 1
# Taxonomies_x$aa = as.character(Taxonomies_x$aa)
# for (i in 1:length(Taxonomies_x$aa)) {
# if (Taxonomies_x$aa[i] %in% cc$aa[1:Top]) {
# # Taxonomies_x$aa[i] = Taxonomies_x$aa[i]
# }else{
#
# Taxonomies_x$aa[i] = "others"
# }
# }
Taxonomies_x$aa = factor(Taxonomies_x$aa,order = T,levels = c(as.character(cc$aa)))
# cc$aa
# Taxonomies_x$aa
unique( Taxonomies_x$aa)
head(Taxonomies_x )
##普通柱状图
p4 <- ggplot(Taxonomies_x , aes(x = group, y = Abundance, fill = aa, order = aa)) +
geom_bar(stat = "identity",width = 0.5,color = "black") +
scale_fill_manual(values = colors) +
theme(axis.title.x = element_blank()) +
theme(legend.text=element_text(size=6)) +
scale_y_continuous(name = "Abundance (%)")
p4
if (is.na(axis_order)) {
p4 = p4
}else{
p4 = p4 +scale_x_discrete(limits = axis_order)
}
if (sd == TRUE) {
p4 = p4 +
geom_errorbar(aes(ymin=label_sd-sd, ymax=label_sd +sd), width=.2)
}
if (label == TRUE) {
p4 = p4 +
geom_text(aes(y = label_y, label = label ),size = 4,fontface = "bold.italic")
}
# print(p4)
# install.packages("ggalluvial")
p4 = p4+theme_bw()+
scale_y_continuous(expand = c(0,0))+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
text=element_text(face = "bold"),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold"),
axis.text.x = element_text(colour = "black",size = 14,),
axis.text.y = element_text(colour = "black",size = 14),
legend.text = element_text(size = 15)
#legend.position = "none"#是否删除图例
)
p4
map = as.data.frame(sample_data(ps))
if (length(unique(map$Group))>3){ p4=p4+theme(axis.text.x=element_text(angle=45,vjust=1, hjust=1))}
#-------------冲击图
head(Taxonomies_x )
cs = Taxonomies_x $aa
# head(cs)
# as.factor(Taxonomies_x $Genus)
# cs = as.character(Taxonomies_x $Genus)
# cs1 = as.factor(cs)
cs1 = cs
#提取真正的因子的数量
lengthfactor = length(levels(cs1))
#提取每个因子对应的数量
cs3 = summary(as.factor(cs1))
cs4 = as.data.frame(cs3)
cs4$id = row.names(cs4)
#对因子进行排序
df_arrange<- arrange(cs4, id)
#对Taxonomies_x 对应的列进行排序
Taxonomies_x1<- arrange(Taxonomies_x , aa)
head(Taxonomies_x1)
#构建flow的映射列Taxonomies_x
Taxonomies_x1$ID = factor(rep(c(1:lengthfactor), cs4$cs3))
#colour = "black",size = 2,,aes(color = "black",size = 0.8)
head(Taxonomies_x1)
p3 = ggplot(Taxonomies_x1,
aes(x = group, stratum = aa, alluvium = ID,
weight = Abundance,
fill = aa, label = aa)) +
geom_flow(stat = "alluvium", lode.guidance = "rightleft",
color = "black",size = 0.2,width = 0.3,alpha = .2) +
geom_bar(width = 0.45)+
geom_stratum(width = 0.45,size = 0.2) +
#geom_text(stat = "stratum", size = 3,family="Times New Roman",fontface = "bold.italic") +
#theme(legend.position = "none") +
scale_fill_manual(values = colors)+
#ggtitle("fow_plot")+
# scale_x_discrete(limits = axis_order)+
# geom_text(aes(y = label_y, label = label ),size = 4,fontface = "bold.italic")+
labs(x="group",
y="Relative abundancce (%)",
title="")
p3
if (is.na(axis_order)) {
p3 = p3
}else{
p3 = p3 +scale_x_discrete(limits = axis_order)
}
# p3
if (label == TRUE) {
p3 = p3 +
geom_text(aes(y = label_y, label = label ),size = 4,fontface = "bold.italic")
}
if (sd == TRUE) {
p3 = p3 +
geom_errorbar(aes(ymin=label_sd-sd, ymax=label_sd +sd), width=.2)
}
p3 =p3+theme_bw()+
scale_y_continuous(expand = c(0,0))+
#geom_hline(aes(yintercept=0), colour="black", linetype=2) +
#geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +
#scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
text=element_text(face = "bold"),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold"),
axis.text.x = element_text(colour = "black",size = 14),
axis.text.y = element_text(colour = "black",size = 14),
legend.text = element_text(size = 15,face = "bold.italic")
#legend.position = "none"#是否删除图例
)
p3
if (length(unique(map$Group))>3){ p3=p3+theme(axis.text.x=element_text(angle=45,vjust=1, hjust=1))}
return(list(p4,Taxonomies_x,p3))
}