微生物门类堆叠柱状图-冲击图-在R4.0更新版本
写在前面2020年12月
在R4.0 更新后,我之前的barMainplot函数中冲击图部分不能很好地运行,是由于dplyr版本更新后产生的问题,我将这部分做了更新,并且将颜色等映射去除,方便大家在出图后重新填充颜色,注意fill参数,不能用colour,没有这个参数。
library(phyloseq)
library(tidyverse)
library(vegan)
library(reshape2)
library("plyr")
library(ggalluvial)
library(ggplot2)
library(ggClusterNet)
data(ps)
result <- barMainplot (ps = ps,group = "Group",
j = "Phylum",rep = 6,axis_ord = NULL,label = TRUE ,sd = FALSE,Top = 10,tran = TRUE)
library(patchwork)
result[[1]] + result[[3]]
result[[1]]+ scale_fill_brewer() + result[[3]] + scale_fill_brewer()
写在前面
无论是堆叠柱状图,还是近年来会扩展的冲击图。基本都只能对门水平物种多样性进行可视化。然而即使是门水平,也不一定是全部的样本都适合使用堆叠柱状图可视化。
尤其是土壤等复杂的微生物群落的环境,往往门水平的物种数量也在50个左右,或者有七八十个也是常见的。而堆叠柱状图一般可以展示的也就10个左右。过多无论是图例,还是颜色,都无法进行很好的安排和调配。
所以我们一般情况下展示的,都是主要的门类,也确实10个左右的门类基本代表了大部分的微生物。但在分析过程中却存在较大问题。
当我们选择这些主要的门类的时候存在两种选择:
对全部门类做相对丰度转化,并提取主要的门类物种展示,但必然堆叠柱状图的纵坐标达不到100%。
这样做丰度评估当然是十分准确的,但却不好看。
还有一个选择,我们对全部otu表格标准化后,提取主要的微生物门类,得到门类信息的sub表格后,再次标准化,然后就可以达到100%效果的y轴。
但是这样对主要微生物门类的丰度存在不同程度的变形,也就是说门类丰度展现的不够准确了。
所以不能做删减微生物门类,只能通过合并低丰度微生物门类达到效果,方可准确无误展示丰度
实战
第一种方式 y周丰度参差不齐
第一种方式直接参考这里
微生信生物新年放大招:一条代码完成堆叠柱状图-冲击图的操作-终结版
第二种方式 重新标准化 Y轴统一100%
我们是对于分析时认真的,所以也构建了一个函数,用于操作,参见文章末尾。
选择Top10:也就是相对丰度排名前十位的门水平物种展示,其余全部合并并作为others。所以一共展示了11种。
ps = readRDS("../data//ps_liu.rds")
result = barMainplot(ps = ps,j = "Phylum",rep = 6,axis_ord = NULL,label = FALSE ,sd = FALSE,Top = 10)
result[[1]]
result[[2]]
欢迎加入微生信生物
快来微生信生物
函数
library(phyloseq)
library(tidyverse)
library(vegan)
library(reshape2)
library("plyr")
library(ggalluvial)
library(ggplot2)
barMainplot = function(otu = NULL,tax = NULL,map = NULL,tree = NULL ,ps = NULL,group = "Group",
j = "Phylum",rep = 6,axis_ord = NULL,label = TRUE ,sd = FALSE,Top = 10,tran = TRUE){
if (is.null(axis_ord)) {
axis_order = NA
}else{
axis_order = strsplit(basename(axis_ord), "-")[[1]]
}
ps = inputMicro(otu,tax,map,tree,ps,group = group)
psdata = ps %>%
tax_glom(taxrank = j)
# transform to relative abundance
if (tran == TRUE) {
psdata = psdata%>%
transform_sample_counts(function(x) {x/sum(x)} )
}
otu = otu_table(psdata)
tax = tax_table(psdata)
for (i in 1:dim(tax)[1]) {
if (row.names(tax)[i] %in% names(sort(rowSums(otu), decreasing = TRUE)[1:Top])) {
tax[i,j] =tax[i,j]
} else {
tax[i,j]= "others"
}
}
tax_table(psdata)= tax
Taxonomies <- psdata %>% # Transform to rel. abundance
psmelt()
# 这里我们看到有很过属,因此颜色上就会出现不能很好区分的现象
colbar <- dim(unique(dplyr::select(Taxonomies, one_of(j))))[1]
# colors = colorRampPalette(c( "#CBD588", "#599861", "orange","#DA5724", "#508578", "#CD9BCD",
# "#AD6F3B", "#673770","#D14285", "#652926", "#C84248",
# "#8569D5", "#5E738F","#D1A33D", "#8A7C64","black"))(colbar)
Taxonomies$Abundance = Taxonomies$Abundance * 100
Taxonomies$Abundance = Taxonomies$Abundance/rep
#按照分组求均值
colnames(Taxonomies) <- gsub(j,"aa",colnames(Taxonomies))
by_cyl <- dplyr::group_by(Taxonomies, Group,aa)
zhnagxu2 = dplyr::summarise(by_cyl, sum(Abundance), sd(Abundance))
iris_groups<- dplyr::group_by(Taxonomies, aa)
cc<- dplyr::summarise(iris_groups, sum(Abundance))
head(cc)
colnames(cc)= c("aa","allsum")
cc<- dplyr::arrange(cc, desc(allsum))
##使用这个属的因子对下面数据进行排序
colnames(zhnagxu2) <- c("group","aa","Abundance","sd")
zhnagxu2$aa = factor(zhnagxu2$aa,order = T,levels = cc$aa)
zhnagxu3 = zhnagxu2
##制作标签坐标,标签位于顶端
# Taxonomies_x = ddply(zhnagxu3,"group", summarize, label_y = cumsum(Abundance))
# head(Taxonomies_x )
#标签位于中部
# Taxonomies_x1 = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance) - 0.5*Abundance)
Taxonomies_x = plyr::ddply(zhnagxu3,"group", summarize,label_sd = cumsum(Abundance),label_y = cumsum(Abundance) - 0.5*Abundance)
# Taxonomies_x$label_y =
Taxonomies_x = cbind(as.data.frame(zhnagxu3),as.data.frame(Taxonomies_x)[,-1])
head(Taxonomies_x,6 )
Taxonomies_x$label = Taxonomies_x$aa
# #使用循环将堆叠柱状图柱子比较窄的别写标签,仅仅宽柱子写上标签
# for(i in 1:nrow(Taxonomies_x)){
# if(Taxonomies_x[i,3] > 3){
# Taxonomies_x[i,5] = Taxonomies_x[i,5]
# }else{
# Taxonomies_x[i,5] = NA
# }
# }
Taxonomies_x$aa = factor(Taxonomies_x$aa,order = T,levels = c(as.character(cc$aa)))
##普通柱状图
p4 <- ggplot(Taxonomies_x , aes(x = group, y = Abundance, fill = aa, order = aa)) +
geom_bar(stat = "identity",width = 0.5,color = "black") +
# scale_fill_manual(values = colors) +
theme(axis.title.x = element_blank()) +
theme(legend.text=element_text(size=6)) +
scale_y_continuous(name = "Relative abundance (%)") +
guides(fill = guide_legend(title = j)) +
labs(x="",y="Relative abundance (%)",
title=paste("Top ",Top," ",j,sep = ""))
p4
if (is.na(axis_order)) {
p4 = p4
}else{
p4 = p4 + scale_x_discrete(limits = axis_order)
}
if (sd == TRUE) {
p4 = p4 +
geom_errorbar(aes(ymin=label_sd-sd, ymax=label_sd +sd), width=.2)
}
if (label == TRUE) {
p4 = p4 +
geom_text(aes(y = label_y, label = label ))
}
map = as.data.frame(sample_data(ps))
if (length(unique(map$Group))>3){p4 = p4 + theme(axis.text.x=element_text(angle=45,vjust=1, hjust=1))}
#-------------冲击图
cs = Taxonomies_x $aa
lengthfactor <- cs %>%
levels() %>%
length()
cs4 <- cs %>%
as.factor() %>%
summary() %>%
as.data.frame()
cs4$id = row.names(cs4)
#对因子进行排序
df_arrange<- arrange(cs4, id)
#对Taxonomies_x 对应的列进行排序
Taxonomies_x1<- arrange(Taxonomies_x , aa)
head(Taxonomies_x1)
#构建flow的映射列Taxonomies_x
Taxonomies_x1$ID = factor(rep(c(1:lengthfactor), cs4$.))
#colour = "black",size = 2,,aes(color = "black",size = 0.8)
head(Taxonomies_x1)
Taxonomies_x1$Abundance
p3 <- ggplot(Taxonomies_x1, aes(x = group, y = Abundance,fill = aa,alluvium = aa,stratum = ID)) +
geom_flow(aes(fill = aa, colour = aa),
stat = "alluvium", lode.guidance = "rightleft",
color = "black",size = 0.2,width = 0.35,alpha = .2) +
geom_bar(width = 0.45,stat = "identity") +
labs(x="",y="Relative abundance (%)",
title=paste("Top ",Top," ",j,sep = "")) +
guides(fill = guide_legend(title = j),color = FALSE) +
scale_y_continuous(expand = c(0,0))
p3
# flower plot
p1 <- ggplot(Taxonomies_x1,
aes(x = group, alluvium = aa, y = Abundance)) +
geom_flow(aes(fill = aa, colour = aa), width = 0) +
labs(x="",y="Relative abundance (%)",
title=paste("Top ",Top," ",j,sep = "")) +
guides(fill = guide_legend(title = j),color = FALSE) +
scale_y_continuous(expand = c(0,0))
if (is.na(axis_order)) {
p1 = p1
p3 = p3
}else{
p1 = p1 +scale_x_discrete(limits = axis_order)
p3 = p3 +scale_x_discrete(limits = axis_order)
}
# p3
if (label == TRUE) {
p1 = p1 +
geom_text(aes(y = label_y, label = label ))
p3 = p3 +
geom_text(aes(y = label_y, label = label ))
}
if (sd == TRUE) {
p1 = p1 +
geom_errorbar(aes(ymin=label_sd-sd, ymax=label_sd +sd), width=.2)
p3 = p3 +
geom_errorbar(aes(ymin=label_sd-sd, ymax=label_sd +sd), width=.2)
}
if (length(unique(map$Group))>3){ p3=p3+theme(axis.text.x=element_text(angle=45,vjust=1, hjust=1))}
return(list(p4,Taxonomies_x,p3,p1))
}