微生物门类堆叠柱状图-冲击图-在R4.0更新版本

写在前面2020年12月

在R4.0 更新后,我之前的barMainplot函数中冲击图部分不能很好地运行,是由于dplyr版本更新后产生的问题,我将这部分做了更新,并且将颜色等映射去除,方便大家在出图后重新填充颜色,注意fill参数,不能用colour,没有这个参数。

library(phyloseq)library(tidyverse)library(vegan)library(reshape2)library("plyr")library(ggalluvial)library(ggplot2)
library(ggClusterNet)data(ps)
result <- barMainplot (ps = ps,group = "Group", j = "Phylum",rep = 6,axis_ord = NULL,label = TRUE ,sd = FALSE,Top = 10,tran = TRUE)
library(patchwork)result[[1]] + result[[3]]
result[[1]]+ scale_fill_brewer()  + result[[3]] + scale_fill_brewer()

写在前面

无论是堆叠柱状图,还是近年来会扩展的冲击图。基本都只能对门水平物种多样性进行可视化。然而即使是门水平,也不一定是全部的样本都适合使用堆叠柱状图可视化。

尤其是土壤等复杂的微生物群落的环境,往往门水平的物种数量也在50个左右,或者有七八十个也是常见的。而堆叠柱状图一般可以展示的也就10个左右。过多无论是图例,还是颜色,都无法进行很好的安排和调配。

所以我们一般情况下展示的,都是主要的门类,也确实10个左右的门类基本代表了大部分的微生物。但在分析过程中却存在较大问题。

当我们选择这些主要的门类的时候存在两种选择:

  • 对全部门类做相对丰度转化,并提取主要的门类物种展示,但必然堆叠柱状图的纵坐标达不到100%。

    这样做丰度评估当然是十分准确的,但却不好看。

  • 还有一个选择,我们对全部otu表格标准化后,提取主要的微生物门类,得到门类信息的sub表格后,再次标准化,然后就可以达到100%效果的y轴。

    但是这样对主要微生物门类的丰度存在不同程度的变形,也就是说门类丰度展现的不够准确了。

    所以不能做删减微生物门类,只能通过合并低丰度微生物门类达到效果,方可准确无误展示丰度

实战

第一种方式 y周丰度参差不齐

第一种方式直接参考这里

微生信生物新年放大招:一条代码完成堆叠柱状图-冲击图的操作-终结版

第二种方式 重新标准化 Y轴统一100%

我们是对于分析时认真的,所以也构建了一个函数,用于操作,参见文章末尾。

选择Top10:也就是相对丰度排名前十位的门水平物种展示,其余全部合并并作为others。所以一共展示了11种。

ps = readRDS("../data//ps_liu.rds")
result = barMainplot(ps = ps,j = "Phylum",rep = 6,axis_ord = NULL,label = FALSE ,sd = FALSE,Top = 10)
result[[1]]
result[[2]]

欢迎加入微生信生物

快来微生信生物

微生信生物

函数


library(phyloseq)library(tidyverse)library(vegan)library(reshape2)library("plyr")library(ggalluvial)library(ggplot2)

barMainplot = function(otu = NULL,tax = NULL,map = NULL,tree = NULL ,ps = NULL,group = "Group", j = "Phylum",rep = 6,axis_ord = NULL,label = TRUE ,sd = FALSE,Top = 10,tran = TRUE){ if (is.null(axis_ord)) { axis_order = NA }else{ axis_order = strsplit(basename(axis_ord), "-")[[1]] } ps = inputMicro(otu,tax,map,tree,ps,group = group) psdata = ps %>% tax_glom(taxrank = j) # transform to relative abundance if (tran == TRUE) { psdata = psdata%>% transform_sample_counts(function(x) {x/sum(x)} ) } otu = otu_table(psdata) tax = tax_table(psdata) for (i in 1:dim(tax)[1]) { if (row.names(tax)[i] %in% names(sort(rowSums(otu), decreasing = TRUE)[1:Top])) { tax[i,j] =tax[i,j] } else { tax[i,j]= "others" } } tax_table(psdata)= tax Taxonomies <- psdata %>% # Transform to rel. abundance psmelt() # 这里我们看到有很过属,因此颜色上就会出现不能很好区分的现象 colbar <- dim(unique(dplyr::select(Taxonomies, one_of(j))))[1] # colors = colorRampPalette(c( "#CBD588", "#599861", "orange","#DA5724", "#508578", "#CD9BCD", # "#AD6F3B", "#673770","#D14285", "#652926", "#C84248", # "#8569D5", "#5E738F","#D1A33D", "#8A7C64","black"))(colbar) Taxonomies$Abundance = Taxonomies$Abundance * 100 Taxonomies$Abundance = Taxonomies$Abundance/rep #按照分组求均值 colnames(Taxonomies) <- gsub(j,"aa",colnames(Taxonomies)) by_cyl <- dplyr::group_by(Taxonomies, Group,aa) zhnagxu2 = dplyr::summarise(by_cyl, sum(Abundance), sd(Abundance)) iris_groups<- dplyr::group_by(Taxonomies, aa) cc<- dplyr::summarise(iris_groups, sum(Abundance)) head(cc) colnames(cc)= c("aa","allsum") cc<- dplyr::arrange(cc, desc(allsum)) ##使用这个属的因子对下面数据进行排序 colnames(zhnagxu2) <- c("group","aa","Abundance","sd") zhnagxu2$aa = factor(zhnagxu2$aa,order = T,levels = cc$aa) zhnagxu3 = zhnagxu2 ##制作标签坐标,标签位于顶端 # Taxonomies_x = ddply(zhnagxu3,"group", summarize, label_y = cumsum(Abundance)) # head(Taxonomies_x ) #标签位于中部 # Taxonomies_x1 = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance) - 0.5*Abundance) Taxonomies_x = plyr::ddply(zhnagxu3,"group", summarize,label_sd = cumsum(Abundance),label_y = cumsum(Abundance) - 0.5*Abundance) # Taxonomies_x$label_y = Taxonomies_x = cbind(as.data.frame(zhnagxu3),as.data.frame(Taxonomies_x)[,-1]) head(Taxonomies_x,6 ) Taxonomies_x$label = Taxonomies_x$aa # #使用循环将堆叠柱状图柱子比较窄的别写标签,仅仅宽柱子写上标签 # for(i in 1:nrow(Taxonomies_x)){ # if(Taxonomies_x[i,3] > 3){ # Taxonomies_x[i,5] = Taxonomies_x[i,5] # }else{ # Taxonomies_x[i,5] = NA # } # } Taxonomies_x$aa = factor(Taxonomies_x$aa,order = T,levels = c(as.character(cc$aa))) ##普通柱状图 p4 <- ggplot(Taxonomies_x , aes(x = group, y = Abundance, fill = aa, order = aa)) + geom_bar(stat = "identity",width = 0.5,color = "black") + # scale_fill_manual(values = colors) + theme(axis.title.x = element_blank()) + theme(legend.text=element_text(size=6)) + scale_y_continuous(name = "Relative abundance (%)") + guides(fill = guide_legend(title = j)) + labs(x="",y="Relative abundance (%)", title=paste("Top ",Top," ",j,sep = "")) p4 if (is.na(axis_order)) { p4 = p4 }else{ p4 = p4 + scale_x_discrete(limits = axis_order) } if (sd == TRUE) { p4 = p4 + geom_errorbar(aes(ymin=label_sd-sd, ymax=label_sd +sd), width=.2) } if (label == TRUE) { p4 = p4 + geom_text(aes(y = label_y, label = label )) } map = as.data.frame(sample_data(ps)) if (length(unique(map$Group))>3){p4 = p4 + theme(axis.text.x=element_text(angle=45,vjust=1, hjust=1))} #-------------冲击图 cs = Taxonomies_x $aa lengthfactor <- cs %>% levels() %>% length() cs4 <- cs %>% as.factor() %>% summary() %>% as.data.frame() cs4$id = row.names(cs4) #对因子进行排序 df_arrange<- arrange(cs4, id) #对Taxonomies_x 对应的列进行排序 Taxonomies_x1<- arrange(Taxonomies_x , aa) head(Taxonomies_x1) #构建flow的映射列Taxonomies_x Taxonomies_x1$ID = factor(rep(c(1:lengthfactor), cs4$.)) #colour = "black",size = 2,,aes(color = "black",size = 0.8) head(Taxonomies_x1) Taxonomies_x1$Abundance p3 <- ggplot(Taxonomies_x1, aes(x = group, y = Abundance,fill = aa,alluvium = aa,stratum = ID)) + geom_flow(aes(fill = aa, colour = aa), stat = "alluvium", lode.guidance = "rightleft", color = "black",size = 0.2,width = 0.35,alpha = .2) + geom_bar(width = 0.45,stat = "identity") + labs(x="",y="Relative abundance (%)", title=paste("Top ",Top," ",j,sep = "")) + guides(fill = guide_legend(title = j),color = FALSE) + scale_y_continuous(expand = c(0,0)) p3 # flower plot p1 <- ggplot(Taxonomies_x1, aes(x = group, alluvium = aa, y = Abundance)) + geom_flow(aes(fill = aa, colour = aa), width = 0) + labs(x="",y="Relative abundance (%)", title=paste("Top ",Top," ",j,sep = "")) + guides(fill = guide_legend(title = j),color = FALSE) + scale_y_continuous(expand = c(0,0)) if (is.na(axis_order)) { p1 = p1 p3 = p3 }else{ p1 = p1 +scale_x_discrete(limits = axis_order) p3 = p3 +scale_x_discrete(limits = axis_order) } # p3 if (label == TRUE) { p1 = p1 + geom_text(aes(y = label_y, label = label )) p3 = p3 + geom_text(aes(y = label_y, label = label )) } if (sd == TRUE) { p1 = p1 + geom_errorbar(aes(ymin=label_sd-sd, ymax=label_sd +sd), width=.2) p3 = p3 + geom_errorbar(aes(ymin=label_sd-sd, ymax=label_sd +sd), width=.2) } if (length(unique(map$Group))>3){ p3=p3+theme(axis.text.x=element_text(angle=45,vjust=1, hjust=1))} return(list(p4,Taxonomies_x,p3,p1))}
(0)

相关推荐