2-easy_microbiome项目-微生物群落多样性分析
phy_alpha_beta_bar_ven
wentao
分析前准备
#清空内存#######
rm(list=ls())
#堆叠柱状图参数设置
#a为输出文件主名
a <- "CS_16s"
#j选择使用哪个等级数据来做堆叠柱状图
j = "Phylum"
#j = "Class"
#j = "Order"
#j = "Family"
#j = "Genus"
##k 是否过滤或者设置过滤值为多少
k= 0.01
##重复数量
rep = 4
#韦恩图设置参数
num = 4
path = "./phyloseq_pipline1/"
dir.create(path)
## 导入数据
ps = readRDS("./a3_DADA2_table//ps.rds")
ps## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2525 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 14 sample variables ]
## tax_table() Taxonomy Table: [ 2525 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2525 tips and 2524 internal nodes ]# mapping = as.data.frame(sample_data(ps))
# unique(mapping$SampleType)
#
# head(mapping)
#
# mapping = read.delim("./CS.txt")
# head(mapping)
# mapping$ID = gsub(".fastq","",mapping$ID)
# mapping$ID = paste(mapping$ID,"A",sep = "")
# row.names(mapping) = mapping$ID
# head(mapping)
# colnames(mapping) = c("ID","SampleType","trt","final_SampleType")
#
# sample_data(ps) = mapping
# sample_data(ps) = mapping
# saveRDS(ps,"./ps.rds")
ps1 = ps
##按照最小序列数抽平
total = min(sample_sums(ps1));total## [1] 929
standf = function(x,t = total)round(t*(x/sum(x)))
ps11 = transform_sample_counts(ps1,standf)
#柱状图坐标轴排序顺序
mapping = as.data.frame(sample_data(ps))
unique(mapping$SampleType)## [1] C D B A
## Levels: A B C Dhead(mapping)
## ID SampleType env1 env2 env3 env4 env5 env6 env7
## SRR8247254 SRR8247254 C 7.25 13.35 1.55 8.64 131.44 50.64 13.83
## SRR8247256 SRR8247256 D 6.65 12.20 1.47 8.31 130.21 38.24 12.70
## SRR8247258 SRR8247258 D 6.74 13.13 1.47 8.91 124.07 39.26 13.84
## SRR8247260 SRR8247260 B 7.46 11.90 1.32 9.00 124.07 16.90 12.13
## SRR8247262 SRR8247262 B 7.26 11.50 1.32 8.68 103.19 5.72 12.55
## SRR8247264 SRR8247264 C 7.49 13.00 1.59 8.20 108.10 44.66 12.82
## env8 env9 env10 env11 env12
## SRR8247254 67.69 599.74 5.72 29.71 32.27
## SRR8247256 53.29 518.65 3.69 23.23 26.38
## SRR8247258 59.29 556.72 4.71 25.86 28.73
## SRR8247260 55.69 538.29 3.69 20.80 31.21
## SRR8247262 41.29 508.62 1.65 17.97 21.67
## SRR8247264 65.29 660.90 4.71 33.16 27.43#柱状图坐标轴排序顺序
axis_order = c("A","B","C","D")library("phyloseq")
library("ggplot2")
library("dada2")
library("tidyverse")
library("plyr"); packageVersion("plyr")## [1] '1.8.4'
library("vegan")
library("ggpubr")
library("scales")
mi=c("#1B9E77" ,"#D95F02", "#7570B3","#E7298A","#E6AB02", "#B3DE69")
mythemeBETA <- theme_bw()+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 24,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold"),
axis.text.x = element_text(colour = "black",size = 14),
axis.text.y = element_text(colour = "black",size = 14),
legend.text = element_text(size = 15,face = "bold")
#legend.position = "none"#是否删除图例
)
mythemealpha <- theme_bw()+
#theme_classic()+
# scale_color_manual(values = mi, guide = guide_legend(title = NULL))+
# scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold"),
axis.text.x = element_text(colour = "black",size = 14),
axis.text.y = element_text(colour = "black",size = 14),
legend.text = element_text(size = 15,face = "bold"))+
#theme(legend.position = c(0.1,0.2))+
theme(strip.text.x = element_text(size=15, angle=0),
strip.text.y = element_text(size=12, face="bold"),
strip.background = element_rect(colour="blue", fill="#CCCCFF"))
导入数据 处理数据
绘制堆叠柱状图表示不同分类等级信息
library(extrafont)
loadfonts(device="win") #Register fonts for Windows bitmap output
fonts()# ps = readRDS("./ps.rds")
ps1 = ps
i = ps1
#head(otu_table(ps2),n = 20L)
library("tidyverse")
colnames(tax_table(ps1))## [1] "Kingdom" "Phylum" "Class" "Order" "Family" "Genus"
##这里我们过滤一定阈值的otu,会出现最后堆叠柱状图总体丰度高于100%的情况,这是合理的
###########绘制不同分类等级的柱状图
Taxonomies <- i %>%
tax_glom(taxrank = j) %>% # agglomerate at Class level Class
transform_sample_counts(function(x) {x/sum(x)} )%>%# Transform to rel. abundance
psmelt() %>% # Melt to long format
filter(Abundance >= k) %>% # Filter out low abundance taxa
arrange(Phylum)
# head(Taxonomies)
# dim(Taxonomies)
colbar <- dim(unique(select(Taxonomies, one_of(j))))[1]
library("scales")
Phylum_colors = colorRampPalette(c( "#CBD588", "#599861", "orange","#DA5724", "#508578", "#CD9BCD",
"#AD6F3B", "#673770","#D14285", "#652926", "#C84248",
"#8569D5", "#5E738F","#D1A33D", "#8A7C64","black"))(colbar)
mi = colorRampPalette(c( "#CBD588", "#599861", "orange","#DA5724", "#508578", "#CD9BCD",
"#AD6F3B", "#673770","#D14285", "#652926", "#C84248",
"#8569D5", "#5E738F","#D1A33D", "#8A7C64","black"))(colbar)
# 显示颜色和对应16进制RGB代码
show_col(Phylum_colors)
Taxonomies$Abundance = Taxonomies$Abundance * 100
Taxonomies$Abundance = Taxonomies$Abundance/rep
# head(Taxonomies)
#按照分组求均值
colnames(Taxonomies) <- gsub(j,"aa",colnames(Taxonomies))
by_cyl <- group_by(Taxonomies, SampleType,aa)
zhnagxu2 = dplyr :: summarise(by_cyl, sum(Abundance))
#colnames(zhnagxu2) = c("group", j,"Abundance")
# head(zhnagxu2)
##确定因子,这里通过求和按照从小到大的顺序得到因子
##长变宽
library(reshape2)
# head(Taxonomies)
Taxonomies2 = dcast(Taxonomies,aa ~ Sample,value.var = "Abundance")
head(Taxonomies2)Taxonomies2[is.na(Taxonomies2)] <- 0
aa = Taxonomies2
# head(aa)
n = ncol(aa)
#增加一行,为整列的均值,计算每一列的均值,2就是表示列
aa[n+1]=apply(aa[,c(2:ncol(aa))],1,sum)
colnames(aa)[n+1] <- c("allsum")
# str(aa)
bb<- arrange(aa, allsum)
# head(bb)
bb = bb[c(1,ncol(bb))]
cc<- arrange(bb, desc(allsum))
# head(cc)
##使用这个属的因子对下面数据进行排序
library("plyr")
head(zhnagxu2)## # A tibble: 6 x 3
## # Groups: SampleType [1]
## SampleType aa `sum(Abundance)`
## <fct> <fct> <dbl>
## 1 A Acidobacteria 5.10
## 2 A Actinobacteria 7.99
## 3 A Chloroflexi 50.6
## 4 A Cyanobacteria 2.60
## 5 A Deinococcus-Thermus 0.622
## 6 A Firmicutes 2.97colnames(zhnagxu2) <- c("group","aa","Abundance")
zhnagxu2$aa = factor(zhnagxu2$aa,order = T,levels = cc$aa)
zhnagxu3 = plyr::arrange(zhnagxu2,desc(aa))
# head(zhnagxu3)
##制作标签坐标,标签位于顶端
# Taxonomies_x = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance))
# head(Taxonomies_x )
#标签位于中部
Taxonomies_x = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance) - 0.5*Abundance)
head(Taxonomies_x,20 )## group aa Abundance label_y
## 1 A Planctomycetes 1.2965580 0.6482790
## 2 A Microgenomates 0.2849544 1.4390352
## 3 A Cyanobacteria 2.6042935 2.8836592
## 4 A Firmicutes 2.9693819 5.6704969
## 5 A Deinococcus-Thermus 0.6215041 7.4659399
## 6 A Gemmatimonadetes 3.4698722 9.5116280
## 7 A Acidobacteria 5.0989307 13.7960295
## 8 A Actinobacteria 7.9877526 20.3393711
## 9 A Parcubacteria 11.6703206 30.1684077
## 10 A Saccharibacteria 14.2663942 43.1367651
## 11 A Chloroflexi 50.6114002 75.5756623
## 12 A Proteobacteria 44.8961584 123.3294416
## 13 B WWE3 0.3261243 0.1630621
## 14 B Planctomycetes 0.5804491 0.6163488
## 15 B Microgenomates 1.0740402 1.4435934
## 16 B Cyanobacteria 1.1654740 2.5633505
## 17 B Firmicutes 1.9080327 4.1001038
## 18 B Deinococcus-Thermus 1.0834642 5.5958523
## 19 B Gemmatimonadetes 4.3904122 8.3327905
## 20 B Acidobacteria 6.1540403 13.6050168Taxonomies_x$label = Taxonomies_x$aa
#使用循环将堆叠柱状图柱子比较窄的别写标签,仅仅宽柱子写上标签
for(i in 1:nrow(Taxonomies_x)){
if(Taxonomies_x[i,3] > 3){
Taxonomies_x[i,5] = Taxonomies_x[i,5]
}else{
Taxonomies_x[i,5] = NA
}
}
library(ggalluvial)
library(ggplot2)
##普通柱状图
p4 <- ggplot(Taxonomies_x , aes(x = group, y = Abundance, fill = aa, order = aa)) +
geom_bar(stat = "identity",width = 0.5,color = "black") +
scale_fill_manual(values = Phylum_colors) +
theme(axis.title.x = element_blank()) +
theme(legend.text=element_text(size=6)) +
scale_y_continuous(name = "Abundance (%)")+
scale_x_discrete(limits = axis_order)+
geom_text(aes(y = label_y, label = label ),size = 4,family="Times New Roman",fontface = "bold.italic")
# print(p4)
# install.packages("ggalluvial")
p4 =p4+theme_bw()+
scale_y_continuous(expand = c(0,0))+
#geom_hline(aes(yintercept=0), colour="black", linetype=2) +
#geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +
#scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
text=element_text(family="Times New Roman",face = "bold"),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold",family = "Times"),
axis.text.x = element_text(colour = "black",size = 14,family="Times New Roman"),
axis.text.y = element_text(colour = "black",size = 14,family="Times New Roman"),
legend.text = element_text(size = 15,face = "bold.italic")
#legend.position = "none"#是否删除图例
)
p4
FileName1 <- paste(path,"/a2_",j,a,"_bar",".pdf", sep = "")
ggsave(FileName1, p4, width = 12, height =8, device = cairo_pdf, family = "Times New Roman" )
##柱状图冲击图
#stratum定义堆叠柱状图柱子内容,以weight定义柱子长度,alluvium定义连线
head(Taxonomies_x )## group aa Abundance label_y label
## 1 A Planctomycetes 1.2965580 0.648279 <NA>
## 2 A Microgenomates 0.2849544 1.439035 <NA>
## 3 A Cyanobacteria 2.6042935 2.883659 <NA>
## 4 A Firmicutes 2.9693819 5.670497 <NA>
## 5 A Deinococcus-Thermus 0.6215041 7.465940 <NA>
## 6 A Gemmatimonadetes 3.4698722 9.511628 Gemmatimonadetescs = Taxonomies_x $aa
# head(cs)
# as.factor(Taxonomies_x $Genus)
# cs = as.character(Taxonomies_x $Genus)
# cs1 = as.factor(cs)
cs1 = cs
#提取真正的因子的数量
lengthfactor = length(levels(cs1))
#提取每个因子对应的数量
cs3 = summary (as.factor(cs1))
cs4 = as.data.frame(cs3)
cs4$id = row.names(cs4)
#对因子进行排序
df_arrange<- arrange(cs4, id)
#对Taxonomies_x 对应的列进行排序
Taxonomies_x1<- arrange(Taxonomies_x , aa)
head(Taxonomies_x1)## group aa Abundance label_y label
## 1 A Proteobacteria 44.89616 123.32944 Proteobacteria
## 2 B Proteobacteria 43.86277 125.71142 Proteobacteria
## 3 C Proteobacteria 49.91716 121.66343 Proteobacteria
## 4 D Proteobacteria 44.88647 123.58450 Proteobacteria
## 5 A Chloroflexi 50.61140 75.57566 Chloroflexi
## 6 B Chloroflexi 30.01323 88.77342 Chloroflexi#构建flow的映射列Taxonomies_x
Taxonomies_x1$ID = factor(rep(c(1:lengthfactor), cs4$cs3))
#colour = "black",size = 2,,aes(color = "black",size = 0.8)
p3 = ggplot(Taxonomies_x1,
aes(x = group, stratum = aa, alluvium = ID,
weight = Abundance,
fill = aa, label = aa)) +
geom_flow(stat = "alluvium", lode.guidance = "rightleft",
color = "black",size = 0.2,width = 0.3,alpha = .2) +
geom_bar(width = 0.45)+
geom_stratum(width = 0.45,size = 0.2) +
#geom_text(stat = "stratum", size = 3,family="Times New Roman",fontface = "bold.italic") +
#theme(legend.position = "none") +
scale_fill_manual(values = Phylum_colors)+
#ggtitle("fow_plot")+
scale_x_discrete(limits = axis_order)+
geom_text(aes(y = label_y, label = label ),size = 4,family="Times New Roman",fontface = "bold.italic")+
labs(x="group",
y="Relative abundancce (%)",
title="")
# p3
p3 =p3+theme_bw()+
scale_y_continuous(expand = c(0,0))+
#geom_hline(aes(yintercept=0), colour="black", linetype=2) +
#geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +
#scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
text=element_text(family="Times New Roman",face = "bold"),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold",family = "Times"),
axis.text.x = element_text(colour = "black",size = 14,family="Times New Roman"),
axis.text.y = element_text(colour = "black",size = 14,family="Times New Roman"),
legend.text = element_text(size = 15,face = "bold.italic")
#legend.position = "none"#是否删除图例
)
p3
FileName2 <- paste(path,"/a2_",j,a,"_bar_flow",".pdf", sep = "")
ggsave(FileName2, p3, width = 12, height =8, device = cairo_pdf, family = "Times New Roman" )
第二种方式alpha 使用microbiomeSeq包
这个包可以加上anova检验,但是我需要使用其他alpha多样性指标和非参数检验的话就没有办法了
library("microbiomeSeq")
library("ggplot2")
library("phyloseq")
physeq <- ps11
taxa_are_rows(physeq)## [1] FALSE
p <- plot_anova_diversity(physeq, method = c("richness", "simpson", "shannon"),
grouping_column = "SampleType", pValueCutoff = 0.05)
p = p +
scale_x_discrete(limits = axis_order)+
mythemealpha
if (length(unique(as.data.frame(sample_data(ps1))$SampleType))>3){ p=p+theme(axis.text.x=element_text(angle=45,vjust=1, hjust=1))}
xx = length(unique(as.data.frame(sample_data(ps1))$SampleType))
plotname = paste(path,"./a1_alpha_microbiomeSeq.pdf",sep = "")
ggsave(plotname, p, width = (4+xx/2)*2, height = (4+xx/2),limitsize = FALSE)
备注:alpha多样性指标分析表明JK1样品很可能出现错误
自编函数,用于alpha多样性指标计算和出图
添加microbiome包中的20多种alpha多样性指标,并且修改误差线,淡然需要载入microbiome
library("microbiome")
plot_alpha <- function (physeq, x = "samples", color = NULL, shape = NULL,
title = NULL, scales = "free_y", nrow = 1, shsi = NULL, measures = NULL, index = "all",
sortby = NULL)
{
erDF1 = estimate_richness(physeq, split = TRUE, measures = measures)
tab <- alpha(physeq, index = index)
erDF = cbind(erDF1,tab)
measures = colnames(erDF)
ses = colnames(erDF)[grep("^se\\.", colnames(erDF))]
measures = measures[!measures %in% ses]
if (!is.null(sample_data(physeq, errorIfNULL = FALSE))) {
DF <- data.frame(erDF, sample_data(physeq))
}
else {
DF <- data.frame(erDF)
}
if (!"samples" %in% colnames(DF)) {
DF$samples <- sample_names(physeq)
}
if (!is.null(x)) {
if (x %in% c("sample", "samples", "sample_names", "sample.names")) {
x <- "samples"
}
}
else {
x <- "samples"
}
mdf = reshape2::melt(DF, measure.vars = measures)
mdf$se <- NA_integer_
if (length(ses) > 0) {
selabs = ses
names(selabs) <- substr(selabs, 4, 100)
substr(names(selabs), 1, 1) <- toupper(substr(names(selabs),
1, 1))
mdf$wse <- sapply(as.character(mdf$variable), function(i,
selabs) {
selabs[i]
}, selabs)
for (i in 1:nrow(mdf)) {
if (!is.na(mdf[i, "wse"])) {
mdf[i, "se"] <- mdf[i, (mdf[i, "wse"])]
}
}
mdf <- mdf[, -which(colnames(mdf) %in% c(selabs, "wse"))]
}
if (!is.null(measures)) {
if (any(measures %in% as.character(mdf$variable))) {
mdf <- mdf[as.character(mdf$variable) %in% measures,
]
}
else {
warning("Argument to `measures` not supported. All alpha-diversity measures (should be) included in plot.")
}
}
if (!is.null(shsi)) {
warning("shsi no longer supported option in plot_richness. Please use `measures` instead")
}
if (!is.null(sortby)) {
if (!all(sortby %in% levels(mdf$variable))) {
warning("`sortby` argument not among `measures`. Ignored.")
}
if (!is.discrete(mdf[, x])) {
warning("`sortby` argument provided, but `x` not a discrete variable. `sortby` is ignored.")
}
if (all(sortby %in% levels(mdf$variable)) & is.discrete(mdf[,
x])) {
wh.sortby = which(mdf$variable %in% sortby)
mdf[, x] <- factor(mdf[, x], levels = names(sort(tapply(X = mdf[wh.sortby,
"value"], INDEX = mdf[wh.sortby, x], mean, na.rm = TRUE,
simplify = TRUE))))
}
}
richness_map = aes_string(x = x, y = "value", colour = color,
shape = shape)
p = ggplot(mdf, richness_map) + geom_point(na.rm = TRUE) +
stat_compare_means(comparisons=my_comparisons,label = "p.signif")+
stat_compare_means()
if (any(!is.na(mdf[, "se"]))) {
# p = p + geom_errorbar(aes(ymax = value + se, ymin = value -
# se), width = 0.1)
}
p = p + theme(axis.text.x = element_text(angle = -90, vjust = 0.5,
hjust = 0))
p = p + ylab("Alpha Diversity Measure")
p = p + facet_wrap(~variable, nrow = nrow, scales = scales)
if (!is.null(title)) {
p <- p + ggtitle(title)
}
return(p)
}
使用修改函数进行多样性计算和出图
library(ggpubr)
my_comparisons <- list(c("CF", "PK"),c("CK", "CD"),c("AB", "CD"))
p = plot_alpha(ps11, x="SampleType",color="SampleType", measures=c("Chao1", "Shannon","ACE"), index = "evenness_simpson")+
geom_boxplot(alpha=1, outlier.size=2, size=1, width=0.5,notchwidth=1) +
geom_jitter( position=position_jitter(0.17), size=2, alpha=0.7)+
labs(x="", y=paste("alpha diversity", sep = " "))
p = p+mythemealpha+
scale_color_manual(values = mi, guide = guide_legend(title = NULL))+
scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
scale_x_discrete(limits = axis_order)
p
if (length(unique(as.data.frame(sample_data(ps1))$SampleType))>3){ p=p+theme(axis.text.x=element_text(angle=45,vjust=1, hjust=1))}
plotname = paste(path,"/a1_alpha_final.pdf",sep = "")
ggsave(plotname, p, width = 24, height = 8)
beta多样性分析
ps1_rela = transform_sample_counts(ps, function(x) x / sum(x) );ps1_rela
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2525 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 14 sample variables ]
## tax_table() Taxonomy Table: [ 2525 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2525 tips and 2524 internal nodes ]unif <- distance(ps1_rela , method="bray", type="samples")
#这里请记住pcoa函数
pcoa = cmdscale(unif, k=2, eig=T) # k is dimension, 3 is recommended; eig is eigenvalues
points = as.data.frame(pcoa$points) # 获得坐标点get coordinate string, format to dataframme
colnames(points) = c("x", "y") #命名行名
eig = pcoa$eig
#eig特征值得到
sub_design = as.data.frame(sample_data(ps1_rela))
points = cbind(points, sub_design[match(rownames(points), rownames(sub_design)), ])
#write.table(points,"pcoa_bray_curtis.txt",quote = FALSE,row.names = F,
# col.names = T,sep = "\t")
head(points)## x y ID SampleType env1 env2 env3
## SRR8247254 0.27049339 0.19444201 SRR8247254 C 7.25 13.35 1.55
## SRR8247256 0.12694742 -0.07691397 SRR8247256 D 6.65 12.20 1.47
## SRR8247258 0.33916721 0.21905779 SRR8247258 D 6.74 13.13 1.47
## SRR8247260 0.35631194 -0.07071409 SRR8247260 B 7.46 11.90 1.32
## SRR8247262 0.05454201 0.23528846 SRR8247262 B 7.26 11.50 1.32
## SRR8247264 0.21752916 -0.35131890 SRR8247264 C 7.49 13.00 1.59
## env4 env5 env6 env7 env8 env9 env10 env11 env12
## SRR8247254 8.64 131.44 50.64 13.83 67.69 599.74 5.72 29.71 32.27
## SRR8247256 8.31 130.21 38.24 12.70 53.29 518.65 3.69 23.23 26.38
## SRR8247258 8.91 124.07 39.26 13.84 59.29 556.72 4.71 25.86 28.73
## SRR8247260 9.00 124.07 16.90 12.13 55.69 538.29 3.69 20.80 31.21
## SRR8247262 8.68 103.19 5.72 12.55 41.29 508.62 1.65 17.97 21.67
## SRR8247264 8.20 108.10 44.66 12.82 65.29 660.90 4.71 33.16 27.43ado = adonis(unif~ sub_design$SampleType,permutations = 999,method="bray")
a = round(as.data.frame(ado$aov.tab[5])[1,1],3)
R2 <- paste("adonis:R ",a, sep = "")
b = as.data.frame(ado$aov.tab[6])[1,1]
p_v = paste("p: ",b, sep = "")
title = paste(R2," ",p_v, sep = "")
title## [1] "adonis:R 0.149 p: 0.043"
# mi=c("#1B9E77" ,"#D95F02", "#7570B3","#E7298A","#E6AB02", "#B3DE69")
p2 <-ggplot(points, aes(x=x, y=y, fill = SampleType)) +
geom_point(alpha=.7, size=5, pch = 21) +
labs(x=paste("PCoA 1 (", format(100 * eig[1] / sum(eig), digits=4), "%)", sep=""),
y=paste("PCoA 2 (", format(100 * eig[2] / sum(eig), digits=4), "%)", sep=""),
title=title)+
stat_ellipse( linetype = 2,level = 0.65,aes(group =SampleType, colour = SampleType))+
#stat_ellipse( linetype = 1,level = 0.8)+
#geom_text_repel(aes(label=points$id),size=4)+
scale_colour_manual(values = mi,guide = guide_legend(title = NULL))+
scale_fill_manual(values = mi,guide = guide_legend(title = NULL))+
#labs(title = "toamto hea and dis")+
guides(color=guide_legend(title = NULL),shape=guide_legend(title = NULL))
# p2
# points$id=row.names(points)
# p+geom_text(aes(label=points$id),size=4)#?stat_ellipse
p2 = p2+theme_bw()+
#scale_y_continuous(expand = c(0,0))+
geom_hline(aes(yintercept=0), colour="black", linetype=2) +
geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +
scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 24,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold"),
axis.text.x = element_text(colour = "black",size = 14),
axis.text.y = element_text(colour = "black",size = 14),
legend.text = element_text(size = 15,face = "bold")
#legend.position = "none"#是否删除图例
)
p2
# head(points)
# points$id=row.names(points)
# p2+geom_text(aes(label=points$id),size=4)
plotname = paste(path,"/a2_bray_PCOA.pdf",sep = "")
ggsave(plotname, p2, width = 12, height = 8)
值得庆幸的是我之前的代码都不需要修改,直接可以得到想要的结果 但是下面的韦恩图却不一样了 ### 做一张韦恩图 这里我设置了循环,大于6个分组将不做韦恩图,因为做了也看不清了,如果大于6组,本代码提供取子集的部分,可以用来测试代码
library("phyloseq")
ps = readRDS("./a3_DADA2_table//ps.rds")
ps1 = ps
# ps1 = subset_samples(ps, trt %in% c(1,5,15,41))
# ps1
# ps1
vegan_otu <- function(physeq){
OTU <- otu_table(physeq)
if(taxa_are_rows(OTU)){
OTU <- t(OTU)
}
return(as(OTU,"matrix"))
}
aa = vegan_otu(ps1)
otu_table = as.data.frame(t(aa))
count = aa
countA = count
# dim(count)
sub_design <- as.data.frame(sample_data(ps1))
# levels(sub_design$SampleType)[1]
# name1 = paste("name",levels(sub_design$SampleType)[1],sep = "")
sub_design $SampleType## [1] C D D B B C C B D C D D B C C A A A A A A B B D
## Levels: A B C Dlevels(sub_design $SampleType)
## [1] "A" "B" "C" "D"
##########这里的操作为提取三个分组
pick_val_num <- num*2/4
count[count > 0] <- 1###这个函数只能用于0,1 的数据,所以我这么转换
count2 = as.data.frame(count)
library("tibble")
#数据分组
iris.split <- split(count2,as.factor(sub_design$SampleType))
#数据分组计算平均值
iris.apply <- lapply(iris.split,function(x)colSums(x[]))
# 组合结果
iris.combine <- do.call(rbind,iris.apply)
ven2 = t(iris.combine)
# head(ven2)
ven2[ven2 < pick_val_num] = 0
ven2[ven2 >=pick_val_num] = 1
ven2 = as.data.frame(ven2)
#########更加高级的设置在这里可以查看#https://mp.weixin.qq.com/s/6l7gftKQfiyxNH66i19YtA
ven3 = as.list(ven2)
library (VennDiagram)
ven_pick = get.venn.partitions(ven3)
for (i in 1:ncol(ven2)) {
ven3[[i]] <- row.names(ven2[ven2[i] == 1,])
}
head(ven2)library (VennDiagram)
if (length(names(ven3)) == 2) {
filename3 = paste(path,"ven_",paste(names(ven3),sep = "",collapse="-"),".pdf",sep = "",collapse="_")
pdf(file=filename3,width = 8, height = 6)
T<-venn.diagram(ven3,
filename=NULL,
lwd=2,#圈线粗度
lty=1, #圈线类型
fill=c('red',"blue"), #填充颜色
col=c('red',"blue"), #圈线颜色
cat.col=c('red',"blue"),#A和B的颜色
cat.cex = 4,# A和B的大小
rotation.degree = 0,#旋转角度
main = "",#主标题内容
main.cex = 2,#主标题大小
sub = "",#亚标题内容
sub.cex = 1,#亚标题字大小
cex=3,#里面交集字的大小
alpha = 0.5,#透明度
reverse=TRUE,
scaled = FALSE)
grid.draw(T)
dev.off()
grid.draw(T)
} else if (length(names(ven3)) == 3) {
filename3 = paste(path,"ven_",paste(names(ven3),sep = "",collapse="-"),".pdf",sep = "",collapse="_")
pdf(file=filename3,width = 12, height = 12)
T<-venn.diagram(ven3,
filename=NULL,
lwd=2,#圈线粗度
lty=1, #圈线类型
fill=c('red',"blue","yellow"), #填充颜色
col=c('red',"blue","yellow"), #圈线颜色
cat.col=c('red',"blue","yellow"),#A和B的颜色
cat.cex = 4,# A和B的大小
rotation.degree = 0,#旋转角度
main = "",#主标题内容
main.cex = 2,#主标题大小
sub = "",#亚标题内容
sub.cex = 1,#亚标题字大小
cex=3,#里面交集字的大小
alpha = 0.5,#透明度
reverse=TRUE,
scaled = FALSE)
grid.draw(T)
dev.off()
grid.draw(T)
} else if (length(names(ven3)) == 4) {
filename3 = paste(path,"ven_",paste(names(ven3),sep = "",collapse="-"),".pdf",sep = "",collapse="_")
pdf(file=filename3,width = 12, height = 12)
T<-venn.diagram(ven3,
filename=NULL,
lwd=2,#圈线粗度
lty=1, #圈线类型
fill=c('red',"blue","yellow","#7ad2f6"), #填充颜色
col=c('red',"blue","yellow","#7ad2f6"), #圈线颜色
cat.col=c('red',"blue","yellow","#7ad2f6"),#A和B的颜色
cat.cex = 4,# A和B的大小
rotation.degree = 0,#旋转角度
main = "",#主标题内容
main.cex = 2,#主标题大小
sub = "",#亚标题内容
sub.cex = 1,#亚标题字大小
cex=3,#里面交集字的大小
alpha = 0.5,#透明度
reverse=TRUE,
scaled = FALSE)
grid.draw(T)
dev.off()
grid.draw(T)
}else if (length(names(ven3)) == 5) {
filename3 = paste(path,"ven_",paste(names(ven3),sep = "",collapse="-"),".pdf",sep = "",collapse="_")
pdf(file=filename3,width = 12, height = 12)
T<-venn.diagram(ven3,
filename=NULL,
lwd=2,#圈线粗度
lty=1, #圈线类型
fill=c('red',"blue","yellow","#7ad2f6","green"), #填充颜色
col=c('red',"blue","yellow","#7ad2f6","green"), #圈线颜色
cat.col=c('red',"blue","yellow","#7ad2f6","green"),#A和B的颜色
cat.cex = 4,# A和B的大小
rotation.degree = 0,#旋转角度
main = "",#主标题内容
main.cex = 2,#主标题大小
sub = "",#亚标题内容
sub.cex = 1,#亚标题字大小
cex=3,#里面交集字的大小
alpha = 0.5,#透明度
reverse=TRUE,
scaled = FALSE)
grid.draw(T)
dev.off()
grid.draw(T)
}else if (length(names(ven3)) == 6) {
print("ven not use for more than 6")
}
dev.off()
## null device
## 1# path = paste(path,"/",sep = "")
library(UpSetR)
#install.packages("UpSetR")
if (ven_pick[[1,6]] != 0) {
filename4 = paste(path,"/UpSet_",paste(names(ven3),sep = "",collapse="-"),".pdf",sep = "",collapse="_")
pdf(file=filename4,width = 12, height = 8)
p5 = upset(ven2, sets = colnames(ven2),
number.angles = 30, point.size = 2, line.size = 1,
mainbar.y.label = "OTU", sets.x.label = "OTU Per Treatment",
text.scale = c(2, 2, 2,2, 2, 2),mb.ratio = c(0.7, 0.3),order.by = "freq",keep.order = TRUE,
queries = list(list(query = intersects, params =
list(colnames(ven2)), color = "red", active = T),
list(query = intersects, params =
list(colnames(ven2)), color = "red", active = T),
list(query = intersects, params =
list(colnames(ven2)), color = "red", active = T)))
p5
dev.off()
}## null device
## 1# p5