PCoA-主坐标分析
PCoA运算过程解析
1)导入数据
library(tidyverse)
library(readxl)
library(ape)
library(vegan)
#' Read the first `n` worksheets of an Excel workbook.
#'
#' @param file Path to the workbook.
#' @param n Number of leading worksheets to read (sheets 1 to `n`); a single
#'   non-negative whole number.
#' @return A list of length `n`; element `i` is the result of
#'   `readxl::read_excel()` for sheet `i`, read with `col_names = TRUE`.
readxlsx <- function(file = "file.xlsx", n = 3) {
  # Reject counts that cannot be turned into a sheet sequence.
  stopifnot(
    is.numeric(n), length(n) == 1L, !is.na(n),
    n >= 0, n == round(n)
  )
  # seq_len() is empty for n = 0, so no sheet (and no file) is touched then.
  # The readxl:: prefix fails loudly if the package is missing, unlike
  # require(), which only returns FALSE.
  lapply(seq_len(n), function(sheet) {
    readxl::read_excel(file, sheet = sheet, col_names = TRUE)
  })
}
# Load worksheets 1-4 of the workbook into a list and show them.
# The list shares its name with ape's pcoa() function; a later call written as
# pcoa(...) still reaches the function, because R skips non-function objects
# when it looks up the name being called.
pcoa <- readxlsx(n = 4, file = "1-16S.xlsx")
print(pcoa)
2)PCoA
数据处理:首先明确数据的行与列,在所有类型的排序分析中,数据的行与列都需要明确地给出;其次检查数据的行名是否为样方名,列名是否为物种名(或者OTUid)。若不是,需要在PCoA分析之前使用转置函数t()将数据转换为上述格式。用于分析的数据格式如下:
适用于本节数据处理的代码如下:
# Work on the first worksheet.
bac <- pcoa[[1]]
# Convert the tibble to a base data.frame so that row names can be assigned.
bac2 <- data.frame(bac)
# Use the OTUid column as row names ...
rownames(bac2) <- bac2$OTUid
# ... then drop the first column (presumably that same OTUid column - confirm).
bac2 <- bac2[, -1]
head(bac2[, 1:6])
# Transpose so that the former columns become rows, as the ordination expects.
tbac2 <- data.frame(t(bac2))
head(tbac2[, 1:6])
PCoA
# PCoA
# Bray-Curtis dissimilarities between the rows of tbac2.
bac_bray <- vegan::vegdist(tbac2, method = "bray")
# Principal coordinate analysis without a negative-eigenvalue correction.
# ape:: is spelled out because a list named `pcoa` also exists in the workspace.
bac_pcoa <- ape::pcoa(bac_bray, correction = "none")
print(bac_pcoa)
# Quick base-graphics look at the ordination.
biplot(bac_pcoa)
ggplot2绘图
bac_pcoa[["values"]]  # inspect how much variation each axis explains
提取PCo1和PCo2坐标轴数据
# Coordinates on the first two principal coordinate axes.
# Author's notes on the variation explained: axis 1 = 26.85%, axis 2 = 19.99%.
bac_PCo1 <- bac_pcoa$vectors[, 1]
bac_PCo2 <- bac_pcoa$vectors[, 2]
# Combine both axes; the row names are moved into a `sample` column.
bac_pco <- as_tibble(data.frame(bac_PCo1, bac_PCo2), rownames = "sample")
bac_pco
根据处理给数据分组
# Attach the third worksheet (matched on sample == Code), keep the sample name,
# the two axis columns and Tdiff, then bin Tdiff into five labelled groups.
# The conditions are checked top to bottom, so each row gets the first label
# whose threshold it falls under; a missing Tdiff yields a missing group.
bac_pco2 <- bac_pco %>%
  left_join(pcoa[[3]], by = c("sample" = "Code")) %>%
  dplyr::select(sample:bac_PCo2, Tdiff) %>%
  mutate(
    group = case_when(
      Tdiff < -5.7 ~ "very cold",
      Tdiff < 0 ~ "cold",
      Tdiff < 5.7 ~ "in situ",
      Tdiff < 9.6 ~ "warm",
      Tdiff >= 9.6 ~ "hot"
    )
  )
bac_pco2
数据格式如下:
绘图背景等参数设置(直接粘贴并运行)
# Theme overrides shared by all figures; added on top of theme_bw() in the
# plotting calls.
main_theme <- theme(
  # Panel: no background and no grid lines.
  panel.background = element_blank(),
  panel.grid = element_blank(),
  # Axes: black lines, ticks and tick labels.
  axis.line.x = element_line(size = 0.5, colour = "black"),
  axis.line.y = element_line(size = 0.5, colour = "black"),
  axis.ticks = element_line(color = "black"),
  axis.text = element_text(color = "black", size = 12),
  # Legend: on the right, without background or key boxes.
  legend.position = "right",
  legend.background = element_blank(),
  legend.key = element_blank(),
  legend.text = element_text(size = 12),
  # Text: 12 pt sans throughout, centred title.
  text = element_text(family = "sans", size = 12),
  plot.title = element_text(hjust = 0.5, vjust = 0.5, size = 12),
  plot.subtitle = element_text(size = 12)
)
绘图
# PCoA scatter plot: one point per row of bac_pco2 on the first two axes,
# coloured by group.
group_levels <- c("very cold", "cold", "in situ", "warm", "hot")
group_colors <- c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d")
# Share of variation per axis, read from the ordination itself so the axis
# titles cannot drift from the data (the hand-typed values disagreed:
# 26.85% in the notes vs 26.95% in the label).
axis_pct <- 100 * bac_pcoa$values$Relative_eig

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Reference lines through the origin are constants, so they are passed as
  # parameters; mapping them with aes() draws one identical line per data row.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # `limits` fixes the legend order; colours pair with the levels by position.
  scale_color_manual(values = group_colors, limits = group_levels) +
  # Optional fixed axis ranges:
  # scale_x_continuous(breaks = seq(-0.59, 0.66, 0.2), limits = c(-0.59, 0.66)) +
  # scale_y_continuous(breaks = seq(-0.60, 0.45, 0.15), limits = c(-0.60, 0.45)) +
  labs(
    x = sprintf("PCo1 (%.2f%%)", axis_pct[1]),
    y = sprintf("PCo2 (%.2f%%)", axis_pct[2]),
    color = "Treatments"
  ) +
  theme_bw() +
  main_theme
分组和“等温线” 绘图
分组绘图
# PCoA scatter plot with a filled 95% ellipse drawn around each group.
# Levels and palette are defined once and reused for colour and fill so the
# two legends cannot get out of step.
group_levels <- c("very cold", "cold", "in situ", "warm", "hot")
group_colors <- c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d")
# Share of variation per axis, read from the ordination itself so the axis
# titles cannot drift from the data (the hand-typed values disagreed:
# 26.85% in the notes vs 26.95% in the label).
axis_pct <- 100 * bac_pcoa$values$Relative_eig

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Reference lines through the origin are constants, so they are passed as
  # parameters; mapping them with aes() draws one identical line per data row.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # Semi-transparent ellipse per group, filled with the group's colour.
  stat_ellipse(aes(fill = group), geom = "polygon", level = 0.95, alpha = 0.2) +
  # `limits` fixes the legend order; colours pair with the levels by position.
  scale_color_manual(values = group_colors, limits = group_levels) +
  scale_fill_manual(values = group_colors, limits = group_levels) +
  # Optional fixed axis ranges:
  # scale_x_continuous(breaks = seq(-0.59, 0.66, 0.2), limits = c(-0.59, 0.66)) +
  # scale_y_continuous(breaks = seq(-0.60, 0.45, 0.15), limits = c(-0.60, 0.45)) +
  labs(
    x = sprintf("PCo1 (%.2f%%)", axis_pct[1]),
    y = sprintf("PCo2 (%.2f%%)", axis_pct[2]),
    color = "Treatments",
    fill = "Treatments"
  ) +
  theme_bw() +
  main_theme
“等温线”绘图
# "Isotherm" plot: PCoA scatter plot with 2-D density contours per group.
group_levels <- c("very cold", "cold", "in situ", "warm", "hot")
group_colors <- c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d")
# Share of variation per axis, read from the ordination itself so the axis
# titles cannot drift from the data (the hand-typed values disagreed:
# 26.85% in the notes vs 26.95% in the label).
axis_pct <- 100 * bac_pcoa$values$Relative_eig

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Reference lines through the origin are constants, so they are passed as
  # parameters; mapping them with aes() draws one identical line per data row.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # Contour lines per group. NOTE(review): ggplot2 >= 3.4 prefers `linewidth`
  # for line widths; `size` is kept so older installations still run.
  stat_density2d(aes(color = group), size = 0.6) +
  # `limits` fixes the legend order; colours pair with the levels by position.
  scale_color_manual(values = group_colors, limits = group_levels) +
  # Optional fixed axis ranges:
  # scale_x_continuous(breaks = seq(-0.59, 0.66, 0.2), limits = c(-0.59, 0.66)) +
  # scale_y_continuous(breaks = seq(-0.60, 0.45, 0.15), limits = c(-0.60, 0.45)) +
  # No fill label: nothing in this plot maps the fill aesthetic.
  labs(
    x = sprintf("PCo1 (%.2f%%)", axis_pct[1]),
    y = sprintf("PCo2 (%.2f%%)", axis_pct[2]),
    color = "Treatments"
  ) +
  theme_bw() +
  main_theme
链接:https://www.aliyundrive.com/s/CFvkjJ3nECi
如有问题,可以加入我们的群聊一起讨论,如下:
最后奉上完整版代码,如下:
# Pick the folder that holds "1-16S.xlsx" and make it the working directory.
# choose.dir() exists only in Windows builds of R, so the dialog is skipped on
# other platforms (start R in the data folder there instead). A cancelled
# dialog returns NA, which setwd() rejects, so the directory is changed only
# when a folder was actually chosen.
if (.Platform$OS.type == "windows") {
  data_dir <- utils::choose.dir()
  if (!is.na(data_dir)) {
    setwd(data_dir)
  }
}
# The workspace is deliberately not cleared with rm(list = ls()): that would
# delete the user's unrelated objects. Restart R for a clean session instead.
#------------------import data--------------------------
library(tidyverse)
library(readxl)
library(vegan)
library(ape)
#' Read the first `n` worksheets of an Excel workbook.
#'
#' @param file Path to the workbook.
#' @param n Number of leading worksheets to read (sheets 1 to `n`); a single
#'   non-negative whole number.
#' @return A list of length `n`; element `i` is the result of
#'   `readxl::read_excel()` for sheet `i`, read with `col_names = TRUE`.
readxlsx <- function(file = "file.xlsx", n = 3) {
  # Reject counts that cannot be turned into a sheet sequence.
  stopifnot(
    is.numeric(n), length(n) == 1L, !is.na(n),
    n >= 0, n == round(n)
  )
  # seq_len() is empty for n = 0, so no sheet (and no file) is touched then.
  # The readxl:: prefix fails loudly if the package is missing, unlike
  # require(), which only returns FALSE.
  lapply(seq_len(n), function(sheet) {
    readxl::read_excel(file, sheet = sheet, col_names = TRUE)
  })
}
# Load worksheets 1-4 of the workbook into a list, then show the third one
# (it is joined to the ordination scores further down).
pcoa <- readxlsx(n = 4, file = "1-16S.xlsx")
print(pcoa[[3]])
# Process data ----
# Work on the first worksheet.
bac <- pcoa[[1]]
# Convert the tibble to a base data.frame so that row names can be assigned.
bac2 <- data.frame(bac)
# Use the OTUid column as row names ...
rownames(bac2) <- bac2$OTUid
# ... then drop the first column (presumably that same OTUid column - confirm).
bac2 <- bac2[, -1]
head(bac2[, 1:6])
# Transpose so that the former columns become rows, as the ordination expects.
tbac2 <- data.frame(t(bac2))
head(tbac2[, 1:6])
# PCoA ----
# Bray-Curtis dissimilarities between the rows of tbac2.
bac_bray <- vegan::vegdist(tbac2, method = "bray")
# Principal coordinate analysis without a negative-eigenvalue correction.
# ape:: is spelled out because a list named `pcoa` also exists in the workspace.
bac_pcoa <- ape::pcoa(bac_bray, correction = "none")
print(bac_pcoa)
# Quick base-graphics look at the ordination.
biplot(bac_pcoa)
# Table showing how much variation each axis explains.
bac_pcoa[["values"]]
# Coordinates on the first two principal coordinate axes.
# Author's notes on the variation explained: axis 1 = 26.85%, axis 2 = 19.99%.
bac_PCo1 <- bac_pcoa$vectors[, 1]
bac_PCo2 <- bac_pcoa$vectors[, 2]
# Combine both axes; the row names are moved into a `sample` column.
bac_pco <- as_tibble(data.frame(bac_PCo1, bac_PCo2), rownames = "sample")
bac_pco
# Attach the third worksheet (matched on sample == Code), keep the sample name,
# the two axis columns and Tdiff, then bin Tdiff into five labelled groups.
# The conditions are checked top to bottom, so each row gets the first label
# whose threshold it falls under; a missing Tdiff yields a missing group.
bac_pco2 <- bac_pco %>%
  left_join(pcoa[[3]], by = c("sample" = "Code")) %>%
  dplyr::select(sample:bac_PCo2, Tdiff) %>%
  mutate(
    group = case_when(
      Tdiff < -5.7 ~ "very cold",
      Tdiff < 0 ~ "cold",
      Tdiff < 5.7 ~ "in situ",
      Tdiff < 9.6 ~ "warm",
      Tdiff >= 9.6 ~ "hot"
    )
  )
# Open the data viewer only once bac_pco2 exists: calling View() before the
# assignment fails in a fresh session with "object 'bac_pco2' not found".
View(bac_pco2)
# Plot ----
# Theme overrides shared by all figures; added on top of theme_bw() in the
# plotting calls.
main_theme <- theme(
  # Panel: no background and no grid lines.
  panel.background = element_blank(),
  panel.grid = element_blank(),
  # Axes: black lines, ticks and tick labels.
  axis.line.x = element_line(size = 0.5, colour = "black"),
  axis.line.y = element_line(size = 0.5, colour = "black"),
  axis.ticks = element_line(color = "black"),
  axis.text = element_text(color = "black", size = 12),
  # Legend: on the right, without background or key boxes.
  legend.position = "right",
  legend.background = element_blank(),
  legend.key = element_blank(),
  legend.text = element_text(size = 12),
  # Text: 12 pt sans throughout, centred title.
  text = element_text(family = "sans", size = 12),
  plot.title = element_text(hjust = 0.5, vjust = 0.5, size = 12),
  plot.subtitle = element_text(size = 12)
)
# PCoA scatter plot: one point per row of bac_pco2 on the first two axes,
# coloured by group.
group_levels <- c("very cold", "cold", "in situ", "warm", "hot")
group_colors <- c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d")
# Share of variation per axis, read from the ordination itself so the axis
# titles cannot drift from the data (the hand-typed values disagreed:
# 26.85% in the notes vs 26.95% in the label).
axis_pct <- 100 * bac_pcoa$values$Relative_eig

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Reference lines through the origin are constants, so they are passed as
  # parameters; mapping them with aes() draws one identical line per data row.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # `limits` fixes the legend order; colours pair with the levels by position.
  scale_color_manual(values = group_colors, limits = group_levels) +
  # Optional fixed axis ranges:
  # scale_x_continuous(breaks = seq(-0.59, 0.66, 0.2), limits = c(-0.59, 0.66)) +
  # scale_y_continuous(breaks = seq(-0.60, 0.45, 0.15), limits = c(-0.60, 0.45)) +
  labs(
    x = sprintf("PCo1 (%.2f%%)", axis_pct[1]),
    y = sprintf("PCo2 (%.2f%%)", axis_pct[2]),
    color = "Treatments"
  ) +
  theme_bw() +
  main_theme
# Group plot: PCoA scatter plot with a filled 95% ellipse around each group.
# Levels and palette are defined once and reused for colour and fill so the
# two legends cannot get out of step.
group_levels <- c("very cold", "cold", "in situ", "warm", "hot")
group_colors <- c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d")
# Share of variation per axis, read from the ordination itself so the axis
# titles cannot drift from the data (the hand-typed values disagreed:
# 26.85% in the notes vs 26.95% in the label).
axis_pct <- 100 * bac_pcoa$values$Relative_eig

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Reference lines through the origin are constants, so they are passed as
  # parameters; mapping them with aes() draws one identical line per data row.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # Semi-transparent ellipse per group, filled with the group's colour.
  stat_ellipse(aes(fill = group), geom = "polygon", level = 0.95, alpha = 0.2) +
  # `limits` fixes the legend order; colours pair with the levels by position.
  scale_color_manual(values = group_colors, limits = group_levels) +
  scale_fill_manual(values = group_colors, limits = group_levels) +
  # Optional fixed axis ranges:
  # scale_x_continuous(breaks = seq(-0.59, 0.66, 0.2), limits = c(-0.59, 0.66)) +
  # scale_y_continuous(breaks = seq(-0.60, 0.45, 0.15), limits = c(-0.60, 0.45)) +
  labs(
    x = sprintf("PCo1 (%.2f%%)", axis_pct[1]),
    y = sprintf("PCo2 (%.2f%%)", axis_pct[2]),
    color = "Treatments",
    fill = "Treatments"
  ) +
  theme_bw() +
  main_theme
# Isotherm plot: PCoA scatter plot with 2-D density contours per group.
group_levels <- c("very cold", "cold", "in situ", "warm", "hot")
group_colors <- c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d")
# Share of variation per axis, read from the ordination itself so the axis
# titles cannot drift from the data (the hand-typed values disagreed:
# 26.85% in the notes vs 26.95% in the label).
axis_pct <- 100 * bac_pcoa$values$Relative_eig

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Reference lines through the origin are constants, so they are passed as
  # parameters; mapping them with aes() draws one identical line per data row.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # Contour lines per group. NOTE(review): ggplot2 >= 3.4 prefers `linewidth`
  # for line widths; `size` is kept so older installations still run.
  stat_density2d(aes(color = group), size = 0.6) +
  # `limits` fixes the legend order; colours pair with the levels by position.
  scale_color_manual(values = group_colors, limits = group_levels) +
  # Optional fixed axis ranges:
  # scale_x_continuous(breaks = seq(-0.59, 0.66, 0.2), limits = c(-0.59, 0.66)) +
  # scale_y_continuous(breaks = seq(-0.60, 0.45, 0.15), limits = c(-0.60, 0.45)) +
  # No fill label: nothing in this plot maps the fill aesthetic.
  labs(
    x = sprintf("PCo1 (%.2f%%)", axis_pct[1]),
    y = sprintf("PCo2 (%.2f%%)", axis_pct[2]),
    color = "Treatments"
  ) +
  theme_bw() +
  main_theme