PCoA-主坐标分析

“No one knows everything, and you don't have to.”   --free傻孩子
"R数据分析"专题·第16篇
  编辑 | free傻孩子
  4445字 | 7分钟阅读
本期推送内容
上一期我们比较详细地介绍了NMDS分析和画图。正如上期所说,NMDS分析无法给出每一轴对群落分布的解释量,但有时候我们确实需要一种能够给出每一轴解释量的排序分析。在这种情况下,主坐标分析[Principal coordinate analysis (PCoA)]可能会是一个比较不错的选择。
01

PCoA运算过程解析

1)导入数据

library(tidyverse)
library(readxl)
library(ape)
library(vegan)
#' Read the first `n` sheets of an Excel workbook into a list.
#'
#' @param file Path to the workbook, passed to `readxl::read_excel()`.
#' @param n Number of sheets to read, counted from the first sheet.
#' @return An unnamed list whose i-th element is the table read from sheet
#'   `i` (first row used as column names); an empty list when `n <= 0`.
readxlsx <- function(file = "file.xlsx", n = 3) {
  # ceiling()/max() reproduce what a `while (i < n)` counter would visit,
  # so fractional or negative `n` behave exactly as before.
  sheets <- seq_len(max(0, ceiling(n)))
  # Calling through the namespace fails loudly when readxl is missing,
  # whereas require() only returns FALSE and lets a later call break.
  lapply(sheets, function(i) {
    readxl::read_excel(file, sheet = i, col_names = TRUE)
  })
}

# Read the first four sheets of the workbook; sheet i is stored in pcoa[[i]].
pcoa <- readxlsx("1-16S.xlsx", n = 4)
print(pcoa)

2)PCoA

数据处理:首先定义数据的行与列,在所有类型的排序分析中数据的行与列都需要明确地给出;其次检查数据的行名是否为样方名,列名是否为物种名(或者OTUid)。若不是,需要在PCoA分析之前使用转置函数t()转换为以上格式。用于分析的数据格式如下:

适应于本节数据处理的代码如下:

# Take the table stored in the first sheet.
bac <- pcoa[[1]]
# A base data.frame is needed because row names are assigned next.
bac2 <- data.frame(bac)
# Use the OTUid column as row names, then drop the first column.
rownames(bac2) <- bac2$OTUid
bac2 <- bac2[, -1]
head(bac2[, 1:6])
# Transpose so that the former columns become rows.
tbac2 <- data.frame(t(bac2))
head(tbac2[, 1:6])

PCoA

#PCoA
# Bray-Curtis dissimilarities between the rows of tbac2.
bac_bray <- vegan::vegdist(tbac2, method = "bray")
# Principal coordinate analysis of the distance matrix, no correction applied.
bac_pcoa <- ape::pcoa(bac_bray, correction = "none")
print(bac_pcoa)
biplot(bac_pcoa)
02

ggplot2绘图

# Inspect how much each axis explains.
print(bac_pcoa$values)

提取PCo1和PCo2坐标轴数据

# Sample scores on the first two principal coordinate axes; the share each
# axis explains is available in bac_pcoa$values$Relative_eig.
bac_PCo1 <- bac_pcoa$vectors[, 1]
bac_PCo2 <- bac_pcoa$vectors[, 2]
# Combine both axes and move the row names into a "sample" column.
bac_pco <- as_tibble(
  data.frame(bac_PCo1, bac_PCo2),
  rownames = "sample"
)
print(bac_pco)

根据处理给数据分组

# Join the third sheet onto the scores (sample == Code), keep the Tdiff
# column, and bin Tdiff into five labelled groups.
bac_pco2 <- bac_pco %>%
  left_join(pcoa[[3]], by = c("sample" = "Code")) %>%
  dplyr::select(sample:bac_PCo2, Tdiff) %>%
  mutate(
    group = case_when(
      Tdiff < -5.7 ~ "very cold",
      Tdiff < 0 ~ "cold",
      Tdiff < 5.7 ~ "in situ",
      Tdiff < 9.6 ~ "warm",
      # Explicit upper bin (no catch-all) so a missing Tdiff stays NA.
      Tdiff >= 9.6 ~ "hot"
    )
  )
print(bac_pco2)

数据格式如下:

绘图背景等参数设置(直接粘贴并运行)

# Shared plot theme: blank panel without grid, black axis lines, ticks and
# text, legend on the right without background or key boxes, 12 pt sans
# text and a centred title.
main_theme <- theme(
  panel.background = element_blank(),
  panel.grid = element_blank(),
  axis.line.x = element_line(size = 0.5, colour = "black"),
  axis.line.y = element_line(size = 0.5, colour = "black"),
  axis.ticks = element_line(colour = "black"),
  axis.text = element_text(colour = "black", size = 12),
  legend.position = "right",
  legend.background = element_blank(),
  legend.key = element_blank(),
  legend.text = element_text(size = 12),
  text = element_text(family = "sans", size = 12),
  plot.title = element_text(hjust = 0.5, vjust = 0.5, size = 12),
  plot.subtitle = element_text(size = 12)
)

绘图

# Scatter plot of the samples on PCo1/PCo2, coloured by treatment group.
# Axis labels are derived from the PCoA result rather than typed in by hand,
# so the percentages always match the analysis that was actually run.
pco_labs <- sprintf(
  "PCo%d (%.2f%%)",
  1:2,
  100 * bac_pcoa$values$Relative_eig[1:2]
)

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Dashed reference lines through the origin.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # limits fixes the legend order from coldest to hottest.
  scale_color_manual(
    values = c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d"),
    limits = c("very cold", "cold", "in situ", "warm", "hot")
  ) +
  # Optional fixed axis ranges:
  #scale_x_continuous(breaks = seq(-0.59,0.66,0.2),limits = c(-0.59,0.66))+
  #scale_y_continuous(breaks = seq(-0.60,0.45,0.15),limits = c(-0.60,0.45))+
  labs(x = pco_labs[1], y = pco_labs[2], color = "Treatments") +
  theme_bw() +
  main_theme
03

分组和“等温线” 绘图

分组绘图

# Scatter plot with a filled 95% ellipse per treatment group.
# Axis labels are derived from the PCoA result rather than typed in by hand,
# so the percentages always match the analysis that was actually run.
pco_labs <- sprintf(
  "PCo%d (%.2f%%)",
  1:2,
  100 * bac_pcoa$values$Relative_eig[1:2]
)

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Dashed reference lines through the origin.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # Same palette and order for point colour and ellipse fill.
  scale_color_manual(
    values = c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d"),
    limits = c("very cold", "cold", "in situ", "warm", "hot")
  ) +
  scale_fill_manual(
    values = c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d"),
    limits = c("very cold", "cold", "in situ", "warm", "hot")
  ) +
  # Optional fixed axis ranges:
  #scale_x_continuous(breaks = seq(-0.59,0.66,0.2),limits = c(-0.59,0.66))+
  #scale_y_continuous(breaks = seq(-0.60,0.45,0.15),limits = c(-0.60,0.45))+
  stat_ellipse(aes(fill = group), geom = "polygon", level = 0.95, alpha = 0.2) +
  labs(
    x = pco_labs[1], y = pco_labs[2],
    color = "Treatments", fill = "Treatments"
  ) +
  theme_bw() +
  main_theme

“等温线”绘图

# Scatter plot with 2D density contours ("isotherms") per treatment group.
# Axis labels are derived from the PCoA result rather than typed in by hand,
# so the percentages always match the analysis that was actually run.
pco_labs <- sprintf(
  "PCo%d (%.2f%%)",
  1:2,
  100 * bac_pcoa$values$Relative_eig[1:2]
)

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Dashed reference lines through the origin.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # limits fixes the legend order from coldest to hottest.
  scale_color_manual(
    values = c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d"),
    limits = c("very cold", "cold", "in situ", "warm", "hot")
  ) +
  # Optional fixed axis ranges:
  #scale_x_continuous(breaks = seq(-0.59,0.66,0.2),limits = c(-0.59,0.66))+
  #scale_y_continuous(breaks = seq(-0.60,0.45,0.15),limits = c(-0.60,0.45))+
  stat_density2d(aes(color = group), size = 0.6) +
  # No fill aesthetic is mapped in this plot, so only colour is labelled.
  labs(x = pco_labs[1], y = pco_labs[2], color = "Treatments") +
  theme_bw() +
  main_theme

链接:https://www.aliyundrive.com/s/CFvkjJ3nECi

如有问题,可以加入我们的群聊一起讨论,如下:

最后奉上完整版代码,如下:

# Choose the folder that holds "1-16S.xlsx" as the working directory.
# choose.dir() exists only on Windows and returns NA when the dialog is
# cancelled, so both cases are guarded; on other systems set the working
# directory yourself (or start R in the data folder) before running this.
# Deliberately no rm(list = ls()): it wipes the user's workspace without
# resetting packages or options - restart R for a truly clean session.
if (interactive() && .Platform$OS.type == "windows") {
  data_dir <- choose.dir()
  if (!is.na(data_dir)) {
    setwd(data_dir)
  }
}
#------------------import data--------------------------
library(tidyverse)
library(readxl)
library(vegan)
library(ape)

#' Read the first `n` sheets of an Excel workbook into a list.
#'
#' @param file Path to the workbook, passed to `readxl::read_excel()`.
#' @param n Number of sheets to read, counted from the first sheet.
#' @return An unnamed list whose i-th element is the table read from sheet
#'   `i` (first row used as column names); an empty list when `n <= 0`.
readxlsx <- function(file = "file.xlsx", n = 3) {
  # ceiling()/max() reproduce what a `while (i < n)` counter would visit,
  # so fractional or negative `n` behave exactly as before.
  sheets <- seq_len(max(0, ceiling(n)))
  # Calling through the namespace fails loudly when readxl is missing,
  # whereas require() only returns FALSE and lets a later call break.
  lapply(sheets, function(i) {
    readxl::read_excel(file, sheet = i, col_names = TRUE)
  })
}

# Read the first four sheets of the workbook, then show the third one.
pcoa <- readxlsx("1-16S.xlsx", n = 4)
print(pcoa[[3]])
#------------------process data---------------------
# Take the table stored in the first sheet.
bac <- pcoa[[1]]
# A base data.frame is needed because row names are assigned next.
bac2 <- data.frame(bac)
# Use the OTUid column as row names, then drop the first column.
rownames(bac2) <- bac2$OTUid
bac2 <- bac2[, -1]
head(bac2[, 1:6])
# Transpose so that the former columns become rows.
tbac2 <- data.frame(t(bac2))
head(tbac2[, 1:6])
#------------------PCoA---------------------------
# Bray-Curtis dissimilarities between the rows of tbac2.
bac_bray <- vegan::vegdist(tbac2, method = "bray")
# Principal coordinate analysis of the distance matrix, no correction applied.
bac_pcoa <- ape::pcoa(bac_bray, correction = "none")
print(bac_pcoa)
biplot(bac_pcoa)
# Inspect how much each axis explains.
print(bac_pcoa$values)
# Sample scores on the first two principal coordinate axes; the share each
# axis explains is available in bac_pcoa$values$Relative_eig.
bac_PCo1 <- bac_pcoa$vectors[, 1]
bac_PCo2 <- bac_pcoa$vectors[, 2]
# Combine both axes and move the row names into a "sample" column.
bac_pco <- data.frame(bac_PCo1, bac_PCo2) %>%
  as_tibble(rownames = "sample")
bac_pco
# Join the third sheet onto the scores (sample == Code), keep the Tdiff
# column, and bin Tdiff into five labelled groups.
bac_pco2 <- bac_pco %>%
  left_join(pcoa[[3]], by = c("sample" = "Code")) %>%
  dplyr::select(sample:bac_PCo2, Tdiff) %>%
  mutate(
    group = case_when(
      Tdiff < -5.7 ~ "very cold",
      Tdiff < 0 ~ "cold",
      Tdiff < 5.7 ~ "in situ",
      Tdiff < 9.6 ~ "warm",
      # Explicit upper bin (no catch-all) so a missing Tdiff stays NA.
      Tdiff >= 9.6 ~ "hot"
    )
  )
# View() must come after bac_pco2 exists; calling it earlier stops the
# script with "object 'bac_pco2' not found".
View(bac_pco2)
#-----------------------plot-------------------------------------
# Shared plot theme: blank panel without grid, black axis lines, ticks and
# text, legend on the right without background or key boxes, 12 pt sans
# text and a centred title.
main_theme <- theme(
  panel.background = element_blank(),
  panel.grid = element_blank(),
  axis.line.x = element_line(size = 0.5, colour = "black"),
  axis.line.y = element_line(size = 0.5, colour = "black"),
  axis.ticks = element_line(colour = "black"),
  axis.text = element_text(colour = "black", size = 12),
  legend.position = "right",
  legend.background = element_blank(),
  legend.key = element_blank(),
  legend.text = element_text(size = 12),
  text = element_text(family = "sans", size = 12),
  plot.title = element_text(hjust = 0.5, vjust = 0.5, size = 12),
  plot.subtitle = element_text(size = 12)
)

# Scatter plot of the samples on PCo1/PCo2, coloured by treatment group.
# Axis labels are derived from the PCoA result rather than typed in by hand,
# so the percentages always match the analysis that was actually run.
pco_labs <- sprintf(
  "PCo%d (%.2f%%)",
  1:2,
  100 * bac_pcoa$values$Relative_eig[1:2]
)

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Dashed reference lines through the origin.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # limits fixes the legend order from coldest to hottest.
  scale_color_manual(
    values = c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d"),
    limits = c("very cold", "cold", "in situ", "warm", "hot")
  ) +
  # Optional fixed axis ranges:
  #scale_x_continuous(breaks = seq(-0.59,0.66,0.2),limits = c(-0.59,0.66))+
  #scale_y_continuous(breaks = seq(-0.60,0.45,0.15),limits = c(-0.60,0.45))+
  labs(x = pco_labs[1], y = pco_labs[2], color = "Treatments") +
  theme_bw() +
  main_theme
#group
# Scatter plot with a filled 95% ellipse per treatment group.
# Axis labels are derived from the PCoA result rather than typed in by hand,
# so the percentages always match the analysis that was actually run.
pco_labs <- sprintf(
  "PCo%d (%.2f%%)",
  1:2,
  100 * bac_pcoa$values$Relative_eig[1:2]
)

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Dashed reference lines through the origin.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # Same palette and order for point colour and ellipse fill.
  scale_color_manual(
    values = c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d"),
    limits = c("very cold", "cold", "in situ", "warm", "hot")
  ) +
  scale_fill_manual(
    values = c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d"),
    limits = c("very cold", "cold", "in situ", "warm", "hot")
  ) +
  # Optional fixed axis ranges:
  #scale_x_continuous(breaks = seq(-0.59,0.66,0.2),limits = c(-0.59,0.66))+
  #scale_y_continuous(breaks = seq(-0.60,0.45,0.15),limits = c(-0.60,0.45))+
  stat_ellipse(aes(fill = group), geom = "polygon", level = 0.95, alpha = 0.2) +
  labs(
    x = pco_labs[1], y = pco_labs[2],
    color = "Treatments", fill = "Treatments"
  ) +
  theme_bw() +
  main_theme
#isotherm
# Scatter plot with 2D density contours ("isotherms") per treatment group.
# Axis labels are derived from the PCoA result rather than typed in by hand,
# so the percentages always match the analysis that was actually run.
pco_labs <- sprintf(
  "PCo%d (%.2f%%)",
  1:2,
  100 * bac_pcoa$values$Relative_eig[1:2]
)

ggplot(bac_pco2, aes(bac_PCo1, bac_PCo2)) +
  # Dashed reference lines through the origin.
  geom_hline(yintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_vline(xintercept = 0, colour = "#d8d6d6", linetype = 5) +
  geom_point(aes(color = group), shape = 19, size = 3.5) +
  # limits fixes the legend order from coldest to hottest.
  scale_color_manual(
    values = c("#4393c3", "#92c5de", "#d1e5f0", "#f4a582", "#d6604d"),
    limits = c("very cold", "cold", "in situ", "warm", "hot")
  ) +
  # Optional fixed axis ranges:
  #scale_x_continuous(breaks = seq(-0.59,0.66,0.2),limits = c(-0.59,0.66))+
  #scale_y_continuous(breaks = seq(-0.60,0.45,0.15),limits = c(-0.60,0.45))+
  stat_density2d(aes(color = group), size = 0.6) +
  # No fill aesthetic is mapped in this plot, so only colour is labelled.
  labs(x = pco_labs[1], y = pco_labs[2], color = "Treatments") +
  theme_bw() +
  main_theme

(0)

相关推荐