补-三个重复的置信区间添加算法和可视化

wentao

写在前面

我上一篇推文对三个重复的样本点添加了“置信区间”,但是大家还是想要和超过四个点以上的一样的置信区间。我本来是想写一个,但是今天在Google上搜索了一下。发现这个问题在七年前就已经被解决了,这里七年前我还尚未接触微生物,自然不知道,现在我将这个算法跟新,给大家做一个展示。

library(vegan)
library(tidyverse)
library(ellipse)
#---导入数据
otu =read.csv("./otu.csv",row.names = 1) %>% t() %>% as.data.frame()
head(otu)
## sample1 sample2 sample3 sample4 sample5 sample6
## OTU1 0.45724487 0.47512130 0.48076031 0.42379195 0.44256752 0.39355241
## OTU2 0.20552670 0.24318511 0.18764187 0.18471523 0.19670737 0.18976133
## OTU3 0.22758158 0.10338618 0.19905067 0.02797361 0.07637440 0.09065376
## OTU4 0.04945469 0.33300000 0.04415425 0.01939449 0.04472809 0.04478728
## OTU5 0.06160738 0.05617967 0.06110291 0.07147906 0.05521273 0.04514701
## OTU6 0.01783519 0.03173110 0.01958146 0.04212304 0.03914910 0.05081287
## sample7 sample8 sample9 sample10 sample11 sample12
## OTU1 0.38670554 0.35410840 0.41203157 0.29126527 0.33256955 0.21680160
## OTU2 0.23526525 0.20533941 0.20383444 0.20279485 0.23199267 0.25137003
## OTU3 0.14723778 0.20033434 0.05784600 0.22615791 0.07583567 0.13916428
## OTU4 0.05568320 0.07731510 0.03570940 0.08116948 0.05522967 0.05496949
## OTU5 0.02754142 0.03477204 0.04467715 0.04105715 0.03546474 0.03578279
## OTU6 0.04480170 0.03549645 0.04322292 0.05857946 0.05109446 0.07431810
map = read.csv(
"./map.csv"
)
head(map)
## 锘縎ample.ID Group
## 1 sample1 A
## 2 sample2 A
## 3 sample3 A
## 4 sample4 B
## 5 sample5 B
## 6 sample6 B
colnames(map)[1] = "ID"
row.names(map) = map$ID

idx = rownames(map) %in% colnames(otu)
map1 = map[idx,]
map1
## ID Group
## sample1 sample1 A
## sample2 sample2 A
## sample3 sample3 A
## sample4 sample4 B
## sample5 sample5 B
## sample6 sample6 B
## sample7 sample7 C
## sample8 sample8 C
## sample9 sample9 C
## sample10 sample10 D
## sample11 sample11 D
## sample12 sample12 D
otu = otu[,rownames(map1)]
head(otu)
## sample1 sample2 sample3 sample4 sample5 sample6
## OTU1 0.45724487 0.47512130 0.48076031 0.42379195 0.44256752 0.39355241
## OTU2 0.20552670 0.24318511 0.18764187 0.18471523 0.19670737 0.18976133
## OTU3 0.22758158 0.10338618 0.19905067 0.02797361 0.07637440 0.09065376
## OTU4 0.04945469 0.33300000 0.04415425 0.01939449 0.04472809 0.04478728
## OTU5 0.06160738 0.05617967 0.06110291 0.07147906 0.05521273 0.04514701
## OTU6 0.01783519 0.03173110 0.01958146 0.04212304 0.03914910 0.05081287
## sample7 sample8 sample9 sample10 sample11 sample12
## OTU1 0.38670554 0.35410840 0.41203157 0.29126527 0.33256955 0.21680160
## OTU2 0.23526525 0.20533941 0.20383444 0.20279485 0.23199267 0.25137003
## OTU3 0.14723778 0.20033434 0.05784600 0.22615791 0.07583567 0.13916428
## OTU4 0.05568320 0.07731510 0.03570940 0.08116948 0.05522967 0.05496949
## OTU5 0.02754142 0.03477204 0.04467715 0.04105715 0.03546474 0.03578279
## OTU6 0.04480170 0.03549645 0.04322292 0.05857946 0.05109446 0.07431810
bray_curtis =vegan:: vegdist(t(otu),method = "bray", na.rm=TRUE)

pcoa <- otu %>%
t() %>%
vegan:: vegdist(method = "bray", na.rm=TRUE) %>%
cmdscale(k=2, eig=T)

points <- pcoa$points %>% as.data.frame() %>%
# as.tibble() %>%
dplyr::rename(.,x = "V1",y = "V2" )
head(points)
## x y
## sample1 -0.11731023 -0.04846822
## sample2 -0.08290919 -0.06069551
## sample3 -0.10191448 -0.06509878
## sample4 0.06356767 -0.05047065
## sample5 0.01970246 -0.04969312
## sample6 0.19534848 -0.07646325
eig = pcoa$eig
eig
## [1] 8.972991e-02 6.050659e-02 3.402134e-02 2.660735e-02 9.079289e-03
## [6] 2.808910e-03 1.395223e-03 1.505786e-04 1.387779e-17 -2.373096e-04
## [11] -2.402774e-03 -4.738457e-03
points = cbind(points, map1[match(rownames(points), map1$ID), ])
ado = adonis(bray_curtis~ map1$Group,permutations = 999,method="bray")
a = round(as.data.frame(ado$aov.tab[5])[1,1],3)
R2 <- paste("adonis:R ",a, sep = "")
b = as.data.frame(ado$aov.tab[6])[1,1]
p_v = paste("p: ",b, sep = "")
title = paste(R2," ",p_v, sep = "")
title
## [1] "adonis:R 0.525 p: 0.001"

level 设置指定值,常规0.95。

level = 0.95
head(points)
## x y ID Group
## sample1 -0.11731023 -0.04846822 sample1 A
## sample2 -0.08290919 -0.06069551 sample2 A
## sample3 -0.10191448 -0.06509878 sample3 A
## sample4 0.06356767 -0.05047065 sample4 B
## sample5 0.01970246 -0.04969312 sample5 B
## sample6 0.19534848 -0.07646325 sample6 B
centroids <- aggregate(cbind(x,y)~ Group,points,mean)
head(centroids)
## Group x y
## 1 A -0.100711300 -0.05808750
## 2 B 0.092872870 -0.05887567
## 3 C -0.004767997 0.01279695
## 4 D 0.012606427 0.10416622
row.names(centroids) = centroids$Group

conf.rgn <- do.call(rbind,lapply(unique(points$Group),function(t)
data.frame(Group=as.character(t),
ellipse::ellipse(cov(points[points$Group==t,1:2]),
centre=as.matrix(centroids[t,2:3]),
level=level),
stringsAsFactors=FALSE)))

p4 <- ggplot(points, aes(x=x, y=y, fill = Group)) +
geom_point(alpha=.7, size=5, pch = 21) +
labs(x=paste("PCoA 1 (", format(100 * eig[1] / sum(eig), digits=4), "%)", sep=""),
y=paste("PCoA 2 (", format(100 * eig[2] / sum(eig), digits=4), "%)", sep=""),
title=title) +
geom_path(data=conf.rgn,linetype = 2,aes(color = Group)) +
theme_classic()
p4

如果样本点重叠较多,就将level设定小一点。

level = 0.7
head(points)
## x y ID Group
## sample1 -0.11731023 -0.04846822 sample1 A
## sample2 -0.08290919 -0.06069551 sample2 A
## sample3 -0.10191448 -0.06509878 sample3 A
## sample4 0.06356767 -0.05047065 sample4 B
## sample5 0.01970246 -0.04969312 sample5 B
## sample6 0.19534848 -0.07646325 sample6 B
centroids <- aggregate(cbind(x,y)~ Group,points,mean)
head(centroids)
## Group x y
## 1 A -0.100711300 -0.05808750
## 2 B 0.092872870 -0.05887567
## 3 C -0.004767997 0.01279695
## 4 D 0.012606427 0.10416622
row.names(centroids) = centroids$Group

conf.rgn <- do.call(rbind,lapply(unique(points$Group),function(t)
data.frame(Group=as.character(t),
ellipse::ellipse(cov(points[points$Group==t,1:2]),
centre=as.matrix(centroids[t,2:3]),
level=level),
stringsAsFactors=FALSE)))

p4 <- ggplot(points, aes(x=x, y=y, fill = Group)) +
geom_point(alpha=.7, size=5, pch = 21) +
labs(x=paste("PCoA 1 (", format(100 * eig[1] / sum(eig), digits=4), "%)", sep=""),
y=paste("PCoA 2 (", format(100 * eig[2] / sum(eig), digits=4), "%)", sep=""),
title=title) +
geom_path(data=conf.rgn,linetype = 2,aes(color = Group)) +
theme_classic()
p4

根际互作生物学研究室 简介

根际互作生物学研究室是沈其荣教授土壤微生物与有机肥团队下的一个关注于根际互作的研究小组。本小组由袁军副教授带领,主要关注:1.植物和微生物互作在抗病过程中的作用;2 环境微生物大数据整合研究;3 环境代谢组及其与微生物过程研究体系开发和应用。团队在过去三年中在 isme J, Microbiome, PCE,SBB,Horticulture Research等期刊上发表了多篇文章。欢迎关注微生信生物公众号对本研究小组进行了解。

(0)

相关推荐