一条代码完成完成无限分组的微生物差异分析

写在前面

今天是2020年10月6日,这几天都很忙碌,许多批次的数据需要再次分析和进一步分析,许多材料需要赶出来,百忙之中还有几位同学的婚礼,确实非南京很难到场,这里祝愿这几位百年好合,早生贵子。国庆节哪里没有去 就几千大洋有没有了。在微生物差异分析界,其实直接使用t检验和非参数检验不是很好,及时使用的抽平的微生物群落数据或者相对标准化矫正数据。建议使用TPM或者TMM等方法矫正文库后再用非参数检验来做差异分析。今天我要写一个什么样的函数呢?

  • 一个可以即使有许多的分组,都可以一次跑出来差异分析的函数;

  • 一个将两两差异比较结果放到一张表格中,可以再两两差异的基础上再次比对不同差异结果的函数;

  • 一个只需要一条代码就可以做到t检验和非参数检验的函数

  • 一个专门为微生物组数据而写,直接使用phyloseq对象这一个输入数据就可以满足的函数;

这个函数我用了很长时间了,应该不会有什么问题?如果有问题,请及时联系,留言,我会解决的,如果是R包的依赖问题,请自行安装R包解决。

使用之前

用之前请先安装R包:这并不是绑定,只是这个函数有个功能调用的ggClusterNet,点击查看什么是ggClusterNe

devtools::install_github("taowenmicro/ggClusterNet")

函数命令解析

statSuper = function(otu = NULL,tax = NULL,map = NULL,tree = NULL ,ps = NULL,group = "Group",pvalue = 0.05,artGroup = NULL,method = "wilcox" )

  • otu = NULL 可选输入otu表格;

  • tax = NULL 可选输入注释文件;

  • map = NULL 可选输入分组文件;

  • tree = NULL 可选输入进化树,但这里暂时用不到;

  • ps = NULL 可选输入phyloseq对象;

  • group  = “Group” 分组文件中样本分组列;

  • pvalue = 0.05 p值阈值;

  • artGroup = NULL 是否人工置指定分组,默认NULL代表这全部分组的两两组合都会被检测差异;

  • method = “wilcox”  差异检测方法吗,默认非参数检验,可选ttext方法;

使用函数

指定分组模式

library(tibble)
source("./statSuper.R")

ps = readRDS("./ps_soil.rds")
#-----人工指定分组信息
group1 = c("KO","OE")
group2 = c("WT","OE")
b= data.frame(group1)
result = statSuper(ps = ps,group = "Group",artGroup = b,method = "wilcox")

全部分组模式

将map文件(分组)中全部的组合都进行两两组合,并用于检验,这里呢注意分组不要太多,否则时间花费非常长;

result = statSuper(ps = ps,group = "Group",artGroup = NULL,method = "wilcox")

结果解读

  • 如下所示,KO1       KO2         KO3         KO4         KO5    ····这些代表每个样本的丰度;

  • KO_OE_log2_FC      KO_OE_Pvalue         KO_OE_fdr 为一组差异检测结果,带有两个分组的名称,用下划线分隔

  • Kingdom         Phylum··· 为微生物注释信息,如果没有则不会加到后面

  • id 为OTU的ID。

Row.names KO1 KO2 KO3 KO4 KO5 KO6 OE1 OE2 OE3 OE4 OE5 OE6
1 ASV_1 3.34918151 5.4462433 2.094939796 3.600482864 2.726572529 2.94213176 3.915281931 4.932505725 3.950103950 2.920284136 3.743058682 3.173265937
2 ASV_10 1.08329321 1.0045662 1.768889117 1.994436572 2.626444159 1.79180425 0.610414033 0.533325298 0.811388311 0.849978751 1.212666967 0.870541983
3 ASV_100 0.23772268 0.0498132 0.097558471 0.154831260 0.757381258 0.06766632 0.089404076 0.165722550 0.043312543 0.346062777 0.216118865 0.124808886
4 ASV_1000 0.01203659 0.0166044 0.012836641 0.007872776 0.038510911 0.01353326 0.006165798 0.018078824 0.014437514 0.006071277 0.003001651 0.021841555
5 ASV_1001 0.02106403 0.0055348 0.002567328 0.005248517 0.033376123 0.02977318 0.015414496 0.006026275 0.014437514 0.000000000 0.006003302 0.006240444
6 ASV_1002 0.01203659 0.0249066 0.030807938 0.013121293 0.005134788 0.03789314 0.018497395 0.063275883 0.005775006 0.009106915 0.009004953 0.015601111
WT1 WT2 WT3 WT4 WT5 WT6 KO_OE_log2_FC KO_OE_Pvalue KO_OE_fdr KO_WT_log2_FC
1 6.270221129 6.611613307 4.708391436 5.428246384 3.838971294 4.18862243 -0.167012965080295 0.297953061608168 0.297953061608168 -0.622796380894282
2 0.885050402 1.529163519 0.678095863 0.709680914 1.397929783 0.75545758 1.07002029997754 0.013065226764426 0.013065226764426 0.785481488943152
3 0.088237653 0.018235340 0.136712876 0.162522347 0.101376587 0.19088074 0.467617979120866 1 1 0.961623638785898
4 0.002673868 0.000000000 0.000000000 0.002708706 0.021342439 0.02150769 0.506527622556004 0.575173531920197 0.575173531920197 0.985686275619134
5 0.000000000 0.007815146 0.008202773 0.000000000 0.002667805 0.01881923 0.936234514687201 0.688920555804461 0.688920555804461 1.2512708953234
6 0.005347737 0.000000000 0.010937030 0.008126117 0.005335610 0.01344231 0.029612008342961 0.688920555804461 0.688920555804461 1.40100359630481
KO_WT_Pvalue KO_WT_fdr OE_WT_log2_FC OE_WT_Pvalue OE_WT_fdr KO OE WT Kingdom Phylum
1 0.0306389879377033 0.0306389879377033 -0.455783415813987 0.0453275620779722 0.0453275620779722 3.35992529 3.772416727 5.174344330 Bacteria Actinobacteria
2 0.0306389879377033 0.0306389879377033 -0.284538811034385 0.471169998490056 0.471169998490056 1.71157225 0.814719224 0.992563010 Bacteria Proteobacteria
3 0.810181236410474 0.810181236410474 0.494005659665031 0.471169998490056 0.471169998490056 0.22749553 0.164238283 0.116327591 Bacteria Proteobacteria
4 0.228950681414595 0.228950681414595 0.47915865306313 0.228950681414595 0.228950681414595 0.01689910 0.011599437 0.008038784 Bacteria Proteobacteria
5 0.228950681414595 0.228950681414595 0.315036380636201 0.808865613239999 0.808865613239999 0.01626066 0.008020338 0.006250825 Bacteria Proteobacteria
6 0.0926958025578126 0.0926958025578126 1.37139158796185 0.0926958025578126 0.0926958025578126 0.02065006 0.020210210 0.007198133 Bacteria Firmicutes
Class Order Family Genus Species id
1 Actinobacteria Actinomycetales Thermomonosporaceae Unassigned Unassigned ASV_1
2 Alphaproteobacteria Rhizobiales Rhizobiaceae Rhizobium Unassigned ASV_10
3 Betaproteobacteria Burkholderiales Comamonadaceae Hydrogenophaga Hydrogenophaga_intermedia ASV_100
4 Alphaproteobacteria Sphingomonadales Sphingomonadaceae Unassigned Unassigned ASV_1000
5 Alphaproteobacteria Rhizobiales Hyphomicrobiaceae Devosia Unassigned ASV_1001
6 Bacilli Bacillales Paenibacillaceae_2 Unassigned Unassigned ASV_1002

主函数

statSuper = function(otu = NULL,tax = NULL,map = NULL,tree = NULL ,ps = NULL,group = "Group",pvalue = 0.05,artGroup = NULL,method = "wilcox" ){
#--功能函数
ps = inputMicro(otu,tax,map,tree,ps,group = group)
ps
sub_design <- as.data.frame(sample_data(ps))
colnames(sub_design) = "Group"
Desep_group <- as.character(levels(sub_design$Group))
Desep_group
if ( is.null(artGroup)) {
#--构造两两组合#-----
aaa = combn(Desep_group,2)
# sub_design <- as.data.frame(sample_data(ps))
}
if (!is.null(artGroup)) {
aaa = as.matrix(b )
}
aaa
#-将NA填充为0#
re = otu_table(ps)
#--将空缺值填充为0
re[is.na(re)] <- 0
otu_table(ps) <- re

#相对丰度转化
ps_rela = transform_sample_counts(ps, function(x) x / sum(x) );ps_rela

#-----------------差异分析过程#-----------------------
# otu_table = as.data.frame(vegan_otu(ps))
count = vegan_otu(ps)
count <- t(count)
#数据整理形式一######
sub_design <- as.data.frame(sample_data(ps))
sub_design$ID = row.names(sub_design)
# 转换原始数据为百分比,
# head(norm)
norm=as.data.frame(t(t(count)/colSums(count,na=TRUE)) * 100) # normalization to total 100
AA=norm
#------------根据分组提取需要的差异结果#------------
table = NULL
for (ii in 1:dim(aaa)[2]) {
# ii = 1
Desep_group = aaa[,ii]
print( Desep_group)

# head(design)
#预生成2个长度与输入文件行数相同的全为0的向量,将用于存储p value和差异倍数(log2FC)
Pvalue<-c(rep(0,nrow(AA)))
fdr<-c(rep(0,nrow(AA)))
log2_FC<-c(rep(0,nrow(AA)))

# df_filter<- dplyr::filter(sub_design,SampleType %in% Desep_group)

df_filter<- sub_design[sub_design$Group%in%Desep_group,]

head(df_filter)

AA = as.data.frame(AA)
AAA = AA[as.character(df_filter$ID)]
head(AAA)
rep = length(as.character(df_filter$ID))/2

a = as.matrix(AAA)
###########开始运行脚本
if (method == "ttext") {
for(i in 1:nrow(a)){
if(sd(a[i,(1:rep)])==0&&sd(a[i,(rep+1):(rep*2)])==0){
Pvalue[i] <-"NA"
log2_FC[i]<-"NA"
}else{
y=t.test(as.numeric(a[i,(1:rep)]),as.numeric(a[i,(rep+1):(rep*2)]))
Pvalue[i]<-y$p.value
log2_FC[i]<-log2((mean(as.numeric(a[i,(1:rep)]))+0.001)/(mean(as.numeric(a[i,(rep+1):(rep*2)]))+0.001))
fdr[i]=p.adjust(Pvalue[i], "BH")
}
}
}

if (method == "wilcox") {
for(i in 1:nrow(a)){
if(sd(a[i,(1:rep)])==0&&sd(a[i,(rep+1):(rep*2)])==0){
Pvalue[i] <-"NA"
log2_FC[i]<-"NA"
}else{
y=wilcox.test(as.numeric(a[i,(1:rep)]),as.numeric(a[i,(rep+1):(rep*2)]),exact=FALSE)
Pvalue[i]<-y$p.value
log2_FC[i]<-log2((mean(as.numeric(a[i,(1:rep)]))+0.001)/(mean(as.numeric(a[i,(rep+1):(rep*2)]))+0.001))
fdr[i]=p.adjust(Pvalue[i], "BH")
}
}
}

# 在原文件后面加入log2FC,p value和FDR,共3列;
out<-cbind(log2_FC,Pvalue,fdr)

colnames(out) = paste(paste(Desep_group[1],Desep_group[2],sep = "_"),colnames(out),sep = "_")
# out$tax=otu_table$compound
head(out)
x = as.data.frame(out)
row.names(x) = row.names(AAA)
head(x)
# WT <-subset(out,fdr < 0.05 & log2_FC != "NA")
# dim(WT)
# head(WT)

if (ii ==1) {
table =x
}
if (ii != 1) {

table = cbind(table,x)
}

}

head(table)
norm1 = norm %>%
t() %>% as.data.frame()
iris.split <- split(norm1,as.factor(sub_design$Group))
iris.apply <- lapply(iris.split,function(x)colMeans(x))
# 组合结果
iris.combine <- do.call(rbind,iris.apply)
norm2= t(iris.combine)
str(norm2)
norm2 = as.data.frame(norm2)
x = cbind(AA,table)
x = cbind(x,norm2)
if (!is.null(ps@tax_table)) {
taxonomy = as.data.frame(vegan_tax(ps))
taxonomy$id= rownames(taxonomy)
x1 = merge(x,taxonomy,by = "row.names",all.x = TRUE)
head(x1)

} else {
x1 = x
}

return(x1)

}

(0)

相关推荐