你认为alpha多样性一定要用OTU来分析吗?
写在前面
alpha多样性主要包括三方面内容:多样性,丰富度和均匀度。就扩增子数据来说,大部分的文章都是使用OTU来做多样性分析。但是长期以来我们都知道,OTU多样性并不是真正的物种。最近看到一篇文章使用门水平和属水平丰度做多样性分析,又让我想起了这个事情,大家不必执迷于OTU来计算alpha多样性。这是南医大刘星吟团队发表在Gut Microbes的文章,译文题目为:揭示肠道微生物谱的改变与孤独症谱系障碍的异常神经递质代谢活动相关。详细参见原文:Zhou Dan, Xuhua Mao, Qisha Liu, Mengchen Guo, Yaoyao Zhuang, Zhi Liu, Kun Chen, Junyu Chen, Rui Xu, Junming Tang, Lianhong Qin, Bing Gu, Kangjian Liu, Chuan Su, Faming Zhang, Yankai Xia, Zhibin Hu & Xingyin Liu. Altered gut microbial profile is associated with abnormal metabolism activity of Autism Spectrum Disorder. Gut Microbes, 1-22, doi:10.1080/19490976.2020.1747329 (2020).
可以明显看到不同分类等级alpha多样性情况不一致,门水平显著,但属水平就不显著了。
实战
我们使用刘老师的数据进行测试,从门水平和属水平进行测试,确实也是不同的结果:
ps = readRDS("../data/ps_liu.rds")
library(dplyr)
j = "Phylum"
psm = ps %>%
tax_glom(taxrank = j)
psm
result = alpha(ps = psm,inde="Shannon",Plot = TRUE )
p = result[[1]]
p
j = "Genus"
psm = ps %>%
tax_glom(taxrank = j)
psm
result = alpha(ps = psm,inde="Shannon",Plot = TRUE )
p1 = result[[1]]
p1
library('patchwork')
p + p1
欢迎加入微生信生物
快来微生信生物
alpha多样性计算函数
alpha = function(otu = NULL,tax = NULL,map = NULL,ps = NULL,group = "Group",inde="Shannon",Plot = TRUE){
# path = "./alpha/"
# dir.create(path)
library(phyloseq)
library(tidyverse)
library(vegan)
library(picante) #picante 包加载时默认同时加载 vegan
library(agricolae)
if (is.null(otu)&is.null(tax)&is.null(map)) {
ps = ps
}
if (!is.null(otu)|!is.null(tax)|!is.null(map) ) {
head(otu)
otu = as.matrix(otu)
str(otu)
tax = as.matrix(tax)
# taxa_names(tax)
head(map)
colnames(map) = gsub(group,"AA", colnames(map))
map$Group = map$AA
map$Group = as.factor(map$Group )
map$Group
# #导入进化树
# tree = read.tree("./otus.tree")
# tree
ps <- phyloseq(otu_table(otu, taxa_are_rows=TRUE),
sample_data(map) ,
tax_table(tax)
# phy_tree(tree)
)
}
ps
# map = as.data.frame(sample_data(ps))
# ##按照最小序列数抽平
# total = min(sample_sums(ps));total
# # total = min(sample_sums(ps));total
# standf = function(x,t = total)round(t*(x/sum(x)))
# ps11 = transform_sample_counts(ps,standf)
ps11 = rarefy_even_depth(ps)
mapping = sample_data(ps11)
ps11 = filter_taxa(ps11, function(x) sum(x ) >0 , TRUE); ps11
head(mapping)
colnames(mapping) = gsub(group,"AA", colnames(mapping))
mapping$Group = mapping$AA
mapping$Group = as.factor(mapping$Group )
mapping$Group
vegan_otu <- function(physeq){
OTU <- otu_table(physeq)
if(taxa_are_rows(OTU)){
OTU <- t(OTU)
}
return(as(OTU,"matrix"))
}
count = as.data.frame(t(vegan_otu(ps11)))
head(count)
# 加载VEGAN包
library(vegan)
alpha=vegan::diversity(count, "shannon")
x = t(count) ##转置,行为样本,列为OTU
head(x)
##核心,计算两个指标
Shannon = vegan::diversity(x) ##默认为shannon
Shannon
Inv_Simpson <- vegan::diversity(x, index = "invsimpson")
Inv_Simpson
#计算OTU数量
S <- vegan::specnumber(x);S ##每个样本物种数。等价于S2 = rowSums(x>0)
S2 = rowSums(x>0)
#多样性指标:均匀度Pielou_evenness,Simpson_evenness
Pielou_evenness <- Shannon/log(S)
Simpson_evenness <- Inv_Simpson/S
est <- estimateR(x)
est <- estimateR(x)
Richness <- est[1, ]
Chao1 <- est[2, ]
ACE <- est[4, ]
report = cbind(Shannon, Inv_Simpson, Pielou_evenness, Simpson_evenness,
Richness, Chao1,ACE) #不同列进行合并
head(report)
index = merge(mapping,report , by="row.names",all=F)
head(index)
colnames(index)
# FileName <- paste(path,"./alpha_diversity.csv", sep = "")
# write.csv(index,FileName,sep = "")
#
#
# if (plot == FALSE ) {NULL}
if (Plot == TRUE ) {
Mytheme <- theme_bw()+
# scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold"),
axis.text.x = element_text(colour = "black",size = 14),
axis.text.y = element_text(colour = "black",size = 14),
legend.text = element_text(size = 15,face = "bold"),
legend.position = "none"#是否删除图例
)
# head(data_wt )
data_wt = index
data_wt$group = data_wt$Group
# alpha = "Shannon"
# data_wt = as.matrix(data_wt)
data_wt[inde]
#构造待分析的子数据框
ss <- data_wt[inde]
colnames(ss) <- c("count")
ss$group = data_wt$group
name_i = colnames(data_wt[inde])
#求取均值和方差
wen1 = as.data.frame(tapply(as.vector(as.matrix(data_wt[inde])),data_wt$group,mean,na.rm=TRUE))
wen2 = as.data.frame(tapply(as.vector(as.matrix(data_wt[inde])),data_wt$group,sd,na.rm=TRUE))
went = cbind(wen1,wen2)
colnames(went) = c("mean" ,"SD")
went
#进行方差检验 下面wtx3为提取的p值
model<-aov(count ~ group, data= ss)#方差分析
wtx1 = summary(model)
wtx2 = wtx1[[1]]
wtx3 = wtx2[5]#
# out <- LSD.test(model,"group", p.adj="none")#进行多重比较,不矫正P值
# aa = out$group#结果显示:标记字母法
# aa$group = row.names(aa)
# aa
# ? duncan.test
out <-duncan.test (model,"group")
aa = out$groups# 查看每个组的label
aa$group = row.names(aa)
stat = aa
aa
wentao = merge(aa,went, by="row.names",all=F)
wentao
# FileName <- paste(name_i,"_aov_bar", ".csv", sep = "_")
# write.csv(wentao,FileName,quote = F)
# colnames(wentao) = c(colnames(wentao[1:4]),"mean" ,"SD")
#使用的tidyverse函数,对数据框添加两列,目的为柱状图添加bar
aa = mutate(wentao, ymin = mean - SD, ymax = mean + SD)
a = max(aa$mean)*1.5##用于设置y轴最大值
### 出图柱状图
p = ggplot(aa , aes(x = group, y = mean,colour= group)) +
geom_bar(aes(colour= group,fill = group),stat = "identity", width = 0.4,position = "dodge") +
geom_errorbar(aes(ymin=ymin,
ymax=ymax),
colour="black",width=0.1,size = 1)+
# geom_hline(aes(yintercept=mean(as.vector(as.matrix(data_wt[i])))), colour="black", linetype=2) +
# geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +
scale_y_continuous(expand = c(0,0),limits = c(0,a))+
labs(x=paste(name_i,"", sep = ""),
y =name_i )
p
p = p + geom_text(aes(label = groups,y=ymax, x = group,vjust = -0.3,size = 6))
p
#as.vector(as.matrix(data_wt[i]))为进行差异分析的一组数据
p=p+Mytheme
p
if (length(unique(data_wt$group))>3){ p=p+theme(axis.text.x=element_text(angle=45,vjust=1, hjust=1))}
p
# FileName <- paste(path,name_i,"_aov_bar", ".pdf", sep = "")
# # library("Cairo")
# ggsave(FileName, p, width = 10, height = 8)
# FileName1 <- paste(path,name_i,"_aov_bar", ".jpg", sep = "")
# # library("Cairo")
# ggsave(FileName1, p, width = 10, height = 8)
return(list(p,plotdata = aa,allindex = index ))
}
}