使用R实现批量方差分析(aov)和多重比较(LSD)

Tao Wen

2019年1月5日


引子

由我的工作来看,coding存在的意义就是无序重复机械的分析过程,让自己的时间集中在科学问题上,然而实现这个过程却花费了更多的?时间。 我们可以轻松的使用各种软件实现方差检验和多重比较。但是在同时对几百甚至几千组数据做方差分析和多重比较的时候,coding的优势就提体现出来了。

全部代码

##清空内存rm(list=ls()) #载入包
library("vegan")
library(dplyr)
library(tidyverse)
library(agricolae)
library(car)
#修改工作目录
setwd("F:/")
##############全部峰值数据来做统计#########去除磷酸之后进行分析
RET_all = read.delim("./Data_processing/a0_RET_缺失值处理clean_root_exudates.txt", header=T, row.names= 1,check.names=F) head(RET_all)
##            CK2       CK5       CK3       CK1      CK4       RS1       RS2## RE1  26632.985 25224.034  30596.48  29812.76 25972.92 225065.33  75659.57## RE2 118030.479 60185.801 212033.07 105679.97 94223.07      0.00      0.00## RE3  92672.041 65451.785  96917.73 100051.53 77912.07 689247.38 232806.81## RE4  22620.908  4666.435  18808.13   9232.97 22456.49  11528.64  11517.85## RE5  22688.856 14983.546  17679.77  16747.14 16299.52  18650.75  19104.47## RE6   9271.438  8784.055  12884.40  10313.30 10313.30      0.00      0.00##            RS3        RS5      RS4        B1        B2        B3       B4## RE1  31002.659  31484.553 26169.62  30109.59 31586.178  28028.22 30081.14## RE2      0.000      0.000     0.00      0.00     0.000      0.00     0.00## RE3 107402.115 101363.214 83245.85 151057.74 99776.930 101816.86 90471.41## RE4   4962.333   3686.371 14990.28   5080.92  8451.872  27181.51 23715.11## RE5  21119.811  14020.793 20534.05  18634.03 20593.487  23398.10 13077.19## RE6      0.000      0.000     0.00      0.00     0.000      0.00     0.00##            B5                                      compound## RE1 28979.445                            (-)-Dihydrocarveol## RE2     0.000 (2R,3S)-2-hydroxy-3-isopropylbutanedioic acid## RE3 74380.312                        1,4-Cyclohexanedione 2## RE4  6579.112             11-beta-prostaglandin-F-2-alpha 1## RE5 17367.560                                 1-Hexadecanol## RE6     0.000                     2,3-Dimethylsuccinic acidRET = RET_all[1:dim(RET_all)[2]-1]head(RET)#导入需要的作图文件design = read.table("./Data_processing/mapping_RET_all.txt", header=T, row.names= 1, sep="\t") head(design)##     group## CK5    CK## CK4    CK## CK3    CK## CK2    CK## CK1    CK## B5      Bdesign$ID = row.names(design)# 过滤数据并排序idx = rownames(design) %in% colnames(RET) sub_design = design[idx,]count = RET[ ,rownames(sub_design)]head(count)comp_all = dim(count)head(count)# 转换原始数据为百分比,norm = t(t(count)/colSums(count,na=T)) * 100 # normalization to total 100head(norm)##             CK5         CK4         CK3         CK2         CK1## RE1 0.005549065 0.013885418 0.011357306 0.009472478 0.011338921## RE2 0.013240346 0.050372725 0.078705924 0.041979565 0.040194100## RE3 0.014398817 0.041652675 0.035975518 0.032960401 0.038053388## RE4 0.001026575 0.012005496 0.006981510 0.008045514 0.003511648## RE5 0.003296248 0.008713910 0.006562666 0.008069681 0.006369571## RE6 0.001932415 0.005513607 0.004782645 0.003297546 0.003922539##              B5          B4          B3          B2          B1## RE1 0.006645724 0.011490223 0.008876583 0.007449352 0.011995785## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000## RE3 0.017057299 0.034557751 0.032245560 0.023531604 0.060182030## RE4 0.001508758 0.009058563 0.008608428 0.001993307 0.002024260## RE5 0.003982824 0.004995150 0.007410215 0.004856812 0.007423875## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000##             RS5         RS4         RS3         RS2         RS1## RE1 0.009075086 0.005696559 0.012207578 0.022859290 0.066628198## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000## RE3 0.029216864 0.018120817 0.042290558 0.070338732 0.204044354## RE4 0.001062557 0.003263060 0.001953964 0.003479927 0.003412932## RE5 0.004041344 0.004469817 0.008316117 0.005772101 0.005521356## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000norm=as.data.frame(norm)a=normhead(a)Pvalue<-c(rep(0,nrow(a)))pB<-c(rep(0,nrow(a)))pCK<-c(rep(0,nrow(a)))pRS<-c(rep(0,nrow(a)))str(pCK)##  num [1:604] 0 0 0 0 0 0 0 0 0 0 ...# 2~4列是处理组1,5~7列是处理组2;#将使用循环对每一行进行方差检验#两组表达量都是0的基因,不检验;#每一行计算得到p value和log2FC将被加入原文件的后两列;#计算log2FC时,每组均值加0.001,是为了防止分母为0导致bug;# i= 2# ab = as.numeric(a[i,1:15])# b = sub_design$group# aa = data.frame(ab,b)# y=aov(ab~b,data = aa)# Pvalue[i]<-summary(y)[[1]][,5][1]# out <- LSD.test(y,"b", p.adj="none")# bb = as.data.frame(out$group)# bb$ID = row.names(bb)# bb<- arrange(bb, row.names(bb))# pB = bb[1,2]# pCK = bb[2,2]# pRS = bb[3,2]for(i in 1:nrow(a)){  if(sum(a[i,1:5])==0&&sum(a[i,6:10])==0&&sum(a[i,11:15])==0){    Pvalue[i] <- "NA"  }else{    ab = as.numeric(a[i,1:15])    b = sub_design$group    aa = data.frame(ab,b)    y=aov(ab~b,data = aa)    Pvalue[i]<-summary(y)[[1]][,5][1]    out <- LSD.test(y,"b", p.adj="none")    bb = as.data.frame(out$group)    bb$ID = row.names(bb)    bb<- arrange(bb, row.names(bb))    pCK[i] = bb$groups[2]    pB[i] = as.character(bb[1,2])    pCK[i] = as.character(bb[2,2])    pRS[i] = as.character(bb[3,2])    pRS[i];pCK[i];pB[i]    # Dunnett<-glht(y, linfct = mcp(type = "Dunnett"),alternative = "two.side")    # out <- LSD.test(y,sub_design$group, p.adj="none")#进行多重比较,不矫正P值    # tuk=TukeyHSD(y)    # out <-scheffe.test (y,"group")      }}# Pvalueout<-cbind(a,Pvalue, pB,pCK,pRS)head(out)##             CK5         CK4         CK3         CK2         CK1## RE1 0.005549065 0.013885418 0.011357306 0.009472478 0.011338921## RE2 0.013240346 0.050372725 0.078705924 0.041979565 0.040194100## RE3 0.014398817 0.041652675 0.035975518 0.032960401 0.038053388## RE4 0.001026575 0.012005496 0.006981510 0.008045514 0.003511648## RE5 0.003296248 0.008713910 0.006562666 0.008069681 0.006369571## RE6 0.001932415 0.005513607 0.004782645 0.003297546 0.003922539##              B5          B4          B3          B2          B1## RE1 0.006645724 0.011490223 0.008876583 0.007449352 0.011995785## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000## RE3 0.017057299 0.034557751 0.032245560 0.023531604 0.060182030## RE4 0.001508758 0.009058563 0.008608428 0.001993307 0.002024260## RE5 0.003982824 0.004995150 0.007410215 0.004856812 0.007423875## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000##             RS5         RS4         RS3         RS2         RS1## RE1 0.009075086 0.005696559 0.012207578 0.022859290 0.066628198## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000## RE3 0.029216864 0.018120817 0.042290558 0.070338732 0.204044354## RE4 0.001062557 0.003263060 0.001953964 0.003479927 0.003412932## RE5 0.004041344 0.004469817 0.008316117 0.005772101 0.005521356## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000##           Pvalue pB pCK pRS## RE1 2.796373e-01  a   a   a## RE2 2.278410e-04  b   a   b## RE3 3.121200e-01  a   a   a## RE4 2.608948e-01  a   a   a## RE5 6.511946e-01  a   a   a## RE6 5.103709e-06  b   a   bRET_name = RET_all[dim(RET_all)[2]]head(RET_name)##                                          compound## RE1                            (-)-Dihydrocarveol## RE2 (2R,3S)-2-hydroxy-3-isopropylbutanedioic acid## RE3                        1,4-Cyclohexanedione 2## RE4             11-beta-prostaglandin-F-2-alpha 1## RE5                                 1-Hexadecanol## RE6                     2,3-Dimethylsuccinic acidrusult_out = merge(out ,RET_name, by="row.names",all=F)head(rusult_out)##   Row.names         CK5         CK4         CK3         CK2         CK1## 1       RE1 0.005549065 0.013885418 0.011357306 0.009472478 0.011338921## 2      RE10 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000## 3     RE100 0.001650192 0.003067643 0.002522073 0.001038059 0.002182409## 4     RE101 0.006185346 0.017028862 0.029767109 0.012691608 0.072403533## 5     RE102 0.002229133 0.008797501 0.021920508 0.004811970 0.026383348## 6     RE103 0.001405335 0.002621616 0.005901619 0.002184498 0.011798804##            B5          B4          B3          B2           B1         RS5## 1 0.006645724 0.011490223 0.008876583 0.007449352 0.0119957851 0.009075086## 2 0.001245465 0.001468414 0.001217488 0.001201888 0.0004006671 0.001499867## 3 0.000000000 0.000000000 0.000000000 0.000000000 0.0000000000 0.002260963## 4 0.026783700 0.070971462 0.011183080 0.019668387 0.0648061792 0.015967222## 5 0.021370834 0.030246691 0.009708425 0.010392458 0.0244686738 0.008926288## 6 0.009331215 0.009423304 0.002036022 0.002954869 0.0090469287 0.004238941##           RS4         RS3         RS2          RS1       Pvalue pB pCK pRS## 1 0.005696559 0.012207578 0.022859290 0.0666281983 2.796373e-01  a   a   a## 2 0.001142296 0.001528522 0.001253334 0.0006690667 6.950709e-05  a   b   a## 3 0.001242421 0.003830482 0.002939149 0.0046277758 5.597853e-04  b   a   a## 4 0.007431094 0.023015901 0.019496753 0.0210254041 3.504130e-01  a   a   a## 5 0.005726721 0.007780721 0.012159299 0.0048272448 1.341319e-01  a  ab   b## 6 0.001370010 0.003022282 0.002371406 0.0034698880 2.604930e-01  a   a   a##                                compound## 1                    (-)-Dihydrocarveol## 2 2-Amino-2-norbornanecarboxylic acid 3## 3                           Analyte 172## 4                           Analyte 177## 5                           Analyte 178## 6                           Analyte 179dim(rusult_out)## [1] 604  21write.table(rusult_out,"./sub_result_and_script_allgroup//a3_RET_compounds_t检验all结果.txt",quote = FALSE,row.names = T,            col.names = NA,sep = "\t")

学习永无止境,分享永不停歇
(0)

相关推荐