堆叠柱状图顺序排列及其添加合适条块标签

Tao Wen

2018年12月17日

写在前面：人生嘛，不就是这样，总会有高兴和不高兴，积极和消沉嘛！即便晚上过成了白天，白天过成了晚上。但事情总会过去，有缺憾才完美。

基于phylosep对扩增子下游分析

R语言绘制门类水平堆叠柱状图

有些日子没有更新了，心里面惦记着啥时候推送两篇，今天终于逃课，坐在这里给大家送点干活；之前有人推荐我使用markdown来编辑推送稿，我就看了一下，花里胡哨的东西没怎么学会，大家凑活着看，慢慢的我也就能做一篇整齐齐的，清爽的推送。

我也就不做过多的解释了，大家都明白

library("ggalluvial")library("alluvial")library("phyloseq")library("ggplot2")library("dplyr")library("biomformat")library("reshape2")library("plyr")

基于phylosep和dplyr对数据进行输入、预处理和整理

数据形式

# knitr::kable(# Taxonomies1[1:10,],# caption = "A knitr kable"# )

构造排序因子

#按照分组求和by_cyl <- group_by(Taxonomies1, SampleType,Genus) zhnagxu2 = dplyr :: summarise(by_cyl, sum(Abundance))colnames(zhnagxu2) = c("group", "Genus","Abundance")head(zhnagxu2)## # A tibble: 6 x 3## group Genus Abundance## <fct> <fct> <dbl>## 1 CF Adhaeribacter 1.55 ## 2 CF Alicyclobacillus 0.590## 3 CF Aquicella 0.256## 4 CF Azoarcus 0.546## 5 CF Bacillus 6.82 ## 6 CF Balneimonas 1.81##确定因子，这里通过求和按照从小到大的顺序得到因子##长变宽Taxonomies2 = dcast(Taxonomies1,Genus ~ Sample,value.var = "Abundance")head(Taxonomies2)## Genus CF1.fastq CF4.fastq CF5.fastq CF6.fastq CK1.fastq## 1 Adhaeribacter 0.4766949 NA 0.6363286 0.4375653 0.3774376## 2 Alicyclobacillus 0.3354520 NA NA 0.2547022 NA## 3 Aquicella 0.2560028 NA NA NA NA## 4 Arenimonas NA NA NA NA NA## 5 Azoarcus NA 0.2843602 NA 0.2612330 NA## 6 Bacillus 1.6596045 1.0663507 1.3883533 2.7037618 2.2698679Taxonomies2[is.na(Taxonomies2)] <- 0aa = Taxonomies2# head(aa)n = ncol(aa)#增加一行，为整列的均值，计算每一列的均值，2就是表示列aa[n+1]=apply(aa[,c(2:ncol(aa))],1,sum)bb<- arrange(aa, V14)head(bb)## Genus CF1.fastq CF4.fastq CF5.fastq CF6.fastq CK1.fastq## 1 Aquicella 0.2560028 0 0 0 0.0000000## 2 Pseudidiomarina 0.0000000 0 0 0 0.2673516## 3 Janthinobacterium 0.0000000 0 0 0 0.2725938## 4 Variovorax 0.2913136 0 0 0 0.0000000## 5 Pedomicrobium 0.0000000 0 0 0 0.0000000## 6 Luteimonas 0.0000000 0 0 0 0.3355001bb = bb[c(1,ncol(bb))]cc<- arrange(bb, desc(V14))# head(cc)

因子排序变量查看

cc$Genus = as.character(cc$Genus)cc$Genus = as.factor(cc$Genus)cc$Genus

开始对作图分类水平进行按照平均丰度的排序

zhnagxu2$Genus = factor(zhnagxu2$Genus,order = T,levels = cc$Genus)zhnagxu3 = plyr::arrange(zhnagxu2,desc(Genus))head(zhnagxu3)## # A tibble: 6 x 3## group Genus Abundance## <fct> <ord> <dbl>## 1 CF Aquicella 0.256## 2 CK Pseudidiomarina 0.267## 3 CK Janthinobacterium 0.273## 4 CF Variovorax 0.291## 5 OF Pedomicrobium 0.301## 6 CK Luteimonas 0.336# ##制作标签坐标，标签位于顶端# Taxonomies_x = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance))# head(Taxonomies_x )#标签位于中部Taxonomies_x = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance) - 0.5*Abundance)head(Taxonomies_x,20 )## group Genus Abundance label_y## 1 CF Aquicella 0.2560028 0.1280014## 2 CF Variovorax 0.2913136 0.4016596## 3 CF Azoarcus 0.5455932 0.8201130## 4 CF Alicyclobacillus 0.5901542 1.3879867## 5 CF Opitutus 0.6431613 2.0046444## 6 CF Pseudoxanthomonas 0.3707627 2.5116064## 7 CF Pontibacter 0.5339744 2.9639750## 8 CF Rubrivivax 0.3317536 3.3968389## 9 CF Fimbriimonas 0.7129691 3.9192003## 10 CF Rhodobacter 0.5965841 4.5739768## 11 CF Hydrogenophaga 0.7701422 5.2573400## 12 CF Candidatus Solibacter 0.2603162 5.7725692## 13 CF Thauera 0.2606635 6.0330590## 14 CF Skermanella 0.6073668 6.4670742## 15 CF Flavisolibacter 1.4495684 7.4955417## 16 CF Devosia 1.5499034 8.9952776## 17 CF Geobacter 0.8809867 10.2107227## 18 CF Adhaeribacter 1.5505888 11.4265104## 19 CF Balneimonas 1.8123620 13.1079858## 20 CF Sphingobium 0.9007005 14.4645170

添加标签按照堆叠柱子的宽度进行选择是否需要在柱子上添加标签

Taxonomies_x$label = Taxonomies_x$Genus#使用循环将堆叠柱状图柱子比较窄的别写标签，仅仅宽柱子写上标签for(i in 1:nrow(Taxonomies_x)){ if(Taxonomies_x[i,3] > 5){ Taxonomies_x[i,5] = Taxonomies_x[i,5] }else{ Taxonomies_x[i,5] = NA }}

定义需要所需要的颜色

出图

##普通柱状图p = china_barplots <- ggplot(Taxonomies_x , aes(x = group, y = Abundance, fill = Genus, order = Genus)) + geom_bar(stat = "identity",width = 0.5) + scale_fill_manual(values = Phylum_colors) + theme(axis.title.x = element_blank()) + theme(legend.text=element_text(size=6)) + scale_y_continuous(name = "Abundance (%)")+ geom_text(aes(y = label_y, label = label ),size = 4)print(china_barplots)## Warning: Removed 81 rows containing missing values (geom_text).

p =p+theme_bw()+ scale_y_continuous(expand = c(0,0))+ #geom_hline(aes(yintercept=0), colour="black", linetype=2) + #geom_vline(aes(xintercept=0), colour="black", linetype="dashed") + #scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+ theme( panel.grid.major=element_blank(), panel.grid.minor=element_blank(), plot.title = element_text(vjust = -8.5,hjust = 0.1), axis.title.y =element_text(size = 20,face = "bold",colour = "black"), axis.title.x =element_text(size = 24,face = "bold",colour = "black"), axis.text = element_text(size = 20,face = "bold"), axis.text.x = element_text(colour = "black",size = 14), axis.text.y = element_text(colour = "black",size = 14), legend.text = element_text(size = 10,face = "bold") #legend.position = "none"#是否删除图例 ) ## Scale for 'y' is already present. Adding another scale for 'y', which## will replace the existing scale.p## Warning: Removed 81 rows containing missing values (geom_text).

#ggsave("./result_and_script/a4_分门别类冲击图/属水平水平柱状图.pdf", p, width = 10, height =8 )

下面开始加上分组之间的连线，方便我们进行横向比较

参考的是冲击图，这里我遇到的困难是如何设置连线流动的映射，我是人工构建的一套流动映射，可能不是最好的解决方案，好的一点无序改动，跑就行了。

##柱状图冲击图#stratum定义堆叠柱状图柱子内容，以weight定义柱子长度，alluvium定义连线head(Taxonomies_x )## group Genus Abundance label_y label## 1 CF Aquicella 0.2560028 0.1280014 <NA>## 2 CF Variovorax 0.2913136 0.4016596 <NA>## 3 CF Azoarcus 0.5455932 0.8201130 <NA>## 4 CF Alicyclobacillus 0.5901542 1.3879867 <NA>## 5 CF Opitutus 0.6431613 2.0046444 <NA>## 6 CF Pseudoxanthomonas 0.3707627 2.5116064 <NA>cs = Taxonomies_x $Genuscs1 = cs#提取真正的因子的数量lengthfactor = length(levels(cs1))#提取每个因子对应的数量cs3 = summary (as.factor(cs1))cs4 = as.data.frame(cs3)cs4$id = row.names(cs4)#对因子进行排序df_arrange<- arrange(cs4, id)#对Taxonomies_x 对应的列进行排序Taxonomies_x1<- arrange(Taxonomies_x , Genus)head(Taxonomies_x1)## group Genus Abundance label_y label## 1 CF Kaistobacter 16.512434 66.34180 Kaistobacter## 2 CK Kaistobacter 14.791761 66.46900 Kaistobacter## 3 OF Kaistobacter 14.228975 67.11143 Kaistobacter## 4 CF Nitrospira 11.613095 52.27904 Nitrospira## 5 CK Nitrospira 9.342698 54.40177 Nitrospira## 6 OF Nitrospira 10.635413 54.67924 Nitrospira#构建flow的映射列Taxonomies_x Taxonomies_x1$ID = factor(rep(c(1:lengthfactor), cs4$cs3))#colour = "black",size = 2,,aes(color = "black",size = 0.8)p2 = ggplot(Taxonomies_x1, aes(x = group, stratum = Genus, alluvium = ID, weight = Abundance, fill = Genus, label = Genus)) + geom_flow(stat = "alluvium", lode.guidance = "rightleft", color = "black",size = 0.2,width = 0.45,alpha = .2) + geom_bar(width = 0.45)+ geom_stratum(width = 0.45,size = 0.1) + #geom_text(stat = "stratum", size = 3) + #theme(legend.position = "none") + scale_fill_manual(values = Phylum_colors)+ #ggtitle("fow_plot")+ #scale_x_discrete(limits = c("CK1","CK3","CK5","CK7","CK9","CK11","CK13","CK15","CK17","CK19"))+ geom_text(aes(y = label_y, label = label ),size = 4)+ labs(x="group", y="Relative abundancce (%)", title="")p2 =p2+theme_bw()+ scale_y_continuous(expand = c(0,0))+ #geom_hline(aes(yintercept=0), colour="black", linetype=2) + #geom_vline(aes(xintercept=0), colour="black", linetype="dashed") + #scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+ theme( panel.grid.major=element_blank(), panel.grid.minor=element_blank(), plot.title = element_text(vjust = -8.5,hjust = 0.1), axis.title.y =element_text(size = 20,face = "bold",colour = "black"), axis.title.x =element_text(size = 24,face = "bold",colour = "black"), axis.text = element_text(size = 20,face = "bold"), axis.text.x = element_text(colour = "black",size = 14), axis.text.y = element_text(colour = "black",size = 14), legend.text = element_text(size = 10,face = "bold") #legend.position = "none"#是否删除图例 ) p2## Warning: The aesthetic `weight` is deprecated.## Pass arguments to `y` instead.## Warning: The aesthetic `weight` is deprecated.## Pass arguments to `y` instead.## Warning: Removed 81 rows containing missing values (geom_text).

#ggsave("./result_and_script/a4_/flow_plot_bar.pdf", p2, width = 10, height =8 )

分类堆叠柱状图顺序排列及其添加合适条块标签