第一次看到如此原始的绘图代码

在运行RSeQC软件对转录组的比对好的bam文件进行质控的时候,我发现了一个很无语的现象,就是它模仿fastqc的4个质控图里面,GC含量分布,ATCG碱基比例没有问题,但是画碱基质量图的时候,所有样本都是空的。

我打开了软件出图的R代码才发现,真的好原始啊!!!

  1. pdf('1_read_quality.qual.boxplot.pdf')

  2. p0<-rep(c(2,12,22,27,32,37,41),times=c(502,1062718,8073,1306425,35457919,542565,272330)/1)

  3. p1<-rep(c(2,12,22,27,32,37,41),times=c(2857,1133839,10183,1348200,34597142,1162560,395751)/1)

  4. p2<-rep(c(2,12,22,27,32,37,41),times=c(1531,1055310,16489,1310029,8109840,27627266,530067)/1)

  5. p3<-rep(c(2,12,22,27,32,37,41),times=c(13859,960145,121042,1091350,3551363,32128720,784053)/1)

  6. p4<-rep(c(2,12,22,27,32,37,41),times=c(1,831342,222717,811763,2444363,32816629,1523717)/1)

  7. p5<-rep(c(12,22,27,32,37,41),times=c(756725,269357,693807,2111482,5112150,29707011)/1)

  8. p6<-rep(c(2,12,22,27,32,37,41),times=c(383,714875,267381,623622,1782376,4452520,30809375)/1)

  9. p7<-rep(c(2,12,22,27,32,37,41),times=c(3,699425,352083,576246,1676829,4164567,31181379)/1)

  10. p8<-rep(c(2,12,22,27,32,37,41),times=c(33,719293,393174,559110,1597976,4062411,31318535)/1)

  11. p9<-rep(c(2,12,22,27,32,37,41),times=c(33,671366,400498,557291,1556259,3951524,31513561)/1)

  12. p10<-rep(c(2,12,22,27,32,37,41),times=c(11,676272,391661,553181,1532467,3847357,31649583)/1)

  13. p11<-rep(c(2,12,22,27,32,37,41),times=c(171,707083,433602,604159,1551516,3917455,31436546)/1)

  14. p12<-rep(c(12,22,27,32,37,41),times=c(691370,452590,622665,1536494,3938074,31409339)/1)

  15. p13<-rep(c(2,12,22,27,32,37,41),times=c(15,735724,479656,648292,1525773,3996162,31264910)/1)

  16. p14<-rep(c(2,12,22,27,32,37,41),times=c(2,719720,505675,718166,1435858,3968543,31302568)/1)

  17. p15<-rep(c(12,22,27,32,37,41),times=c(684960,517462,749091,1369890,3890668,31438461)/1)

  18. p16<-rep(c(12,22,27,32,37,41),times=c(694095,521500,756687,1342136,3868719,31467395)/1)

  19. p17<-rep(c(2,12,22,27,32,37,41),times=c(1,699737,530920,784387,1322692,3884913,31427882)/1)

  20. p18<-rep(c(12,22,27,32,37,41),times=c(729153,548522,827748,1298547,3944864,31301698)/1)

  21. p19<-rep(c(2,12,22,27,32,37,41),times=c(2,728518,561195,854509,1286615,3953745,31265948)/1)

  22. p20<-rep(c(2,12,22,27,32,37,41),times=c(6,766992,570514,865237,1298635,4003661,31145487)/1)

  23. p21<-rep(c(2,12,22,27,32,37,41),times=c(35,778889,580682,876155,1308700,3988540,31117531)/1)

  24. p22<-rep(c(2,12,22,27,32,37,41),times=c(1,771143,582180,891862,1305112,4019308,31080926)/1)

  25. p23<-rep(c(2,12,22,27,32,37,41),times=c(3,742853,585301,906872,1265223,4004918,31145362)/1)

  26. p24<-rep(c(2,12,22,27,32,37,41),times=c(117,744192,580200,918959,1226368,3965246,31215450)/1)

  27. p25<-rep(c(2,12,22,27,32,37,41),times=c(3,832301,584370,927455,1218032,4009025,31079346)/1)

  28. p26<-rep(c(2,12,22,27,32,37,41),times=c(233,847184,600237,951672,1227499,4064559,30959148)/1)

  29. p27<-rep(c(2,12,22,27,32,37,41),times=c(32,822081,603814,951215,1220279,4032329,31020782)/1)

  30. p28<-rep(c(12,22,27,32,37,41),times=c(815925,598461,937316,1214674,4005228,31078928)/1)

  31. p29<-rep(c(2,12,22,27,32,37,41),times=c(23,812733,600813,939699,1217753,3990265,31089246)/1)

  32. p30<-rep(c(12,22,27,32,37,41),times=c(822835,599345,937065,1218164,4000554,31072569)/1)

  33. p31<-rep(c(2,12,22,27,32,37,41),times=c(3,819821,601435,943129,1215190,3978099,31092855)/1)

  34. p32<-rep(c(2,12,22,27,32,37,41),times=c(75,816107,597396,934031,1211682,3980719,31110522)/1)

  35. p33<-rep(c(2,12,22,27,32,37,41),times=c(1,828263,601197,939752,1218438,4002214,31060667)/1)

  36. p34<-rep(c(2,12,22,27,32,37,41),times=c(12,841128,607539,951096,1228161,4029001,30993595)/1)

  37. p35<-rep(c(2,12,22,27,32,37,41),times=c(24,878053,618430,967778,1246359,4101761,30838127)/1)

  38. p36<-rep(c(2,12,22,27,32,37,41),times=c(3,874809,626513,982694,1256711,4156107,30753695)/1)

  39. p37<-rep(c(2,12,22,27,32,37,41),times=c(86,839266,618442,970032,1247703,4122919,30852084)/1)

  40. p38<-rep(c(2,12,22,27,32,37,41),times=c(1,845991,620542,972963,1254834,4118333,30837868)/1)

  41. p39<-rep(c(12,22,27,32,37,41),times=c(856500,617056,966740,1251736,4123476,30835024)/1)

  42. p40<-rep(c(2,12,22,27,32,37,41),times=c(6,882873,630955,987617,1260948,4142945,30745188)/1)

  43. p41<-rep(c(2,12,22,27,32,37,41),times=c(316,871153,633915,996778,1263813,4139830,30744682)/1)

  44. p42<-rep(c(2,12,22,27,32,37,41),times=c(2,870548,627710,984112,1266996,4125523,30775487)/1)

  45. p43<-rep(c(2,12,22,27,32,37,41),times=c(6,895056,636733,1002172,1298570,4163654,30654055)/1)

  46. p44<-rep(c(2,12,22,27,32,37,41),times=c(61,902873,643684,1006133,1317883,4158298,30621107)/1)

  47. p45<-rep(c(2,12,22,27,32,37,41),times=c(71,908831,647831,1022162,1376023,4160673,30534109)/1)

  48. p46<-rep(c(2,12,22,27,32,37,41),times=c(9,921965,654815,1028952,1415443,4149460,30478685)/1)

  49. p47<-rep(c(2,12,22,27,32,37,41),times=c(82,947712,661661,1036930,1437652,4168232,30396571)/1)

  50. p48<-rep(c(2,12,22,27,32,37,41),times=c(358,962195,673971,1061641,1463369,4241910,30244667)/1)

  51. p49<-rep(c(2,12,22,27,32,37,41),times=c(202,962146,678263,1061091,1465059,4231003,30249297)/1)

  52. p50<-rep(c(2,12,22,27,32,37,41),times=c(60,969956,681561,1067820,1477914,4255726,30188307)/1)

  53. p51<-rep(c(2,12,22,27,32,37,41),times=c(192,982034,686223,1075981,1495114,4309008,30086815)/1)

  54. p52<-rep(c(2,12,22,27,32,37,41),times=c(3,947136,677766,1058639,1477691,4250457,30217809)/1)

  55. p53<-rep(c(2,12,22,27,32,37,41),times=c(187,959598,678831,1068157,1493716,4242767,30180424)/1)

  56. p54<-rep(c(2,12,22,27,32,37,41),times=c(6,947238,676177,1057547,1482063,4223045,30231761)/1)

  57. p55<-rep(c(2,8,12,22,27,32,37,41),times=c(233,1,986433,686760,1069117,1517604,4228366,30123453)/1)

  58. p56<-rep(c(2,12,22,27,32,37,41),times=c(4,942773,685808,1070437,1546963,4169028,30191006)/1)

  59. p57<-rep(c(2,8,12,22,27,32,37,41),times=c(7,1,998837,691096,1078339,1602279,4225038,30004375)/1)

  60. p58<-rep(c(2,8,12,22,27,32,37,41),times=c(43,1,1031250,714125,1106725,1658704,4260229,29822723)/1)

  61. p59<-rep(c(2,12,22,27,32,37,41),times=c(77,1000243,712136,1097526,1686375,4200641,29890620)/1)

  62. p60<-rep(c(2,12,22,27,32,37,41),times=c(3,979646,706717,1083447,1685018,4131352,29994922)/1)

  63. p61<-rep(c(2,12,22,27,32,37,41),times=c(62,992888,703124,1076733,1708177,4095308,29998194)/1)

  64. p62<-rep(c(2,12,22,27,32,37,41),times=c(43,965056,704746,1068774,1712672,4078353,30038121)/1)

  65. p63<-rep(c(2,12,22,27,32,37,41),times=c(92,994758,710866,1064544,1728702,4093538,29968571)/1)

  66. p64<-rep(c(2,8,12,22,27,32,37,41),times=c(51,1,986215,707587,1083456,1746841,4142878,29887209)/1)

  67. p65<-rep(c(2,8,12,22,27,32,37,41),times=c(82,2,1001793,714133,1078237,1740022,4117488,29895081)/1)

  68. p66<-rep(c(2,8,12,22,27,32,37,41),times=c(2,2,1038683,738742,1097344,1765601,4183929,29714962)/1)

  69. p67<-rep(c(2,8,12,22,27,32,37,41),times=c(40,1,1021258,747209,1091011,1790945,4201949,29679083)/1)

  70. boxplot(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36,p37,p38,p39,p40,p41,p42,p43,p44,p45,p46,p47,p48,p49,p50,p51,p52,p53,p54,p55,p56,p57,p58,p59,p60,p61,p62,p63,p64,p65,p66,p67,xlab="Position of Read(5'->3')",ylab="Phred Quality Score",outline=F)

  71. dev.off()

  72. pdf('1_read_quality.qual.heatmap.pdf')

上面的67个循环,代码就构建了67个长度为2千万的向量,对这两千万的向量画boxplot,一个向量内存约200多M,R语言本身如此低效,怪不得我都没有出图,肯定是内存溢出,挂掉了。

画出来的图应该是跟下面的差不多,大家试试看吧,如果你内存足够:

朋友们,对于这个绘图代码,大家有什么优化建议呢?

赶紧动起来吧,(๑·̀ㅂ·́)و✧加油

(0)

相关推荐