library(ggplot2)
library(scales)
library(gridExtra)
library(grid)

##################
### BAR COLORS ###
##################

# Return `n` colours evenly spaced around the HCL hue wheel (hue 15..375,
# luminance 65, chroma 100). The last of the n + 1 generated hues is dropped
# because hue 375 wraps around to the same colour as hue 15.
gg_color_hue <- function(n) {
  hues <- seq(15, 375, length.out = n + 1)
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}

# Colours are picked from a 9-colour palette; index 3 is not used here.
cols <- gg_color_hue(9)
uncorrected <- cols[1]
hapog <- cols[2]
nextpolish <- cols[4]
ntedit <- cols[5]
pilon <- cols[6]

# Named lookup so that each bar is coloured by its tool name rather than by
# the row order of the input file.
tool_colors <- c(
  "Uncorrected" = uncorrected,
  "Hapo-G" = hapog,
  "NextPolish" = nextpolish,
  "ntEdit" = ntedit,
  "Pilon" = pilon
)

#######################
### DATA FORMATTING ###
#######################

dat <- read.csv("human_genome.csv")
colnames(dat)[1] <- "Tool"

# factor() silently turns names missing from `levels` into NA; report them.
unknown_tools <- setdiff(unique(as.character(dat$Tool)), names(tool_colors))
if (length(unknown_tools) > 0) {
  warning(
    "Unrecognised tool name(s) in human_genome.csv: ",
    paste(unknown_tools, collapse = ", "),
    call. = FALSE
  )
}
dat$Tool <- factor(dat$Tool, levels = names(tool_colors))

# Run time and memory are only plotted for the polishing tools.
dat2 <- subset(dat, dat$Tool != "Uncorrected")

######################
### SHARED HELPERS ###
######################

# Black-outlined bars, one per row, filled according to `tool_colors`.
# geom_col() plots the y values as given (no binning).
tool_bars <- function(position = position_dodge2(preserve = "single")) {
  list(
    geom_col(aes(fill = Tool), position = position, color = "black"),
    scale_fill_manual(values = tool_colors)
  )
}

# Theme shared by every panel: theme_bw() with size-14 axis text and titles
# and no legend. `legend_title_size` sets the legend-text and plot-title size.
metric_theme <- function(legend_title_size = 18) {
  list(
    theme_bw(),
    theme(
      axis.text = element_text(size = 14),
      axis.title = element_text(size = 14),
      legend.text = element_text(size = legend_title_size),
      plot.title = element_text(size = legend_title_size),
      legend.position = "none"
    )
  )
}

# Stack a zoomed panel above a full-range panel (heights 2:3) with one shared,
# rotated y-axis label on the left. arrangeGrob() only builds the layout; it
# does not draw on the active graphics device.
stack_panels <- function(top, bottom, y_label) {
  arrangeGrob(
    top, bottom,
    heights = c(2, 3), ncol = 1, nrow = 2,
    left = textGrob(y_label, rot = 90, vjust = 1)
  )
}

# Put a panel letter in the top-left corner above a plot or grob.
add_panel_label <- function(panel, label) {
  arrangeGrob(
    panel,
    top = textGrob(
      label,
      x = unit(0, "npc"), y = unit(1, "npc"),
      just = c("left", "top"),
      gp = gpar(col = "black", fontsize = 18, fontfamily = "Times Roman")
    )
  )
}

###########################
### 100% identity pairs ###
###########################

pairs_position <- position_dodge2(width = 0.9, preserve = "single")

# Zoomed view: y axis restricted to 525e6..575e6, tick labels in millions.
pairs_top <- ggplot(data = dat, aes(x = Tool, y = X100p_pairs)) +
  tool_bars(pairs_position) +
  metric_theme() +
  xlab("") +
  ylab("") +
  scale_y_continuous(
    breaks = c(525000000, 550000000, 575000000),
    labels = c("525", "550", "575")
  ) +
  coord_cartesian(ylim = c(525000000, 575000000))

# Full-range view of the same data, tick labels in millions.
pairs_bottom <- ggplot(data = dat, aes(x = Tool, y = X100p_pairs)) +
  tool_bars(pairs_position) +
  metric_theme() +
  xlab("") +
  ylab("") +
  scale_y_continuous(
    breaks = c(0, 100000000, 200000000, 300000000, 400000000, 500000000,
               600000000),
    labels = c("0", "100", "200", "300", "400", "500", "600")
  )

pairs <- stack_panels(
  pairs_top, pairs_bottom,
  "Million of perfectly mapped pairs"
)

#############
### EXONS ###
#############

exons_position <- position_dodge2(width = 0.9, preserve = "single")

# Zoomed view: y axis restricted to 359,000..362,000.
exons_top <- ggplot(data = dat, aes(x = Tool, y = X100p_exons)) +
  tool_bars(exons_position) +
  metric_theme() +
  xlab("") +
  ylab("") +
  scale_y_continuous(labels = comma) +
  coord_cartesian(ylim = c(359000, 362000))

# Full-range view of the same data.
exons_bottom <- ggplot(data = dat, aes(x = Tool, y = X100p_exons)) +
  tool_bars(exons_position) +
  metric_theme() +
  xlab("") +
  ylab("") +
  scale_y_continuous(labels = comma)

exons <- stack_panels(
  exons_top, exons_bottom,
  "Number of exons aligned with 100% identity"
)

################
### RUN TIME ###
################

run_time <- ggplot(data = dat2, aes(x = Tool, y = run_time, fill = Tool)) +
  tool_bars() +
  metric_theme() +
  xlab("") +
  ylab("Total running time (hours)") +
  coord_cartesian(ylim = c(0, 35))

# Panels are lettered B, C and D; no panel "A" is produced by this script.
pairs <- add_panel_label(pairs, "B")
exons <- add_panel_label(exons, "C")
run_time <- add_panel_label(run_time, "D")

# Build the 2 x 2 layout (three cells used) without drawing it to the active
# device; ggsave() renders it straight into the PNG file.
g <- arrangeGrob(pairs, exons, run_time, ncol = 2, nrow = 2)
ggsave(filename = "human_genome_metrics.png", plot = g, width = 16, height = 9)

###################
### MEMORY PEAK ###
###################

memory_peak <- ggplot(
  data = dat2,
  aes(x = Tool, y = peak_memory.GB., fill = Tool)
) +
  tool_bars() +
  metric_theme(legend_title_size = 20) +
  xlab("") +
  ylab("Memory peak (GB)") +
  scale_y_continuous(limits = c(0, 350))

ggsave(
  filename = "human_genome_memory_peak.png",
  plot = memory_peak,
  width = 16,
  height = 9
)