library(ggplot2) library(scales) library(gridExtra) library(grid) ################## ### BAR COLORS ### ################## gg_color_hue <- function(n) { hues = seq(15, 375, length = n + 1) hcl(h = hues, l = 65, c = 100)[1:n] } cols = gg_color_hue(9) uncorrected = cols[1] apollo = cols[9] # This one was added later so it gets 9th color to not change the other ones hapog = rep(cols[2], 6) hypo = rep(cols[3], 6) nextpolish = rep(cols[4], 6) ntedit = rep(cols[5], 6) pilon = rep(cols[6], 6) polca = rep(cols[7], 6) racon = rep(cols[8], 6) ####################### ### DATA FORMATTING ### ####################### dat = read.csv('Arabidopsis_results.csv') colnames(dat)[1] = "Tool" dat$Tool = factor(dat$Tool, levels=c("Uncorrected", "Apollo", "Hapo-G", "HyPo", "NextPolish", "ntEdit", "Pilon", "POLCA", "Racon")) dat$Round = factor(dat$Round, levels=as.character(unique(dat$Round))) dat$errors_per_100kb = dat$MismatchesPer100kb + dat$IndelsPer100kb ################## ### ERROR RATE ### ################## errors_top = ggplot(data=dat, aes(x=Tool, y=errors_per_100kb, group=Round)) + geom_col(position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(uncorrected, apollo, hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 12), axis.title = element_text(size = 12),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) + theme(legend.position = "none")+ xlab("") + ylab("") + coord_cartesian(ylim=c(55,65)) errors_bottom = ggplot(data=dat, aes(x=Tool, y=errors_per_100kb, group=Round)) + geom_col(position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(uncorrected, apollo, hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 12), axis.title = element_text(size = 12),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) + theme(legend.position = "none")+ xlab("") + ylab("") errors = grid.arrange(errors_top, errors_bottom, heights=c(2, 3), ncol=1, nrow=2, left=textGrob("Number of errors per 100kb", rot=90, vjust=1)) ########################### ### 100% identity pairs ### ########################### pairs_top = ggplot(data=dat, aes(x=Tool, y=X100p_pairs, group=Round)) + geom_histogram(stat="identity", position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(uncorrected, apollo, hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 12), axis.title = element_text(size = 12),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) + theme(legend.position = "none") + xlab("") + ylab("") + scale_y_continuous(breaks=c(42500000, 43000000, 43500000), labels=c("42.5", "43.0", "43.5")) + coord_cartesian(ylim=c(42500000, 43500000)) pairs_bottom = ggplot(data=dat, aes(x=Tool, y=X100p_pairs, group=Round)) + geom_histogram(stat="identity", position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(uncorrected, apollo, hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 12), axis.title = element_text(size = 12),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) + theme(legend.position = "none") + xlab("") + ylab("") + scale_y_continuous(breaks=c(0, 10000000, 20000000, 30000000, 40000000, 50000000), labels=c("0", "10.0", "20.0", "30.0", "40.0", "50.0")) pairs = grid.arrange(pairs_top, pairs_bottom, heights=c(2, 3), ncol=1, nrow=2, left=textGrob("Million of perfectly mapped pairs", rot=90, vjust=1)) ############# ### EXONS ### ############# exons_top = ggplot(data=dat, aes(x=Tool, y=X100p_exons, group=Round)) + geom_histogram(stat="identity", position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(uncorrected, apollo, hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 12), axis.title = element_text(size=12),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) + theme(legend.position = "none") + xlab("") + ylab("") + scale_y_continuous(label=comma, breaks=c(199000, 199500, 200000)) + coord_cartesian(ylim=c(199000, 200000)) exons_bottom = ggplot(data=dat, aes(x=Tool, y=X100p_exons, group=Round)) + geom_histogram(stat="identity", position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(uncorrected, apollo, hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 12), axis.title = element_text(size=12),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) + theme(legend.position = "none") + xlab("") + ylab("") + scale_y_continuous(label=comma, breaks=c(0, 50000, 100000, 150000, 200000)) + coord_cartesian(ylim=c(0, 200000)) exons = grid.arrange(exons_top, exons_bottom, heights=c(2, 3), ncol=1, nrow=2, left=textGrob("Number of exons aligned with 100% identity", rot=90, vjust=1)) ################ ### RUN TIME ### ################ dat2 = subset(dat, dat$Tool != "Uncorrected") run_time = ggplot(data=dat2, aes(x=Tool, y=run_time, fill=Tool, group=Round)) + geom_histogram(stat="identity", position=position_dodge2(preserve = "single"), color="black", fill=c(apollo,hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 12), axis.title = element_text(size = 12),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) + theme(legend.position = "none") + xlab("") + ylab("Total running time (minutes)") + #scale_y_continuous(limits=c(0, 210)) coord_cartesian(ylim=c(0, 210)) errors <- arrangeGrob(errors, top = textGrob("A", x = unit(0, "npc"), y = unit(1, "npc"), just=c("left","top"), gp=gpar(col="black", fontsize=18, fontfamily="Times Roman"))) pairs <- arrangeGrob(pairs, top = textGrob("B", x = unit(0, "npc"), y = unit(1, "npc"), just=c("left","top"), gp=gpar(col="black", fontsize=18, fontfamily="Times Roman"))) exons <- arrangeGrob(exons, top = textGrob("C", x = unit(0, "npc"), y = unit(1, "npc"), just=c("left","top"), gp=gpar(col="black", fontsize=18, fontfamily="Times Roman"))) run_time <- arrangeGrob(run_time, top = textGrob("D", x = unit(0, "npc"), y = unit(1, "npc"), just=c("left","top"), gp=gpar(col="black", fontsize=18, fontfamily="Times Roman"))) g <- grid.arrange(errors, pairs, exons, run_time, ncol=2, nrow=2) ggsave(file="Figure_3_Arabidopsis_metrics.png", plot=g, width=16, height=9) ################### ### MEMORY PEAK ### ################### dat2 = subset(dat, dat$Tool != "Uncorrected") memory_peak = ggplot(data=dat2, aes(x=Tool, y=peak_memory.GB., fill=Tool, group=Round)) + geom_histogram(stat="identity", position=position_dodge2(preserve = "single"), color="black", fill=c(apollo, hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 20), axis.title = element_text(size = 20),legend.text = element_text(size = 20),plot.title = element_text(size = 20)) + theme(legend.position = "none") + xlab("") + ylab("Memory peak (GB)")+ scale_y_continuous(limits=c(0, 200)) ggsave(file="Arabidopsis_memory_peak.png", plot=memory_peak, width=16, height=9) ################### ### BUSCO ### ################### busco_C = ggplot(data=dat, aes(x=Tool, y=Busco_C, group=Round)) + geom_col(position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(uncorrected, apollo, hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 14),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) + theme(legend.position = "none")+ xlab("") + ylab("Percentage of complete Busco genes") + coord_cartesian(ylim=c(95, 100)) busco_D = ggplot(data=dat, aes(x=Tool, y=Busco_D, group=Round)) + geom_col(position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(uncorrected, apollo, hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 14),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) + theme(legend.position = "none")+ xlab("") + ylab("Percentage of duplicated Busco genes") + coord_cartesian(ylim=c(0, 5)) busco_F = ggplot(data=dat, aes(x=Tool, y=Busco_F, group=Round)) + geom_col(position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(uncorrected, apollo, hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 14),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) + theme(legend.position = "none")+ xlab("") + ylab("Percentage of fragmented Busco genes") + coord_cartesian(ylim=c(0, 5)) busco_M = ggplot(data=dat, aes(x=Tool, y=Busco_M, group=Round)) + geom_col(position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(uncorrected, apollo, hapog, hypo, nextpolish, ntedit, pilon, polca, racon)) + theme_bw() + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 14),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) + theme(legend.position = "none")+ xlab("") + ylab("Percentage of missing Busco genes") + coord_cartesian(ylim=c(0, 5)) busco_C <- arrangeGrob(busco_C, top = textGrob("A", x = unit(0, "npc"), y = unit(1, "npc"), just=c("left","top"), gp=gpar(col="black", fontsize=18, fontfamily="Times Roman"))) busco_D <- arrangeGrob(busco_D, top = textGrob("B", x = unit(0, "npc"), y = unit(1, "npc"), just=c("left","top"), gp=gpar(col="black", fontsize=18, fontfamily="Times Roman"))) busco_F <- arrangeGrob(busco_F, top = textGrob("C", x = unit(0, "npc"), y = unit(1, "npc"), just=c("left","top"), gp=gpar(col="black", fontsize=18, fontfamily="Times Roman"))) busco_M <- arrangeGrob(busco_M, top = textGrob("D", x = unit(0, "npc"), y = unit(1, "npc"), just=c("left","top"), gp=gpar(col="black", fontsize=18, fontfamily="Times Roman"))) g <- grid.arrange(busco_C, busco_D, busco_F, busco_M, ncol=2, nrow=2) ggsave(file="Arabidopsis_Buscos.png", plot=g, width=16, height=9)