# Builds the three-panel figure "Figure_lowcov_Solanum_metrics.png" from
# 'hapog-low.figure.csv':
#   A - number of phased variants per tool and polishing round (split y axis)
#   B - number of phased chains (>= 3 variants) per tool and round (split y axis)
#   C - total running time per tool and round
#
# The CSV is expected to hold the tool name in its first column plus the
# columns Round, phased, sup3SNP and elapsed.

library(ggplot2)
library(scales)
library(gridExtra)
library(grid)

# Bar colors ----

# Return `n` colours evenly spaced around the HCL colour wheel
# (hue 15..375, luminance 65, chroma 100).
gg_color_hue <- function(n) {
  hues <- seq(15, 375, length.out = n + 1)
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}

# Replace every whitespace character by a newline so that axis labels such as
# "Hapo-G 25X" are drawn on two lines. Extra arguments are accepted and ignored.
addline_format <- function(x, ...) {
  gsub("\\s", "\n", x)
}

# Tools in plotting order; this order also fixes which colour each tool gets.
tool_levels <- c(
  "Hapo-G 25X", "Hapo-G 50X", "HyPo 50X", "NextPolish 50X",
  "ntEdit 50X", "Pilon 50X", "POLCA 50X", "Racon 50X"
)

cols <- gg_color_hue(length(tool_levels))

# Named palette: colours are looked up by tool name, so the figure no longer
# depends on the CSV having a fixed number of rows per tool or being sorted.
tool_cols <- setNames(cols, tool_levels)

# Data formatting ----

dat <- read.csv("hapog-low.figure.csv")
colnames(dat)[1] <- "Tool"

required_cols <- c("Round", "phased", "sup3SNP", "elapsed")
missing_cols <- setdiff(required_cols, colnames(dat))
if (length(missing_cols) > 0) {
  stop(
    "hapog-low.figure.csv is missing column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

dat$Tool <- factor(dat$Tool, levels = tool_levels)
# Keep rounds in their order of first appearance in the file.
dat$Round <- factor(dat$Round, levels = as.character(unique(dat$Round)))

# Plot helpers ----

# Dodged bar chart of one metric, one bar per (Tool, Round), coloured by tool.
#   data            data frame with Tool, Round and the `y_var` column
#   y_var           name (string) of the column plotted on the y axis
#   y_scale         y scale (e.g. a scale_y_continuous() call) added to the plot
#   position        position adjustment of the bars
#   y_title         y axis title
#   axis_title_size font size of the axis titles
#   palette         named colour vector, one entry per tool
tool_barplot <- function(data, y_var, y_scale,
                         position = position_dodge2(
                           width = 0.9, preserve = "single"
                         ),
                         y_title = "", axis_title_size = 13,
                         palette = tool_cols) {
  ggplot(
    data = data,
    aes(x = Tool, y = .data[[y_var]], fill = Tool, group = Round)
  ) +
    geom_col(position = position, color = "black") +
    scale_fill_manual(values = palette) +
    theme_bw() +
    theme(
      axis.text = element_text(size = 13),
      axis.title = element_text(size = axis_title_size),
      legend.text = element_text(size = 18),
      plot.title = element_text(size = 18),
      legend.position = "none"
    ) +
    xlab("") +
    ylab(y_title) +
    # A label function (rather than a fixed vector) stays valid even if a tool
    # is absent from the data.
    scale_x_discrete(labels = addline_format) +
    y_scale
}

# Stack a zoomed "top" panel over a full-range "bottom" panel and put a shared,
# rotated y axis title on the left. arrangeGrob() only builds the layout; it
# does not draw on the active graphics device.
stack_split_axis <- function(top, bottom, y_title) {
  arrangeGrob(
    top, bottom,
    heights = c(2, 3), ncol = 1, nrow = 2,
    left = textGrob(y_title, rot = 90, vjust = 1, gp = gpar(fontsize = 18))
  )
}

# Add a panel letter ("A", "B", ...) in the top-left corner of a plot or grob.
tag_panel <- function(panel, tag) {
  arrangeGrob(
    panel,
    top = textGrob(
      tag,
      x = unit(0, "npc"), y = unit(1, "npc"),
      just = c("left", "top"),
      gp = gpar(col = "black", fontsize = 18, fontfamily = "Times Roman")
    )
  )
}

# Phased SNP ----

# Top panel: same bars, zoomed on 440k-490k with coord_cartesian() so that the
# bars are clipped rather than dropped.
phased_top <- tool_barplot(
  dat, "phased",
  scale_y_continuous(
    breaks = c(0, 450000, 470000, 490000),
    labels = c("0", "450k", "470k", "490k")
  )
) +
  coord_cartesian(ylim = c(440000, 490000))

phased_bottom <- tool_barplot(
  dat, "phased",
  scale_y_continuous(
    breaks = c(0, 100000, 200000, 300000, 400000, 500000),
    labels = c("0", "100k", "200k", "300k", "400k", "500k")
  )
)

phased <- stack_split_axis(
  phased_top, phased_bottom, "Number of phased variants"
)

# Chains of phased SNP ----

# Top panel: zoom on 90k-101k.
sup3SNP_top <- tool_barplot(
  dat, "sup3SNP",
  scale_y_continuous(
    breaks = c(0, 90000, 95000, 100000),
    labels = c("0", "90k", "95k", "100k")
  )
) +
  coord_cartesian(ylim = c(90000, 101000))

sup3SNP_bottom <- tool_barplot(
  dat, "sup3SNP",
  scale_y_continuous(
    breaks = c(0, 25000, 50000, 75000, 100000),
    labels = c("0", "25k", "50k", "75k", "100k")
  )
)

sup3SNP <- stack_split_axis(
  sup3SNP_top, sup3SNP_bottom, "Number of phased chains (>= 3 variants)"
)

# Run time ----

# "Uncorrected" is not one of the factor levels above, so any such row already
# has Tool == NA at this point; subset() drops rows whose condition is NA.
dat2 <- subset(dat, Tool != "Uncorrected")

run_time <- tool_barplot(
  dat2, "elapsed",
  scale_y_continuous(limits = c(0, 1000)),
  position = position_dodge(),
  y_title = "Total running time (minutes)",
  axis_title_size = 18
)

# Final layout ----

phased <- tag_panel(phased, "A")
sup3SNP <- tag_panel(sup3SNP, "B")
run_time <- tag_panel(run_time, "C")

# Three panels on a 2 x 2 grid (the fourth cell stays empty).
g <- arrangeGrob(phased, sup3SNP, run_time, ncol = 2, nrow = 2)

ggsave(
  filename = "Figure_lowcov_Solanum_metrics.png",
  plot = g, width = 16, height = 9
)