library(ggplot2)
library(scales)
library(gridExtra)
library(grid)


##################
### BAR COLORS ###
##################

gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}
cols = gg_color_hue(3)

hapogSR     = rep(cols[1], 6)
hapogLR     = rep(cols[2], 6)
hapogSRLR   = cols[3]


#######################
### DATA FORMATTING ###
#######################

dat = read.csv('longreads.all.res')
colnames(dat)[1] = "Tool"
dat$Tool = factor(dat$Tool, levels=c("Hapo-G", "Hapo-G_LR", "Hapo-G_SR+LR"))
dat$Round = factor(dat$Round, levels=as.character(unique(dat$Round)))


#####################
###  PHASED SNP   ###
#####################

phased_top = ggplot(data=dat, aes(x=Tool, y=phased, group=Round)) +
  geom_histogram(stat="identity", position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(hapogSR, hapogLR, hapogSRLR)) +
  theme_bw() +
  theme(axis.text = element_text(size = 18), axis.title = element_text(size = 18),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) +
  theme(legend.position = "none") +
  xlab("") +
  ylab("") +
  scale_y_continuous(breaks=c(0, 450000, 470000, 490000), labels=c("0", "450k", "470k", "490k")) +
  coord_cartesian(ylim=c(430000, 495000))

phased_bottom = ggplot(data=dat, aes(x=Tool, y=phased, group=Round)) +
  geom_histogram(stat="identity", position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(hapogSR, hapogLR, hapogSRLR)) +
  theme_bw() +
  theme(axis.text = element_text(size = 18), axis.title = element_text(size = 18),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) +
  theme(legend.position = "none") +
  xlab("") +
  ylab("") +
  scale_y_continuous(breaks=c(0, 100000, 200000, 300000, 400000, 500000, 600000), labels=c("0", "100k", "200k", "300k", "400k", "500k", "600k"))

phased = grid.arrange(phased_top, phased_bottom,
                      heights=c(2, 3),
                      ncol=1, nrow=2,
                      left=textGrob("Number of phased variants",
                                    rot=90,
                                    vjust=1,
                                    gp = gpar(fontsize = 18)))



################
### RUN TIME ###
################

dat2 = subset(dat, dat$Tool != "Uncorrected")

run_time = ggplot(data=dat2, aes(x=Tool, y=elapsed, fill=Tool, group=Round)) +
  geom_histogram(stat="identity", position=position_dodge2(width = 0.9, preserve = "single"), color="black", fill=c(hapogSR, hapogLR, hapogSRLR)) +
  theme_bw() +
  theme(axis.text = element_text(size = 18), axis.title = element_text(size = 18),legend.text = element_text(size = 18),plot.title = element_text(size = 18)) +
  theme(legend.position = "none") +
  xlab("") +
  ylab("Total running time (minutes)")+
  scale_y_continuous(limits=c(0, 400))

phased <- arrangeGrob(phased, top = textGrob("A", x = unit(0, "npc"),
                                             y = unit(1, "npc"), just=c("left","top"),
                                             gp=gpar(col="black", fontsize=18, fontfamily="Times Roman")))

run_time <- arrangeGrob(run_time, top = textGrob("B", x = unit(0, "npc"),
                                                 y = unit(1, "npc"), just=c("left","top"),
                                                 gp=gpar(col="black", fontsize=18, fontfamily="Times Roman")))

g <- grid.arrange(phased, run_time, ncol=1, nrow=2)
ggsave(file="HapoG-longreads.png", plot=g, width=16, height=9)