library(dplyr)
library(ggplot2)
#
### Preparing dataframe of read lengths
# Plot yield per read-length bin for a time window of a sequencing run, report
# the N50 of the reads in that window, and save the histogram as a PNG.
#
# Arguments:
#   seqSummary      Path to a tab-separated sequencing summary file with a
#                   header row. Must contain the columns
#                   sequence_length_template, start_time and duration.
#                   Relative paths are resolved against workDir.
#   plotTitle       Title shown above the plot.
#   maxRead         Upper edge (bp) of the last read-length bin; reads longer
#                   than this fall outside every bin.
#   xInt, yInt      Upper limits of the visible x axis (kb) and y axis (Mb).
#   workDir         Directory used as working directory while the function runs.
#   startTime,
#   endTime         Inclusive window, in minutes, applied to
#                   floor((start_time + duration) / 60).
#                   NOTE(review): this assumes start_time and duration are in
#                   seconds - confirm against the summary file format.
#                   endTime = NULL means "up to the last minute in the file".
#   breaksStart,
#   breaksVal       First y-axis break and the spacing between y-axis breaks.
#   binSize         Width (bp) of each read-length bin.
#   filename        Output file stem; "_timed_histogram.png" is appended.
#   saveDir         Existing directory the PNG is written to (a relative path
#                   is resolved against workDir).
#   legendPosition  Passed to theme(legend.position = ...).
#
# Returns the ggplot object invisibly. Side effects: prints the plot, writes
# the PNG and emits progress messages. The caller's working directory and the
# global ggplot2 theme are left untouched.
circulomicTablesTimed <- function(seqSummary = NULL, plotTitle, maxRead = 2e+6,
                                  xInt = 1000, yInt = 500, workDir,
                                  startTime = 0, endTime = NULL,
                                  breaksStart = 25, breaksVal = 25,
                                  binSize = 1e+4, filename, saveDir,
                                  legendPosition = "right") {
  if (is.null(seqSummary)) {
    stop("`seqSummary` must be the path to a sequencing summary file.",
         call. = FALSE)
  }

  # Work inside workDir, but always hand the caller's directory back, even
  # when something below fails.
  callerDir <- setwd(workDir)
  on.exit(setwd(callerDir), add = TRUE)

  if (!dir.exists(saveDir)) {
    stop("`saveDir` does not exist: ", saveDir, call. = FALSE)
  }

  message("------- Preparing Table -------")

  allReads <- read.table(file = seqSummary, header = TRUE, sep = "\t") %>%
    select(sequence_length_template, start_time, duration) %>%
    mutate(minute = floor((start_time + duration) / 60)) %>%
    arrange(minute) %>%
    rename(bp_length = 1)

  if (nrow(allReads) == 0) {
    stop("No reads found in ", seqSummary, call. = FALSE)
  }

  # A NULL endTime would turn the comparison below into logical(0); default to
  # the last minute present in the data instead.
  if (is.null(endTime)) {
    endTime <- max(allReads$minute, na.rm = TRUE)
  }

  tmp <- allReads %>%
    filter(minute >= startTime & minute <= endTime)

  if (nrow(tmp) == 0) {
    stop("No reads between minute ", startTime, " and minute ", endTime, ".",
         call. = FALSE)
  }

  # Bins are right-closed intervals labelled by their upper edge in kb.
  # Deriving the labels from the breaks keeps both in step for any binSize.
  binBreaks <- seq(0, maxRead, by = binSize)
  binLabels <- binBreaks[-1] / 1000

  lengthTable <- tmp %>%
    mutate(bin = cut(bp_length, breaks = binBreaks, include.lowest = FALSE,
                     labels = binLabels, ordered_result = TRUE)) %>%
    group_by(bin) %>%
    # as.numeric() avoids integer overflow once a bin holds more than ~2.1 Gb.
    summarise(bin_yield = sum(as.numeric(bp_length)), .groups = "drop")

  message("------- Table of lengths is created -------")

  # N50: walking from the longest read down, the length of the read at which
  # the running total first reaches half of all bases in the window.
  sortedLengths <- sort(as.numeric(tmp$bp_length), decreasing = TRUE)
  runningTotal <- cumsum(sortedLengths)
  n50Length <- sortedLengths[which(runningTotal >= sum(sortedLengths) * 0.5)[1]]
  N50 <- data.frame(bp_length = n50Length)
  message("------- N50 of minute ", endTime, " is ", n50Length / 1000,
          " kb -------")

  ### Building histogram of read-length bins vs Yield
  cols <- c("1" = "#009E73", "2" = "#CC79A7", "3" = "#0072B2",
            "4" = "#000000", "5" = "#999999")
  # Named labels are matched to groups by name, so the legend stays correct
  # when some length classes are absent from the data.
  groupLabels <- c("1" = " <100 kb", "2" = " 100-199 kb", "3" = " 200-499 kb",
                   "4" = " 500-999 kb", "5" = " >= 1 Mb")

  # The x axis is in kb while maxRead is in bp; cover whichever of the data
  # range and the visible range is larger (and at least the first break).
  xBreakMax <- max(xInt, maxRead / 1000, 250)

  histogramLength <- lengthTable %>%
    mutate(
      bin = as.numeric(as.character(bin)),
      # Reads outside every bin have bin = NA and therefore group = NA.
      # NOTE(review): the 1000 kb bin (reads in (990 kb, 1000 kb]) lands in
      # group "5" (">= 1 Mb"), unlike the other, upper-inclusive thresholds;
      # kept as in the original - confirm whether that is intended.
      group = case_when(
        bin <= 100 ~ "1",
        bin <= 200 ~ "2",
        bin <= 500 ~ "3",
        bin < 1000 ~ "4",
        bin >= 1000 ~ "5"
      )
    ) %>%
    ggplot() +
    geom_col(aes(x = bin, y = bin_yield / 1e+6, fill = group)) +    # change division value for PromethION to 1e+9
    labs(x = "Read Length (kb)", y = "Yield (Mb)", title = plotTitle) +
    coord_cartesian(xlim = c(0, xInt), ylim = c(0, yInt)) +
    # Applied to this plot only, instead of changing the session-wide theme.
    theme_minimal() +
    theme(legend.position = legendPosition, panel.grid.minor = element_blank(),
          plot.margin = margin(t = 20, r = 50, b = 20, l = 20),
          axis.text = element_text(size = 36),
          axis.title = element_text(size = 40),
          axis.title.y = element_text(margin = margin(t = 10, r = 20, b = 0, l = 0)),
          axis.title.x = element_text(margin = margin(t = 20, r = 10, b = 0, l = 0)),
          # NOTE(review): the end of this line was truncated in the source; the
          # b and l margins are a reconstruction - confirm the intended values.
          legend.text = element_text(size = 36,
                                     margin = margin(t = 6, r = 10, b = 6, l = 0))) +
    scale_y_continuous(expand = c(0, 0),
                       breaks = seq(from = breaksStart, to = yInt, by = breaksVal)) +
    scale_x_continuous(expand = c(0, 0),
                       breaks = seq(from = 250, to = xBreakMax, by = 250)) +
    geom_vline(data = N50, aes(xintercept = bp_length / 1000), color = "black",
               linewidth = 1.5, linetype = "dashed") +
    scale_fill_manual(values = cols, name = "Read Length", labels = groupLabels)

  print(histogramLength)
  ggsave(file.path(saveDir, paste0(filename, "_timed_histogram.png")),
         plot = histogramLength, units = "mm", width = 297, height = 210,
         bg = "white")

  invisible(histogramLength)
}
#
# Usage example
# circulomicTablesTimed(seqSummary = "/path/to/sequencing_summary_file.txt",
                #  plotTitle = "Your Plot Title",
                #  breaksStart = 250, breaksVal = 250, endTime = 3900, xInt = 750, yInt = 750,
                #  workDir = "/path/to/your/summary_file/folder",
                # filename = "filename_to_save",
                # saveDir = "/path/to/your/saving/directory")
