library(dplyr)
library(ggplot2)
library(scales)
#
#' Plot relative pore occupancy over time from a duty-time table
#'
#' Reads a CSV whose first three columns are taken (by position) as channel
#' state, time in minutes and sample count. For every minute it computes each
#' state's share of the "available" samples (states "adapter", "pore" and
#' "strand"), then plots the adapter + strand share and the adapter-only share
#' against time in hours, with a horizontal line at the mean adapter + strand
#' share. The plot is saved as a PNG and also drawn on the active device.
#'
#' @param dutyTimeFile Path to the input CSV. Relative paths are resolved
#'   against `workDir`.
#' @param runID Plot title.
#' @param graphType `"point"` draws both series as points; `"smooth"` draws a
#'   loess fit of the adapter + strand share; any other value draws only the
#'   reference lines. The value is also appended to the output file name.
#' @param legendPosition Legend position passed to `theme(legend.position =)`.
#'   `NULL` (default) keeps the legend at the top.
#' @param filename Output file stem; the file written is
#'   `paste0(filename, graphType, ".png")`.
#' @param saveDir Directory the PNG is written to. A relative path is resolved
#'   against `workDir`.
#' @param workDir Directory used to resolve `dutyTimeFile`.
#' @param xlim,ylim Optional axis limits passed to `coord_cartesian()`.
#' @param stopPoint Optional cutoff in the same unit as the time column
#'   (minutes); rows with `minutes > stopPoint` are excluded from both the
#'   mean and the plot.
#' @param averageLabel Currently unused; accepted for backward compatibility.
#' @param showLegend If `FALSE`, the legend is hidden.
#' @param muxHours Optional numeric vector of x positions (hours) at which
#'   dashed vertical lines are drawn. When `NULL`, a variable named `muxHours`
#'   visible from the function's defining environment is used if one exists;
#'   otherwise no vertical lines are drawn.
#' @return The ggplot object, invisibly.
poreOccupancy <- function(dutyTimeFile, runID, graphType = "", legendPosition = NULL, filename, saveDir,
                          workDir, xlim = NULL, ylim = NULL, stopPoint = NULL, averageLabel = TRUE, showLegend = TRUE,
                          muxHours = NULL) {
  # Put the caller's working directory back on exit, including on error.
  old_wd <- setwd(workDir)
  on.exit(setwd(old_wd), add = TRUE)

  # Earlier versions read `muxHours` as a free variable; keep honouring such a
  # variable when the argument is not supplied, but do not fail if it is absent.
  if (is.null(muxHours)) {
    muxHours <- get0("muxHours", envir = parent.env(environment()), inherits = TRUE)
  }

  ### Preparing master dataframe ----
  # The result stays grouped by `minutes`; all sums below are per minute.
  tab_3states <- read.csv(dutyTimeFile, header = TRUE) %>%
    rename(state = 1, minutes = 2, samples = 3) %>%
    group_by(minutes) %>%
    arrange(.by_group = TRUE) %>%
    mutate(samples_total = sum(samples),
           samples_portion = samples / samples_total * 100) %>%
    #### calculating for total available pores
    filter(state %in% c("adapter", "pore", "strand")) %>%
    mutate(available_samples = sum(samples),
           available_samples_portion = samples / available_samples * 100) %>%
    # 0/0 yields NaN when a minute has no available samples; drop those rows.
    filter(!is.na(available_samples_portion))

  #### "sequencing" pores only (adapter + strand): one row per minute
  combined <- tab_3states %>%
    filter(state %in% c("adapter", "strand")) %>%
    summarise(combined_portion = sum(available_samples_portion), .groups = "drop")

  # Apply the same cutoff to the mean and to the plotted data.
  if (!is.null(stopPoint)) {
    combined <- combined %>%
      filter(minutes <= stopPoint)
  }

  avgOccupancy <- mean(combined$combined_portion)
  message("Average occupancy is ", avgOccupancy / 100)

  ### tabling RELATIVE occupancy: percent adapter or adapter+strand to available pores
  adapter_only <- tab_3states %>%
    ungroup() %>%
    filter(state == "adapter") %>%
    select(minutes, state, available_samples_portion) %>%
    unique()

  tab_2states <- left_join(combined, adapter_only, by = "minutes")

  ### optional layers (NULL entries are ignored by `+`) ----
  mux_layer <- if (length(muxHours) > 0) {
    geom_vline(xintercept = muxHours, color = "#999999", linetype = "dashed")
  } else {
    NULL
  }

  point_layers <- if (identical(graphType, "point")) {
    list(
      geom_point(aes(x = minutes / 60, y = combined_portion, color = "Adapter + Strand"),
                 shape = 18, size = 1.5, show.legend = TRUE),
      geom_point(aes(x = minutes / 60, y = available_samples_portion, color = "Adapter Only"),
                 shape = 18, size = 1.5, show.legend = TRUE)
    )
  } else {
    NULL
  }

  # geom_smooth() needs x and y; the plot has no global mapping, so supply one.
  # The black line corresponds to the "Adapter + Strand" series.
  smooth_layers <- if (identical(graphType, "smooth")) {
    list(
      geom_smooth(aes(x = minutes / 60, y = combined_portion),
                  method = "loess", formula = y ~ x, color = "black"),
      labs(caption = "loess y ~ x")
    )
  } else {
    NULL
  }

  legend_pos <- if (is.null(legendPosition)) "top" else legendPosition

  ### building the plot ----
  occupy_2states <- ggplot(tab_2states) +
    scale_y_continuous(expand = c(0, 0), breaks = seq(25, 100, by = 25), limits = c(0, 105)) +
    scale_x_continuous(expand = c(0, 0)) +
    theme(axis.line = element_line(color = "#999999", linewidth = 0.5, linetype = "solid"),
          axis.text = element_text(size = 36), axis.title = element_text(size = 40),
          axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
          axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
          plot.margin = margin(t = 10, r = 40, b = 10, l = 10),  # Increase right margin if necessary
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          legend.position = legend_pos,
          legend.text = element_text(size = 32, margin = margin(t = 6, r = 0, b = 6, l = 0)),
          legend.title = element_blank(),
          legend.box.spacing = unit(30, "points"),
          title = element_text(size = 40)) +
    mux_layer +
    geom_hline(aes(color = "Mean of Adapter + Strand", yintercept = avgOccupancy),
               linewidth = 2, show.legend = FALSE) +
    labs(title = paste(runID),
         x = "Time (hours)", y = "Relative Occupancy (%)") +
    point_layers +
    scale_color_manual(breaks = c("Adapter + Strand", "Adapter Only", "Mean of Adapter + Strand"),
                       values = c("Adapter + Strand" = "black", "Adapter Only" = "#D55E00",
                                  "Mean of Adapter + Strand" = "#009E73")) +
    guides(color = guide_legend(override.aes = list(size = 5, shape = 16), nrow = 2)) +
    coord_cartesian(xlim = xlim, ylim = ylim) +
    smooth_layers

  if (!showLegend) {
    occupy_2states <- occupy_2states + theme(legend.position = "none")
  }

  ### saving and plotting ----
  setwd(saveDir)
  # Save the plot built here explicitly instead of relying on last_plot().
  ggsave(paste0(filename, graphType, ".png"), plot = occupy_2states,
         units = "mm", width = 297, height = 210, bg = "white")
  plot(occupy_2states)

  invisible(occupy_2states)
}
#
# Example usage
# (dutyTimeFile may point to either a duty_time.csv or a pore_activity.csv file)
# poreOccupancy(dutyTimeFile = "/path/to/duty_time.csv",
#               runID = "Plot Title", ylim = c(0,100), stopPoint = 3900, graphType = "point",
#               workDir = "/path/to/duty_file/folder",
#               filename = "file_name_to_save",
#               saveDir = "/path/to/saving/directory")