source('environment.r')


#Plot coverage trace of MNase time course, wild type samples shown here
#replace the bam file paths with YKU70-, MRE11- or NHEJ data to generate respective plots

#list of paths to the bam files aligned to the sacCer3 Genome

# Directory that holds the input data files.
parent.dir <- "data"

# File-name groups for replicate 1 and replicate 2, as returned by
# get_filenames_group(). The third argument is FALSE for the first pair and
# TRUE for the "chr2" pair that the coverage loop below reads.
# NOTE(review): presumably the flag selects the chr2-specific alignments --
# confirm against get_filenames_group().
replicate.1.filenames <- get_filenames_group(parent.dir, 1, FALSE)
replicate.2.filenames <- get_filenames_group(parent.dir, 2, FALSE)
replicate.1.chr2.filenames <- get_filenames_group(parent.dir, 1, TRUE)
replicate.2.chr2.filenames <- get_filenames_group(parent.dir, 2, TRUE)

# Read in the southern cutting data (first row of the file is the header).
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
southern_cutting_data.df <- read.table(
  paste0(parent.dir, "/southern_cutting_data.tsv"),
  header = TRUE
)


# Experiment labels, in the order the coverage loop processes them; each label
# is also pasted into that experiment's output file name.
exp <- c("WT", "YKU70", "MRE11", "NHEJ", "NHEJ_DNL4")

# Plot title for each experiment (same order as `exp`). Mixing strings and
# expressions in c() yields an expression vector, so `title[e]` can be handed
# straight to `main =`.
title <- c(
  "WT Relative (Pre-Induction) MNase Coverage",
  expression(italic("yku70")*italic(Delta)*" Relative (Pre-Induction) MNase Coverage"),
  expression(italic("mre11")*italic(Delta)*" Relative (Pre-Induction) MNase Coverage"),
  "WT Relative (Pre-Induction) MNase Coverage",
  expression(italic("dnl4")*italic(Delta)*" Relative (Pre-Induction) MNase Coverage")
)

# Lower y-axis limit for each experiment's plot: -2 for the first three
# experiments, -3 for the last two.
lower.y <- rep(c(-2, -3), times = c(3, 2))

# Accumulates the coverage vector of every sample across all experiments.
all_coverage.l <- list()


# -----------------------------------------------------------------------------
# Relative MNase coverage plot for every experiment in `exp`.
#
# For each experiment this block
#   1. reads the chrII data of the six samples (time points) of each replicate
#      and takes the smallest row count per replicate as its sampling depth,
#   2. reads each sample again, randomly down-samples each replicate to its
#      sampling depth, combines both replicates and computes the coverage
#      vector with calculate.coverage.roll() over the window around the HO
#      cut site,
#   3. writes a PNG showing log2(coverage of sample i / coverage of sample 1).
#
# Side effects: creates the "figures" directory if needed, writes one PNG per
# experiment, fills `all_coverage.l`, and leaves `chr`, `ho_start`, `ho_end`,
# `start` and `end` defined for the code that follows the loop.
# -----------------------------------------------------------------------------

# Number of samples (time points) stored per experiment in the file-name groups.
n.timepoints <- 6

# png() does not create missing directories, so make sure the output folder
# exists before any device is opened.
dir.create("figures", showWarnings = FALSE)

# Read one file over the whole chromosome: get_dot_mat() is called with 1 and
# the value of get_chr_length() as its last two arguments.
# NOTE(review): presumably these are the start/end coordinates -- confirm
# against get_dot_mat().
read_dot_mat_whole_chr <- function(bam.path, chr.name) {
  get_dot_mat(bam.path, chr.name, 1, get_chr_length(bam.path, chr.name))
}

# set boundaries of coverage window (identical for every experiment)
chr <- "chrII"
ho_start <- 431525
ho_end <- 431641
start <- ho_start - 25000
end <- ho_end + 25000

# x positions used for every trace; one value per plotted coverage value
x.positions <- seq(start, (end - 500), 10)

for (e in seq_along(exp)) {

  # calculate sampling depth for all samples to be at the same read depth
  # (done separately for replicate 1 and replicate 2)
  depth.r1 <- integer(n.timepoints)
  depth.r2 <- integer(n.timepoints)

  for (i in seq_len(n.timepoints)) {
    replicate.1.filename <- as.character(replicate.1.chr2.filenames[(e - 1) * n.timepoints + i])
    replicate.2.filename <- as.character(replicate.2.chr2.filenames[(e - 1) * n.timepoints + i])

    depth.r1[i] <- nrow(read_dot_mat_whole_chr(replicate.1.filename, chr))
    depth.r2[i] <- nrow(read_dot_mat_whole_chr(replicate.2.filename, chr))
  }

  sampling_depth.r1 <- min(depth.r1)
  sampling_depth.r2 <- min(depth.r2)

  # initialize an empty list, then calculate a vector of coverage values for
  # each time point and append it to this list
  wt_mnase_coverage.l <- list()

  for (i in seq_len(n.timepoints)) {

    # re-seed for every sample so the random down-sampling is reproducible
    set.seed(9)

    replicate.1.filename <- as.character(replicate.1.chr2.filenames[(e - 1) * n.timepoints + i])
    replicate.2.filename <- as.character(replicate.2.chr2.filenames[(e - 1) * n.timepoints + i])

    # read in the bam files to data frames; they are read again here rather
    # than kept from the depth pass, so only two samples are held at a time
    df2 <- read_dot_mat_whole_chr(replicate.1.filename, chr)
    df3 <- read_dot_mat_whole_chr(replicate.2.filename, chr)

    # normalize each replicate to its lowest read depth, then pool them
    df_2 <- df2[sample(nrow(df2), sampling_depth.r1, replace = FALSE), ]
    df_3 <- df3[sample(nrow(df3), sampling_depth.r2, replace = FALSE), ]
    df_1 <- rbind(df_2, df_3)

    new.coverage <- calculate.coverage.roll(df_1, start, end)

    wt_mnase_coverage.l[[i]] <- new.coverage
    all_coverage.l[[(e - 1) * n.timepoints + i]] <- new.coverage
  }

  # plot the coverage; experiments 4 and 5 go to the Figure 5BC files with a
  # square canvas, experiments 1-3 to the Figure 1C files with a wide canvas
  if (e > 3) {
    file_name <- paste0("figures/Figure_5BC_", exp[e], "_mnase.png")
    png(file_name, width = 6, height = 6, units = "in", res = 300)
  } else {
    file_name <- paste0("figures/Figure_1C_", exp[e], "_mnase.png")
    png(file_name, width = 10, height = 4, units = "in", res = 300)
  }

  # every trace is expressed relative to the first sample of the experiment
  baseline.coverage <- wt_mnase_coverage.l[[1]]

  # the first sample relative to itself sets up the axes
  plot(x = x.positions,
       y = log2(baseline.coverage / baseline.coverage),
       type = "l",
       col = blues9[4],
       lty = 1,
       main = title[e],
       ylab = "Log2(FPKM Normalized Ratio)",
       xlab = "ChrII Position (bp)",
       ylim = c(lower.y[e], 1)
  )

  # remaining samples, drawn in progressively darker blues (blues9[5:9])
  for (i in 2:n.timepoints) {
    lines(x = x.positions,
          y = log2(wt_mnase_coverage.l[[i]] / baseline.coverage),
          type = "l",
          col = blues9[i + 3],
          lty = 2
    )
  }

  # add a legend (only drawn for experiments 4 and 5)
  # NOTE(review): the units of the labels -1..6 are not stated here -- confirm.
  if (e > 3) {
    legend("bottomright", legend = c(-1, 0, 1, 2, 4, 6),
           border = "gray",
           col = blues9[4:9],
           pch = "-",
           lwd = 3,
           cex = 1.2
    )
  }

  # mark the HO cut site
  abline(v = ho_end, col = "red")
  abline(v = ho_start, col = "red")

  dev.off()
}
# ##yku70- data
# chr2_files2 = list("/data/home/vt26/DM1135/DM1135_chr2_pho5matindel_m1_2019-06-18-23-47.bam",
#                    "/data/home/vt26/DM1136/DM1136_chr2_pho5matindel_m1_2019-06-19-00-00.bam",
#                    "/data/home/vt26/DM1137/DM1137_chr2_pho5matindel_m1_2019-06-19-00-12.bam",
#                    "/data/home/vt26/DM1138/DM1138_chr2_pho5matindel_m1_2019-06-19-00-25.bam",
#                    "/data/home/vt26/DM1139/DM1139_chr2_pho5matindel_m1_2019-06-19-00-37.bam",
#                    "/data/home/vt26/DM1140/DM1140_chr2_pho5matindel_m1_2019-06-19-00-50.bam"
# )
# chr2_files3 = list("/data/home/vt26/DM1153/DM1153_chr2_pho5matindel_m1_2019-07-29-16-21.bam",
#                    "/data/home/vt26/DM1154/DM1154_chr2_pho5matindel_m1_2019-07-29-16-45.bam",
#                    "/data/home/vt26/DM1155/DM1155_chr2_pho5matindel_m1_2019-07-29-16-56.bam",
#                    "/data/home/vt26/DM1156/DM1156_chr2_pho5matindel_m1_2019-07-29-17-29.bam",
#                    "/data/home/vt26/DM1157/DM1157_chr2_pho5matindel_m1_2019-07-30-01-44.bam",
#                    "/data/home/vt26/DM1158/DM1158_chr2_pho5matindel_m1_2019-07-30-01-54.bam"
# )
# 
# sacCer3_files2 = list("/data/home/vt26/DM1135/DM1135_sacCer3_m1_2019-06-19-02-37.bam",
#                       "/data/home/vt26/DM1136/DM1136_sacCer3_m1_2019-06-19-02-51.bam",
#                       "/data/home/vt26/DM1137/DM1137_sacCer3_m1_2019-06-19-03-06.bam",
#                       "/data/home/vt26/DM1138/DM1138_sacCer3_m1_2019-06-19-03-20.bam",
#                       "/data/home/vt26/DM1139/DM1139_sacCer3_m1_2019-06-19-03-35.bam",
#                       "/data/home/vt26/DM1140/DM1140_sacCer3_m1_2019-06-19-03-49.bam"
# )
# sacCer3_files3 = list("/data/home/vt26/DM1153/DM1153_sacCer3_m1_2019-07-30-13-13.bam",
#                       "/data/home/vt26/DM1154/DM1154_sacCer3_m1_2019-07-30-13-23.bam",
#                       "/data/home/vt26/DM1155/DM1155_sacCer3_m1_2019-07-30-13-35.bam",
#                       "/data/home/vt26/DM1156/DM1156_sacCer3_m1_2019-07-30-13-48.bam",
#                       "/data/home/vt26/DM1157/DM1157_sacCer3_m1_2019-07-30-13-57.bam",
#                       "/data/home/vt26/DM1158/DM1158_sacCer3_m1_2019-07-30-14-07.bam"
# )
# 
# #calculate sampling depth for all samples to be at the same read depth
# 
# depth.v = vector()
# 
# for (i in 1:6){
#   
#   #read in the bam files to data frames
#   chr = "chrII"
#   df2 = get_dot_mat(as.character(chr2_files2[i]), chr, 1, get_chr_length(as.character(chr2_files2[i]), chr))
#   df3 = get_dot_mat(as.character(chr2_files3[i]), chr, 1, get_chr_length(as.character(chr2_files3[i]), chr))
#   
#   df_2 = rbind(df2, df3)
#   
#   depth.v = c(depth.v, dim(df_2)[1])
# }
# 
# sampling_depth = min(depth.v)
# 
# 
# #set boundaries of coverage window
# 
# ho_start = 431525
# 
# ho_end = 431641
# 
# start = ho_start-25000
# 
# end = ho_end+25000
# 
# chr = "chrII"
# 
# 
# #intialize an empty list and then calculate a vector of coverage values for each time point and append to this list
# yku70_mnase_coverage.l = list()
# 
# for (i in 1:6){
#   
#   set.seed(9)
#   
#   #read in the bam files to data frames
#   chr = "chrII"
#   df2 = get_dot_mat(as.character(chr2_files2[i]), chr, 1, get_chr_length(as.character(chr2_files2[i]), chr))
#   df3 = get_dot_mat(as.character(chr2_files3[i]), chr, 1, get_chr_length(as.character(chr2_files3[i]), chr))
#   
#   df_2 = rbind(df2, df3)
#   
#   norm.df = df_2[sample(nrow(df_2), sampling_depth, replace = FALSE),]
#   coverage.v = vector()
#   for (n in seq(start,(end-500),10)){
#     
#     mod_start = n
#     mod_end = n + 500
#     
#     occupancy_of_window = which(norm.df$mpoint > mod_start & 
#                                   norm.df$mpoint< mod_end)
#     
#     
#     reads_in_window = length(occupancy_of_window)
#     
#     coverage.v = c(coverage.v, reads_in_window)
#     
#     
#     # create progress bar
#     pb <- txtProgressBar(min = 0, max = (end-500-start), style = 3)
#     #    Sys.sleep(0.1)
#     # update progress bar
#     setTxtProgressBar(pb, (n-start))
#     
#     
#   }
#   
#   yku70_mnase_coverage.l[[i]] = coverage.v
#   
#   print(i)
#   
# }
# 
# close(pb)
# 
# #plot the coverage
# 
# 
# 
# plot(x=seq(start,(end-500),10),
#      y= log2(yku70_mnase_coverage.l[[1]]/yku70_mnase_coverage.l[[1]]),
#      type = "l",
#      col = blues9[4],
#      lty = 1,
#      main = "Relative (Pre-Induction) MNase Coverage",
#      ylab = "Log2(FPKM Normalized Ratio)",
#      xlab = "Position (ChrII)",
#      ylim = c(-3, 1)
# )
# 
# for (i in 2:6){
#   
#   lines(x = seq(start,(end-500),10),
#         y= log2(yku70_mnase_coverage.l[[i]]/yku70_mnase_coverage.l[[1]]),
#         type = "l",
#         col = blues9[i+3],
#         lty = 2
#   )
#   
# }
# 
# #add a legend
# 
# legend(453500, y = -.65, legend = c(0,15,30,60,90,120),
#        border = "gray",
#        col = blues9[4:9],
#        pch = "-",
#        lwd = 3,
#        cex = 0.4
# )
# 
# #mark the HO cut site
# abline(v = ho_end, col = "red")
# abline(v = ho_start, col = "red")
# 
# ##Mre11-
# 
# chr2_files2 = list("/data/home/vt26/DM1141/DM1141_chr2_pho5matindel_m1_2019-06-19-01-02.bam",
#                    "/data/home/vt26/DM1142/DM1142_chr2_pho5matindel_m1_2019-06-19-01-15.bam",
#                    "/data/home/vt26/DM1143/DM1143_chr2_pho5matindel_m1_2019-06-19-01-26.bam",
#                    "/data/home/vt26/DM1144/DM1144_chr2_pho5matindel_m1_2019-06-19-01-39.bam",
#                    "/data/home/vt26/DM1145/DM1145_chr2_pho5matindel_m1_2019-06-19-01-50.bam",
#                    "/data/home/vt26/DM1146/DM1146_chr2_pho5matindel_m1_2019-06-19-02-01.bam"
# )
# chr2_files3 = list("/data/home/vt26/DM1159/DM1159_chr2_pho5matindel_m1_2019-07-30-02-04.bam",
#                    "/data/home/vt26/DM1160/DM1160_chr2_pho5matindel_m1_2019-07-30-02-13.bam",
#                    "/data/home/vt26/DM1161/DM1161_chr2_pho5matindel_m1_2019-07-30-02-22.bam",
#                    "/data/home/vt26/DM1162/DM1162_chr2_pho5matindel_m1_2019-07-30-02-32.bam",
#                    "/data/home/vt26/DM1163/DM1163_chr2_pho5matindel_m1_2019-07-30-02-42.bam",
#                    "/data/home/vt26/DM1164/DM1164_chr2_pho5matindel_m1_2019-07-30-02-51.bam"
# )
# 
# 
# sacCer3_files2 = list("/data/home/vt26/DM1141/DM1141_sacCer3_m1_2019-06-19-04-03.bam",
#                       "/data/home/vt26/DM1142/DM1142_sacCer3_m1_2019-06-19-04-18.bam",
#                       "/data/home/vt26/DM1143/DM1143_sacCer3_m1_2019-06-19-04-30.bam",
#                       "/data/home/vt26/DM1144/DM1144_sacCer3_m1_2019-06-19-04-45.bam",
#                       "/data/home/vt26/DM1145/DM1145_sacCer3_m1_2019-06-19-04-57.bam",
#                       "/data/home/vt26/DM1146/DM1146_sacCer3_m1_2019-06-19-05-08.bam"
# )
# sacCer3_files3 = list("/data/home/vt26/DM1159/DM1159_sacCer3_m1_2019-07-30-14-17.bam",
#                       "/data/home/vt26/DM1160/DM1160_sacCer3_m1_2019-07-30-14-28.bam",
#                       "/data/home/vt26/DM1161/DM1161_sacCer3_m1_2019-07-30-14-42.bam",
#                       "/data/home/vt26/DM1162/DM1162_sacCer3_m1_2019-07-30-14-53.bam",
#                       "/data/home/vt26/DM1163/DM1163_sacCer3_m1_2019-07-30-15-02.bam",
#                       "/data/home/vt26/DM1164/DM1164_sacCer3_m1_2019-07-30-15-15.bam"
# )
# 
# #calculate sampling depth for all samples to be at the same read depth
# 
# depth.v = vector()
# 
# for (i in 1:6){
#   
#   #read in the bam files to data frames
#   chr = "chrII"
#   df2 = get_dot_mat(as.character(chr2_files2[i]), chr, 1, get_chr_length(as.character(chr2_files2[i]), chr))
#   df3 = get_dot_mat(as.character(chr2_files3[i]), chr, 1, get_chr_length(as.character(chr2_files3[i]), chr))
#   
#   df_2 = rbind(df2, df3)
#   
#   depth.v = c(depth.v, dim(df_2)[1])
# }
# 
# sampling_depth = min(depth.v)
# 
# 
# #set boundaries of coverage window
# 
# ho_start = 431525
# 
# ho_end = 431641
# 
# start = ho_start-25000
# 
# end = ho_end+25000
# 
# chr = "chrII"
# 
# 
# #intialize an empty list and then calculate a vector of coverage values for each time point and append to this list
# mre11_mnase_coverage.l = list()
# 
# for (i in 1:6){
#   
#   set.seed(9)
#   
#   #read in the bam files to data frames
#   chr = "chrII"
#   df2 = get_dot_mat(as.character(chr2_files2[i]), chr, 1, get_chr_length(as.character(chr2_files2[i]), chr))
#   df3 = get_dot_mat(as.character(chr2_files3[i]), chr, 1, get_chr_length(as.character(chr2_files3[i]), chr))
#   
#   df_2 = rbind(df2, df3)
#   
#   norm.df = df_2[sample(nrow(df_2), sampling_depth, replace = FALSE),]
#   coverage.v = vector()
#   for (n in seq(start,(end-500),10)){
#     
#     mod_start = n
#     mod_end = n + 500
#     
#     occupancy_of_window = which(norm.df$mpoint > mod_start & 
#                                   norm.df$mpoint< mod_end)
#     
#     
#     reads_in_window = length(occupancy_of_window)
#     
#     coverage.v = c(coverage.v, reads_in_window)
#     
#     
#     # create progress bar
#     pb <- txtProgressBar(min = 0, max = (end-500-start), style = 3)
#     #    Sys.sleep(0.1)
#     # update progress bar
#     setTxtProgressBar(pb, (n-start))
#     
#     
#   }
#   
#   mre11_mnase_coverage.l[[i]] = coverage.v
#   
#   print(i)
#   
# }
# 
# close(pb)
# 
# #plot the coverage
# 
# plot(x=seq(start,(end-500),10),
#      y= log2(mre11_mnase_coverage.l[[1]]/mre11_mnase_coverage.l[[1]]),
#      type = "l",
#      col = blues9[4],
#      lty = 1,
#      main = "Relative (Pre-Induction) MNase Coverage",
#      ylab = "Log2(FPKM Normalized Ratio)",
#      xlab = "Position (ChrII)",
#      ylim = c(-3, 1)
# )
# 
# for (i in 2:6){
#   
#   lines(x = seq(start,(end-500),10),
#         y= log2(mre11_mnase_coverage.l[[i]]/mre11_mnase_coverage.l[[1]]),
#         type = "l",
#         col = blues9[i+3],
#         lty = 2
#   )
#   
# }
# 
# #add a legend
# 
# legend(453500, y = -.65, legend = c(0,15,30,60,90,120),
#        border = "gray",
#        col = blues9[4:9],
#        pch = "-",
#        lwd = 3,
#        cex = 0.4
# )
# 
# #mark the HO cut site
# abline(v = ho_end, col = "red")
# abline(v = ho_start, col = "red")

# -----------------------------------------------------------------------------
# Figure 1D: smoothed log2 coverage ratio of the last sample to the first
# sample for the first three experiments (entries 1-6, 7-12 and 13-18 of
# `all_coverage.l`), drawn on one plot with a different line type per strain.
# Relies on `start`, `end`, `ho_start`, `ho_end` and `all_coverage.l` left
# behind by the coverage loop above.
# -----------------------------------------------------------------------------
png("figures/Figure_1D_mnase_smooth.png", width = 6, height = 5, units = "in", res = 300)

# x positions shared by all three traces
smooth.x <- seq(start, (end - 500), 10)

# positions in all_coverage.l of the first and last sample of each strain
baseline.idx <- c(1, 7, 13)
final.idx <- c(6, 12, 18)

# smoothed log2(last / first) trace for each strain
smoothed.l <- lapply(seq_along(final.idx), function(k) {
  smooth.spline(
    log2(all_coverage.l[[final.idx[k]]] / all_coverage.l[[baseline.idx[k]]]),
    spar = .65
  )$y
})

# first strain (WT) sets up the axes
plot(x = smooth.x,
     y = smoothed.l[[1]],
     type = "l",
     col = "blue",
     lty = 1,
     main = "Smoothed MNase Coverage (120 min)",
     ylab = "Log2(FPKM Normalized Ratio)",
     xlab = "ChrII Position (bp)",
     ylim = c(-2, 1),
     mgp = c(2,1,0),
     cex.axis = 0.8
)

# NOTE(review): these markers are offset by +500 whereas the per-experiment
# plots mark ho_start/ho_end without an offset -- confirm which is intended.
abline(v = ho_end+500, col = "red")
abline(v = ho_start+500, col = "red")

# Remaining strains. Only arguments that lines() actually uses are passed:
# main/xlab/ylab/ylim are not graphical parameters (they only trigger
# warnings) and mgp/cex.axis have no effect on a line.
for (k in 2:3) {
  lines(x = smooth.x,
        y = smoothed.l[[k]],
        type = "l",
        col = "blue",
        lty = k
  )
}

# reference line at a log2 ratio of 0 (no change)
abline(h = 0, lwd = 0.5)

legend("bottomright",
       legend = c("WT",
                  expression(italic("yku70"*Delta)),
                  expression(italic("mre11"*Delta))),
       border = "gray",
       col = "blue",
       lty = c(1,2,3),
       lwd = 1,
       cex = 1
)

dev.off()

# -----------------------------------------------------------------------------
# Figure 1B: fraction cut over time from the southern cutting data, with one
# trace per strain (columns `wt`, `yku70` and `mre11` plotted against `time`).
# -----------------------------------------------------------------------------
file_name <- "figures/Figure_1B_southern.png"
png(file_name, width = 5, height = 4, units = "in", res = 300)

# plot the wt data; the default axes are suppressed and drawn by hand below
plot(x = southern_cutting_data.df$time,
     y = southern_cutting_data.df$wt,
     type = "b",
     xlab = "Time (Minutes)",
     ylab = "Fraction Cut",
     ylim = c(0,1),
     xaxt = 'n',
     yaxt = 'n',
     pch = 0,
     main = expression(bold("Break Induction Kinetics At ")*bolditalic("PHO5"))
)

# specify axes: x ticks at fixed time values, y ticks every 0.2
axis(1,
     at = c(0, 15, 30, 60, 90, 120),
     labels = c(0, 15, 30, 60, 90, 120))

axis(2,
     at = seq(0,1,0.2),
     labels = seq(0,1,0.2))

# draw a line at 90% cut
abline(h = 0.9,
       col = "red",
       lty = "dotted")

# Add the mutants to the plot. lines() only gets arguments it uses:
# xlab/ylab/ylim are not graphical parameters (they only trigger warnings)
# and xaxt/yaxt have no effect when adding a line.
lines(x = southern_cutting_data.df$time,
      y = southern_cutting_data.df$yku70,
      type = "b",
      lty = 2,
      pch = 1
)

lines(x = southern_cutting_data.df$time,
      y = southern_cutting_data.df$mre11,
      type = "b",
      lty = 4,
      pch = 2
)

# add the legend; line types and symbols match the three traces above
legend(90,
       0.6,
       legend = c("WT",
                  expression(italic("yku70")*italic(Delta)),
                  expression(italic("mre11")*italic(Delta))),
       lty = c(1,2,4),
       pch = c(0,1,2),
       cex = 0.8
)

dev.off()
