#PHO5 native, +HOCS plots

#sticking with vinay's file locations

# Input BAM alignments, one list per strain/construct. Each list holds the
# replicate files as individual character strings (callers index with `[i]`
# and wrap in as.character()).

# PHO5 with the inserted HO cut site (aligned to the modified chr2 reference)
pho5_files <- as.list(c(
  "/data/home/vt26/DM1129/DM1129_chr2_pho5matindel_m1_2019-06-18-22-32.bam",
  "/data/home/vt26/DM1147/DM1147_chr2_pho5matindel_m1_2019-07-29-15-26.bam"
))

# yam098 replicates aligned to sacCer3
yam098_files <- as.list(c(
  "/data/home/vt26/DM879/DM879_sacCer3_m1_2018-08-08-14-50.bam",
  "/data/home/vt26/DM880/DM880_sacCer3_m1_2018-08-08-15-25.bam",
  "/data/home/vt26/DM881/DM881_sacCer3_m1_2018-08-08-15-58.bam",
  "/data/home/vt26/DM882/DM882_sacCer3_m1_2018-08-08-16-43.bam",
  "/data/home/vt26/DM883/DM883_sacCer3_m1_2018-08-08-17-18.bam",
  "/data/home/vt26/DM884/DM884_sacCer3_m1_2018-08-08-17-44.bam",
  "/data/home/vt26/DM885/DM885_sacCer3_m1_2018-08-08-18-12.bam",
  "/data/home/vt26/DM886/DM886_sacCer3_m1_2018-08-08-18-39.bam",
  "/data/home/vt26/DM887/DM887_sacCer3_m1_2018-08-08-19-12.bam"
))

# W303 replicates from the shared Illumina pipeline output, aligned to sacCer3
w303_files <- as.list(c(
  "/data/illumina_pipeline/aligned_experiments_new/DM498/DM498_sacCer3_M1_2016-10-24-14-06.bam",
  "/data/illumina_pipeline/aligned_experiments_new/DM504/DM504_sacCer3_M1_2016-10-24-15-01.bam"
))

# for (i in 1:9){
# chr = "chrII"
# df = get_dot_mat(as.character(yam098_files[i]), chr, 1, get_chr_length(as.character(yam098_files[i]), chr))
# plot(density(df$fsize))
# }


#ideal nucleosome
# ---- Idealized nucleosome: parameters and (length, pos) grid ----
# Load the per-experiment nucleosome statistics table and pull out the
# parameters for the first experiment in `experiment` ("WT"). These values
# parameterize the 2D Gaussian kernel built from this grid.
nuc.stats <- read.table('nuc_stats.df')
experiment <- c("WT", "YKU70", "MRE11")

# Row of nuc.stats whose first column matches the chosen experiment.
exp <- which(nuc.stats[, 1] == experiment[1])

# Exactly one row must match. Zero or several matches would hand empty or
# vector-valued parameters to the kernel code, which then fails much later
# with an unrelated-looking dimension error.
if (length(exp) != 1) {
  stop("expected exactly one row for experiment '", experiment[1],
       "' in nuc_stats.df, found ", length(exp), call. = FALSE)
}

nucleosome_size <- nuc.stats[exp, 'nucleosome_size']
var_x <- nuc.stats[exp, 'var_x']
var_y <- nuc.stats[exp, 'var_y']
std_x <- nuc.stats[exp, 'std_x']
std_y <- nuc.stats[exp, 'std_y']

#generate the idealized nucleosome 2d kernel
#find cross correlation score to kernel density across the region of interest
library(reshape)
library(mvtnorm)
library(lattice)

# all fragment lengths (1..250) and positions relative to the kernel centre
# (-75..75) covered by the kernel
lengths <- seq(1, 250, by = 1)
pos <- seq(-75, 75, by = 1)

# every (length, pos) combination, one per row; expand.grid() already returns
# a data frame
dens.df <- expand.grid(lengths, pos)
colnames(dens.df) <- c('length', 'pos')
  
  # Density of the idealized nucleosome at a single grid point.
  #
  # `row` is one row of the (length, pos) grid: element 1 is the fragment
  # length, element 2 the position. The kernel is a bivariate normal centred on
  # (nucleosome_size, 0) with no length/position covariance.
  #
  # dmvnorm() takes a covariance matrix, so the diagonal entries are variances:
  # each measured variance is divided by 4, i.e. the standard deviation is
  # halved. Reads `nucleosome_size`, `var_x` and `var_y` from the enclosing
  # environment.
  get.density <- function(row)
  {
    length_var <- var_y / 4
    pos_var <- var_x / 4

    kernel_centre <- c(nucleosome_size, 0)
    kernel_cov <- matrix(c(length_var, 0, 0, pos_var), ncol = 2)

    dmvnorm(row[1:2], mean = kernel_centre, sigma = kernel_cov)
  }
  
  # Evaluate the idealized-nucleosome density at every (length, pos) grid
  # point; apply() hands each row to get.density as a numeric vector.
  dens.df[["density"]] <- apply(dens.df, MARGIN = 1, FUN = get.density)

  # Reshape the long (length, pos, density) table into the kernel:
  # one row per fragment length, one column per position.
  kernel.df <- cast(dens.df, formula = length ~ pos, value = "density")
  kernel.mat <- as.matrix(kernel.df)
  




# ---- Common sampling depth ----
# Count the chrII fragments in each pooled dataset and take the smallest
# count, so that every dataset can be subsampled without replacement to the
# same depth before plotting.
chr <- "chrII"

# Replicate BAMs pooled per dataset. These must be the same files that are
# read and subsampled for the figure: the yam098 depth used to be taken from
# replicates 5-6 while replicates 1-2 are the ones plotted, so the minimum was
# not guaranteed to bound the plotted data (sample(..., replace = FALSE) fails
# when asked for more rows than exist).
# NOTE(review): confirm replicates 1-2 (not 5-6) are the intended yam098 pair.
depth_inputs <- list(
  pho5   = pho5_files[1:2],
  yam098 = yam098_files[1:2],
  w303   = w303_files[1:2]
)

# Fragment count per dataset. Summing the per-file row counts equals the row
# count of the rbind()-ed data frames without building the combined copy.
depth.v <- vapply(depth_inputs, function(bam_files) {
  sum(vapply(bam_files, function(bam) {
    bam <- as.character(bam)
    nrow(get_dot_mat(bam, chr, 1, get_chr_length(bam, chr)))
  }, integer(1)))
}, integer(1))

sampling_depth <- min(depth.v)

  
  # ---- Figure: native PHO5 (W303, S288C) vs. PHO5 + HO cut site ----
  # Writes <start>_<end>_pho5-yam098_native_vs_insertion.png. For each dataset
  # the figure stacks a gene/arrow schematic above a "typhoon" plot (fragment
  # midpoint vs. fragment size, coloured by local density) overlaid with a
  # nucleosome cross-correlation track.

  # HO cut-site coordinates and the plotted window around them
  ho_start <- 431525
  ho_end <- 431641
  start <- ho_start - 900
  end <- ho_end + 1000
  chr <- "chrII"

  #nucleosome_size = 159

  # Read the fragments on `chr` from each BAM in `bam_files` and stack them
  # into a single data frame.
  read_pooled_fragments <- function(bam_files, chr) {
    per_file <- lapply(bam_files, function(bam) {
      bam <- as.character(bam)
      get_dot_mat(bam, chr, 1, get_chr_length(bam, chr))
    })
    do.call(rbind, per_file)
  }

  # Slide a 151-column window along the fragment matrix and cross-correlate
  # each window (rows 1..250) with the idealized nucleosome `kernel`.
  # Returns one score per step, for the first (end - start) steps. Earlier
  # code scored 76 further windows and then discarded them; those are simply
  # not computed here. Scores are collected in a preallocated list rather than
  # by growing a vector.
  score_nucleosome_track <- function(reads_df, start, end, kernel) {
    reads_mat <- get_matrix_from_df(reads_df, (start - 76), (end + 151))
    n_scores <- end - start
    scores <- vector("list", n_scores)
    for (n in seq_len(n_scores)) {
      window_mat <- reads_mat[1:250, n:(n + 150)]
      scores[[n]] <- as.vector(
        signal$correlate2d(window_mat, kernel, mode = "valid")
      )
    }
    unlist(scores)[seq_len(n_scores)]
  }

  # Draw one typhoon panel into the current plot slot: subsample the pooled
  # reads to `sampling_depth`, scatter the fragments inside (start, end), then
  # overlay the scaled nucleosome score track on the same panel.
  draw_typhoon_panel <- function(bam_files, panel_title, chr, start, end,
                                 sampling_depth, kernel) {
    # fixed seed so the subsample (and therefore the figure) is reproducible
    set.seed(9)

    pooled <- read_pooled_fragments(bam_files, chr)
    norm_df <- pooled[sample(nrow(pooled), sampling_depth, replace = FALSE), ]

    # fragments whose midpoint lies strictly inside the plotted window
    window_df <- norm_df[which(norm_df$mpoint < end & norm_df$mpoint > start), ]

    point_cols <- densColsDM(window_df$mpoint, window_df$fsize,
                             nbin = c(1024, 1024),
                             bandwidth = c(36, 16),
                             transformation = function(x) x^.5,
                             colramp = colorRampPalette(brewer.pal(9, "Oranges")),
                             z_factor = 1)

    # scale the correlation track by the number of fragments in the window
    track <- score_nucleosome_track(norm_df, start, end, kernel)
    track <- track * 10000 / nrow(window_df)

    plot(window_df$mpoint, window_df$fsize,
         col = point_cols,
         cex = 0.25, pch = 20,
         main = panel_title,
         xlab = 'Chr II Position (bp)',
         ylab = 'Fragment Size',
         #xaxt = "n",
         mgp = c(2, 1, 0),
         cex.axis = 0.8,
         xaxs = "i")

    # overlay: the track is drawn against its own index on a fixed 0..0.2
    # y-scale, on top of the scatter plot
    par(new = TRUE)
    plot(track,
         type = "l",
         ylim = c(0, 0.2),
         axes = FALSE,
         ann = FALSE,
         col = "darkslategray",
         xaxs = "i")

    # Landmarks that used to be drawn here and are currently disabled:
    # abline(v = ho_start, col = "blue", lty = "dotted")      # HOcs panel only
    # abline(v = ho_end, col = "blue", lty = "dotted")        # HOcs panel only
    # abline(v = 430986, col = "darkgreen", lty = "dotdash", lwd = 2)
    # abline(v = (430951-82), col = "black", lty = "solid", lwd = 2)
    # text(x = 430986, y = 235, labels = "TSS", srt = 45, col = "darkgreen")
    # text(x = (430951-82), y = 235, labels = "Sal1", srt = 45)

    invisible(NULL)
  }

  # Midpoint of the Sum1p label on the native locus. On the HOcs construct the
  # label sits 117 bp further right (presumably the length of the inserted HO
  # site -- confirm).
  sum1p_mid <- (431878 + 431888) / 2

  # One entry per dataset, in top-to-bottom plotting order.
  panels <- list(
    list(title = "Native PHO5 (W303)",
         bam_files = w303_files[1:2],
         schematic = MakeArrowSchematic2,
         sum1p_x = sum1p_mid,
         label_hocs = FALSE),
    list(title = "Native PHO5 (S288C)",
         bam_files = yam098_files[1:2],
         schematic = MakeArrowSchematic2,
         sum1p_x = sum1p_mid,
         label_hocs = FALSE),
    list(title = "PHO5 + HOcs (S288C)",
         bam_files = pho5_files[1:2],
         schematic = MakeArrowSchematic_ho,
         sum1p_x = sum1p_mid + 117,
         label_hocs = TRUE)
  )

  file_name <- paste0(start, "_", end, "_pho5-yam098_native_vs_insertion.png")
  png(file_name, width = 4, height = 10, units = "in", res = 300)

  # `finally` closes the png device even if a panel fails, so a half-written
  # device is never left open.
  tryCatch({
    #par(bg=NA)
    # NOTE(review): 7 rows are reserved but only 6 slots are drawn
    # (3 schematics + 3 typhoon plots); kept as-is.
    par(mfcol = c(7, 1))

    for (panel in panels) {
      # Schematic of the gene bodies above the typhoon plot. With
      # proteinCoding = FALSE every ORF is represented; set it to TRUE to show
      # only annotated protein-coding genes.
      par(mar = c(1, 3, 3.5, 4))
      #MakeGeneSchematic("1", i, as.numeric(i)+10000, cex_title = 2, proteinCoding = F)
      panel[["schematic"]]("2", start, end, cex_title = 1.5,
                           proteinCoding = FALSE)

      text(x = panel[["sum1p_x"]],
           y = .2,
           labels = expression(bold("Sum1p")),
           srt = 90,
           cex = 0.9)

      if (panel[["label_hocs"]]) {
        text(x = ((ho_start + ho_end) / 2),
             y = .2,
             labels = expression(bold("HOcs")),
             #las = 2
             srt = 90,
             cex = 0.9)
      }

      par(mar = c(3, 3, 1, 4))

      draw_typhoon_panel(panel[["bam_files"]], panel[["title"]],
                         chr = chr, start = start, end = end,
                         sampling_depth = sampling_depth,
                         kernel = kernel.mat)
    }
  }, finally = dev.off())
  
  