
source('environment.r')

# Experiment labels. The loop further down pairs exp[k] with the k-th group of
# consecutive entries in each filename list, so the order here must match the
# order in which get_filenames_group() returns the files.
exp <- c("WT", "YKU70", "MRE11", "NHEJ", "NHEJ_DNL4")

# Empty template for the per-experiment nucleosome statistics; one row per
# experiment is appended later. Column names must match the rows that get
# appended (the last column is std_y, not "stdy_y").
nuc.stats <- data.frame(experiment = character(),
                        nucleosome_size = double(),
                        var_x = double(),
                        var_y = double(),
                        std_x = double(),
                        std_y = double())

# Directory that holds the input data files.
parent.dir <- 'data'

# File lists for both replicates.
# NOTE(review): judging by the variable names, the second argument is the
# replicate number and the third selects the chr2 file set -- confirm against
# get_filenames_group() in environment.r.
replicate.1.filenames <- get_filenames_group(parent.dir, 1, FALSE)
replicate.2.filenames <- get_filenames_group(parent.dir, 2, FALSE)
replicate.1.chr2.filenames <- get_filenames_group(parent.dir, 1, TRUE)
replicate.2.chr2.filenames <- get_filenames_group(parent.dir, 2, TRUE)

#now we need to generate an "idealized" nucleosome based on the way chemically
#mapped nucleosomes (from the Brogaard et al. data, citation below) look in our
#data set. there are plenty of nucleosomes on ChrIV, so we will use this
#chromosome for analysis

#ensure all experiments (with duplicates) are analyzed at the same read depth 
#by sampling to the lowest read depth time point
# Chromosome used to build the aggregate nucleosome. Defined once up front
# because it is needed both inside and after the read-depth loop.
chr <- "chrIV"

# Each experiment owns this many consecutive entries in the filename lists.
timepoints.per.exp <- 6

#get the unique nuc positions from the Brogaard published data set
#https://www.nature.com/articles/nature11142
#https://media.nature.com/original/nature-assets/nature/journal/v486/n7404/extref/nature11142-s2.txt
# The reference table is identical for every experiment, so it is downloaded
# and parsed a single time instead of once per loop iteration.
broggard_unique.df <- fread("https://static-content.springer.com/esm/art%3A10.1038%2Fnature11142/MediaObjects/41586_2012_BFnature11142_MOESM263_ESM.txt")

#annotate the columns
colnames(broggard_unique.df) <- c("chrom", "pos", "NCPscore", "NCPscorenoiseratio")

#extract the nucleosome positions on the analysis chromosome
chr4_nucs_pos <- broggard_unique.df$pos[which(broggard_unique.df$chrom == chr)]
total <- length(chr4_nucs_pos)
if (total == 0) {
  # Fail with a clear message; everything below needs at least one position.
  stop("no ", chr, " nucleosome positions found in the reference data set",
       call. = FALSE)
}

# One pass per experiment. `e` is a 0-based experiment index, so experiment
# e uses filename entries (e * timepoints.per.exp + 1) through
# ((e + 1) * timepoints.per.exp) and the label exp[e + 1].
for (e in seq_along(exp) - 1L) {

  first.idx <- e * timepoints.per.exp + 1
  last.idx <- (e + 1) * timepoints.per.exp

  # Row counts of the read tables of every time point, per replicate.
  # NOTE(review): rows of get_dot_mat() output are treated as reads here --
  # confirm against its definition.
  depth.r1 <- numeric(timepoints.per.exp)
  depth.r2 <- numeric(timepoints.per.exp)

  for (j in first.idx:last.idx) {

    replicate.1.filename <- as.character(replicate.1.filenames[j])
    replicate.2.filename <- as.character(replicate.2.filenames[j])

    #read in the bam files to data frames
    df2 <- get_dot_mat(replicate.1.filename, chr, 1, get_chr_length(replicate.1.filename, chr))
    df3 <- get_dot_mat(replicate.2.filename, chr, 1, get_chr_length(replicate.2.filename, chr))

    depth.r1[j - first.idx + 1] <- nrow(df2)
    depth.r2[j - first.idx + 1] <- nrow(df3)
  }

  # Lowest depth over all time points of this experiment, per replicate.
  sampling_depth.r1 <- min(depth.r1)
  sampling_depth.r2 <- min(depth.r2)

  #initialize a blank matrix that we will update with our MNase data to make
  #an aggregate plot of a nucleosome (250 rows x 401 columns; the columns are
  #the +/-200 bp window around each nucleosome center)
  my_nuc.m <- matrix(0, nrow = 250, ncol = 401)

  #read in the data for the first/pre-induction samples
  replicate.1.filename <- as.character(replicate.1.filenames[first.idx])
  replicate.2.filename <- as.character(replicate.2.filenames[first.idx])

  df2 <- get_dot_mat(replicate.1.filename, chr, 1, get_chr_length(replicate.1.filename, chr))
  df3 <- get_dot_mat(replicate.2.filename, chr, 1, get_chr_length(replicate.2.filename, chr))

  #sample each replicate (without replacement) to the sampling depth
  #calculated above
  df_2 <- df2[sample(nrow(df2), sampling_depth.r1, replace = FALSE), ]
  df_3 <- df3[sample(nrow(df3), sampling_depth.r2, replace = FALSE), ]

  #merge them into one data frame
  reads.df <- rbind(df_2, df_3)

  # The per-replicate tables are no longer needed once merged; release them
  # before the chromosome-wide matrix is built.
  rm(df2, df3, df_2, df_3)

  # get the matrix of the entire chromosome once and index in the loop
  # NOTE(review): the code below assumes this matrix has 250 rows and one
  # column per base pair (it is added to the 250 x 401 accumulator) -- confirm
  # against get_matrix_from_df().
  chr.end <- get_chr_length(replicate.1.filename, chr)
  all.reads.m <- get_matrix_from_df(reads.df, 1, chr.end)

  #iteratively update the blank matrix generated above with MNase
  #reads for every nucleosome on ChrIV (aggregate plot)
  # A single progress bar is created for the whole pass and advanced on every
  # position, including the skipped ones, so it always ends at 100%.
  pb <- txtProgressBar(min = 0, max = total, style = 3)
  for (i in seq_len(total)) {
    start <- chr4_nucs_pos[i] - 200
    end <- chr4_nucs_pos[i] + 200

    # skip nucleosome positions on the edges
    if (start > 0 && end <= chr.end) {
      my_nuc.m <- my_nuc.m + all.reads.m[, start:end]
    }

    setTxtProgressBar(pb, i)
  }
  close(pb)

  #plot the aggregate plot of the nucleosome if you want to see what it looks like
  # lattice objects are only drawn when printed; inside a for loop there is no
  # auto-printing, hence the explicit print().
  print(
    levelplot(t(my_nuc.m),
              main = "ChrIV Aggregate Nuc Plot",
              xlab = "Pos From Nucleosome Center",
              ylab = "Fragment Size",
              colorkey = list(labels = list(col = "white")),
              scales = list(
                x = list(at = seq(0, 400, 50),
                         labels = seq(-200, 200, 50)),
                y = list(at = seq(0, 250, 50),
                         labels = seq(0, 250, 50))
              )
    )
  )

  #subset the matrix centered on 1 nucleosome + the bordering NFR
  #(columns 101:301 are the +/-100 bp around the center)
  subset.m <- my_nuc.m[, 101:301]

  #plot this to verify that you isolated the single aggregate nucleosome
  print(
    levelplot(t(subset.m),
              main = "ChrIV Aggregate Nuc Plot",
              xlab = "Pos From Nucleosome Center",
              ylab = "Fragment Size",
              colorkey = list(labels = list(col = "white")),
              scales = list(
                x = list(at = seq(0, 200, 25),
                         labels = seq(-100, 100, 25)),
                y = list(at = seq(0, 250, 50),
                         labels = seq(0, 250, 50))
              )
    )
  )

  # 1D marginals of the subset: per-position and per-fragment-size totals.
  x_pos <- colSums(subset.m)
  y_pos <- rowSums(subset.m)

  #plot 1D distributions of the nucleosome position and fragment size if desired

  plot(x = (-100:100),
       y = x_pos,
       type = "l",
       lwd = 2,
       col = "blue",
       ylim = range(x_pos),
       xlim = c(-200, 200),
       xaxs = "i",
       xlab = "Distance From Nucleosome Center",
       ylab = "Density",
       main = "1D Nucleosome Positional Density (ChrIV)",
       cex = 1.5,
       cex.axis = 1.5,
       cex.lab = 1.5
  )

  plot(x = (1:250),
       y = y_pos,
       type = "l",
       lwd = 2,
       col = "red",
       ylim = range(y_pos),
       xlim = c(0, 250),
       xaxs = "i",
       xlab = "Fragment Size (MNase)",
       ylab = "Density",
       main = "1D Nucleosomal Fragment Size Density (ChrIV)",
       cex = 1.5,
       cex.axis = 1.5,
       cex.lab = 1.5
  )

  #we now need to generate our idealized nucleosome. we can do this by
  #extracting the fragment size mode for the aggregate nucleosome plot as
  #well as the variance for the fragment size (y) and position (x)

  #extract the variance of the x position
  # rep() expands the per-position totals into one value per observation so
  # that var() sees the weighted distribution.
  nuc_x_pos <- rep(seq(-100, 100, 1), x_pos)
  var_x <- var(nuc_x_pos)
  std_x <- sqrt(var_x)

  #extract the nucleosome size & variance of the y position
  nuc_y_pos <- rep(seq(1, 250, 1), y_pos)
  # NOTE(review): Mode() is defined outside this file; the code below assumes
  # it returns a single fragment size -- confirm.
  nucleosome_size <- Mode(nuc_y_pos)

  #use the nucleosome position density from nucleosome size and higher
  #(avoid subnucleosomal contributions to variance)
  new_index <- seq(nucleosome_size, 250, 1)
  new_y_pos <- y_pos[nucleosome_size:250]
  one_tailed_nuc_y_pos <- rep(new_index, new_y_pos)
  var_y <- var(one_tailed_nuc_y_pos)
  std_y <- sqrt(var_y)

  # Append this experiment's summary row to the results table.
  nuc.stats.new <- data.frame(experiment = exp[e + 1],
                              nucleosome_size = nucleosome_size,
                              var_x = var_x,
                              var_y = var_y,
                              std_x = std_x,
                              std_y = std_y)

  nuc.stats <- rbind(nuc.stats, nuc.stats.new)

  # remove large files for next iteration of the loop
  rm(one_tailed_nuc_y_pos, nuc_x_pos, nuc_y_pos, reads.df, all.reads.m)
}

# Save the per-experiment nucleosome statistics with write.table()'s default
# format.
write.table(x = nuc.stats, file = 'nuc_stats.df')
