
# Clear the workspace before the analysis starts.
# graphics.off() shuts down every open graphics device.
# rm(list = ls()) removes every object that ls() reports for the current
# environment.
# NOTE(review): when this script is source()d into an interactive session this
# presumably wipes the caller's own objects as well; running it in a fresh R
# session would make the reset unnecessary -- confirm before removing.
graphics.off()
rm(list = ls())

source('/figures/typhoon_plot_functions.R')

# Load the per-site ratio table. This block reads the "ratio" column; the
# per-site loops further down read the "chr" and "subnuc_peaks" columns of the
# subsets built here.
chr_subnuc_ratios.df <- read.csv('~/chr_subnuc_ratios.csv', stringsAsFactors = FALSE)

tfs <- chr_subnuc_ratios.df

# log2(ratio + 1): a ratio of exactly 1 maps to a logratio of exactly 1.
tfs$logratio <- log2(tfs$ratio + 1)
tfs <- tfs[order(tfs$logratio, decreasing = FALSE), ]

# Rank of each site after sorting by logratio. seq_len() (rather than 1:nrow)
# keeps this assignment valid when the table has no rows.
tfs$coln <- seq_len(nrow(tfs))

# Deciles of the logratio: qt[1] is the 0% quantile, qt[2] the 10% quantile,
# ..., qt[10] the 90% quantile and qt[11] the 100% quantile.
cl <- "logratio"
qt <- quantile(tfs[, cl], seq(0, 1, .10))

# Sites strictly below the 10% quantile (bottom of ratio) ...
low_q <- tfs[which(tfs[, cl] < qt[2]), ]
# ... and strictly above the 90% quantile (top of ratio).
high_q <- tfs[which(tfs[, cl] > qt[10]), ]

# Sites whose logratio lies strictly between 0.9 and 1.1 (no change).
nochange_q <- tfs[which(tfs$logratio < 1.1 & tfs$logratio > 0.9), ]


# Accumulators for the read-midpoint distances, one list per ratio group
# (low / high / nochange). Each list has three slots, indexed like file_name;
# the per-site loops below prepend the distances they collect to low[[f]],
# high[[f]] and nochange[[f]].
#
# The slots start as empty numeric vectors. Seeding them with 0 (as was done
# before) left that 0 at the end of every accumulated vector, i.e. one
# fabricated "distance 0" observation in each exported file.
low <- list(numeric(0), numeric(0), numeric(0))

high <- list(numeric(0), numeric(0), numeric(0))

nochange <- list(numeric(0), numeric(0), numeric(0))


# Base names of the BAM data sets, without extension; the ".bam" and
# ".bam.bai" suffixes are pasted on where the files are opened.
file_name <- c(
  "dm764_756_merged_10minpulse_nascent_sampled_m",
  "dm765_757_merged_10minpulse_mature_m",
  "DM526_sacCer3_M1"
)


# Define the read boundaries (nucleosomal and/or subnucleosomal): only
# fragments whose length is strictly greater than d_min and strictly smaller
# than d_max are kept.
d_min <- 140
d_max <- 180

# Collect, for every site in sites.df, the signed distance between the midpoint
# of each size-selected fragment and the site's "subnuc_peaks" position.
#
# Arguments:
#   sites.df    data frame with a "chr" and a "subnuc_peaks" column, one row
#               per site.
#   bam_prefix  BAM file name without extension; ".bam" and ".bam.bai" are
#               appended to obtain the data and index file names.
#   label       group name, used only in the progress message.
#   data_set    index of the data set, used only in the progress message.
#   d_min,
#   d_max       exclusive lower / upper bound on the fragment length (isize).
#   flank       number of bases fetched on either side of the peak.
#
# Returns the distances of all sites as one vector, with the most recently
# processed site first (the same order the previous prepend-per-site code
# produced), or NULL when sites.df has no rows.
collect_midpoint_distances <- function(sites.df, bam_prefix, label, data_set,
                                       d_min, d_max, flank = 1000) {
  file_name.bam <- paste0(bam_prefix, ".bam")
  file_name.bam.bai <- paste0(bam_prefix, ".bam.bai")

  # One slot per site, filled in place instead of growing a vector per site.
  per_site <- vector("list", nrow(sites.df))

  # seq_len() skips the loop entirely for an empty site table (1:nrow would
  # iterate over c(1, 0)).
  for (m in seq_len(nrow(sites.df))) {
    peak <- sites.df[m, "subnuc_peaks"]

    # Window of +/- flank around the peak. No read can start before position
    # 1, so clamping the window start does not change which reads overlap it.
    chr.gr <- GRanges(seqnames = sites.df[m, "chr"],
                      ranges = IRanges(start = max(1, peak - flank),
                                       end = peak + flank))

    p <- ScanBamParam(what = c("rname", "strand", "pos", "isize"),
                      which = chr.gr)

    # A single range was requested, so everything is in the first list entry.
    reads <- scanBam(file = file_name.bam,
                     index = file_name.bam.bai,
                     param = p)[[1]]

    # Size-select on isize *before* building the ranges: IRanges() refuses
    # negative or NA widths, which a BAM record can carry (presumably the
    # reverse mate of a pair / unpaired reads -- confirm for these files).
    # For the records that pass, isize equals the range width, so the
    # selection is the same as filtering on width afterwards.
    keep <- !is.na(reads$isize) & reads$isize > d_min & reads$isize < d_max
    frag.ir <- IRanges(start = reads$pos[keep], width = reads$isize[keep])

    # Vectorised midpoint-to-peak distance; yields an empty vector (rather
    # than an error) when no fragment in the window passes the size filter.
    per_site[[m]] <- mid(frag.ir) - peak

    cat(paste("done with", label, "TF", m, "on data set", data_set, sep = " "), "\n")
  }

  unlist(rev(per_site))
}


# Only the first two entries of file_name are processed.
data_sets <- 1:2

# Sites at the bottom of the ratio distribution.
file_of_interest.df <- low_q

for (f in data_sets) {
  low[[f]] <- c(
    collect_midpoint_distances(file_of_interest.df, file_name[f], "low", f,
                               d_min, d_max),
    low[[f]]
  )
}


# Sites at the top of the ratio distribution.
file_of_interest.df <- high_q

for (f in data_sets) {
  high[[f]] <- c(
    collect_midpoint_distances(file_of_interest.df, file_name[f], "high", f,
                               d_min, d_max),
    high[[f]]
  )
}


# Sites whose ratio does not change.
file_of_interest.df <- nochange_q

for (f in data_sets) {
  nochange[[f]] <- c(
    collect_midpoint_distances(file_of_interest.df, file_name[f], "nochange", f,
                               d_min, d_max),
    nochange[[f]]
  )
}


# Export the accumulated distance vectors of the first two data sets for each
# ratio group, one CSV file per vector (keyed by destination path).
exports <- list(
  "~/Figure5/Nuc_highratio1.csv" = high[[1]],
  "~/Figure5/Nuc_highratio2.csv" = high[[2]],
  "~/Figure5/Nuc_lowratio1.csv" = low[[1]],
  "~/Figure5/Nuc_lowratio2.csv" = low[[2]],
  "~/Figure5/Nuc_nochangeratio1.csv" = nochange[[1]],
  "~/Figure5/Nuc_nochangeratio2.csv" = nochange[[2]]
)

for (csv_path in names(exports)) {
  write.csv(exports[[csv_path]], csv_path, row.names = FALSE)
}
