

# Prefixes of the BAM files analysed by this script; the ".bam" / ".bam.bai"
# suffixes are appended where the files are opened.
file_name <- c("dm764_756_merged_10minpulse_nascent_sampled_m",
               "dm765_757_merged_10minpulse_mature_m", "DM526_sacCer3_M1")

# Per-origin table. The code below relies on its `gene` column (matched against
# the annotation's `name` column).
oris.df <- read.csv("~/correlation_origins_anticorr_N_M_B_H_4nucleosomesB.csv")

# Origin annotation, restricted to the "g1_and_g2_footprint" class.
origins.df <- read.csv("~/feature_files//oridb_acs_feature_file_jab-curated-798-sites_sacCer3.csv")
origins <- origins.df[which(origins.df$footprint_class == "g1_and_g2_footprint"), ]

# Make sure the annotation columns exist before filling them row by row.
# Assigning `oris.df$col[idx] <- value` to a column that does not exist yet
# builds a vector of length max(idx), which is then silently recycled (or
# rejected) against the data frame; starting from an all-NA column avoids that
# and leaves unmatched rows as NA.
for (annotation_col in c("timing", "efficiency", "ars_name", "pos")) {
  if (!annotation_col %in% names(oris.df)) {
    oris.df[[annotation_col]] <- rep(NA, nrow(oris.df))
  }
}

# Copy activation time, efficiency, ARS name and position from the annotation
# onto every row of oris.df whose `gene` equals the origin's `name`.
for (i in seq_len(nrow(origins))) {
  idx <- which(oris.df$gene == origins$name[i])

  oris.df$timing[idx] <- origins$activation_time[i]
  oris.df$efficiency[idx] <- origins$derived_origin_efficiency_from_mcguffee_et_al_2013[i]
  oris.df$ars_name[idx] <- origins$ars_name[i]
  oris.df$pos[idx] <- origins$pos[i]
}

# Rank origins from most to least efficient (rows with NA efficiency sort last).
or <- oris.df[order(oris.df$efficiency, decreasing = TRUE), ]

# Only rows with a known efficiency may enter the high/low sets; otherwise the
# NA rows at the bottom of `or` would be mistaken for the least efficient
# origins. head()/tail() also cope with tables shorter than 100 rows.
ranked <- or[!is.na(or$efficiency), ]

H <- head(ranked, 100)  ### high efficiency
L <- tail(ranked, 100)  ## low efficiency


# NOTE: GRanges(), IRanges(), mid(), ScanBamParam() and scanBam() are used below,
# so GenomicRanges and Rsamtools must already be attached when this runs.

# Define the read boundaries (nucleosomal and/or subnucleosomal): only fragments
# whose length lies strictly between d_min and d_max are kept.
d_min <- 20
d_max <- 120

# Collect strand-oriented distances between fragment midpoints and origin
# positions, for every origin in `regions`, from a single BAM file.
#
# Arguments:
#   regions      data frame with one row per origin and columns `chr`, `pos`
#                and `strand`.
#   bam_file     path passed to scanBam() as `file`.
#   bam_index    path passed to scanBam() as `index`.
#   size_min,
#   size_max     exclusive bounds on the fragment length (BAM `isize` field).
#   half_window  number of bases scanned on each side of `pos`.
#
# Returns a numeric vector of (midpoint - pos) values, sign-flipped for origins
# on the "-" strand. Values of the last origin come first, matching the order
# the previous prepend-in-a-loop implementation produced. The vector is empty
# when no fragment passes the size filter.
collect_midpoint_offsets <- function(regions, bam_file, bam_index,
                                     size_min, size_max, half_window = 2500) {
  per_region <- vector("list", nrow(regions))

  for (m in seq_len(nrow(regions))) {
    center <- regions[m, "pos"]

    window.gr <- GRanges(seqnames = regions[m, "chr"],
                         ranges = IRanges(start = center - half_window,
                                          end = center + half_window))
    param <- ScanBamParam(what = c("pos", "isize"), which = window.gr)

    # With a single `which` range, all reads are in the first list entry.
    reads <- scanBam(file = bam_file, index = bam_index, param = param)[[1]]

    # Filter on the raw insert size BEFORE building ranges: IRanges() rejects
    # negative or NA widths, and which() drops NA comparisons.
    keep <- which(!is.na(reads$pos) &
                    reads$isize > size_min & reads$isize < size_max)
    if (length(keep) == 0L) {
      next  # nothing in the size range for this origin
    }

    fragments <- IRanges(start = reads$pos[keep], width = reads$isize[keep])

    # Represent each fragment by its midpoint, relative to the origin position.
    offsets <- mid(fragments) - center

    # Orient distances so that they read the same way for both strands.
    if (regions$strand[m] == "-") {
      offsets <- -offsets
    }

    per_region[[m]] <- offsets
  }

  combined <- unlist(rev(per_region), use.names = FALSE)
  if (is.null(combined)) numeric(0) else combined
}

# Only the first two entries of `file_name` are processed; the third slot of
# each result list is kept (empty) so the lists keep their three-element shape.
files_to_scan <- 1:2

####### Low efficiency
file_of_interest.df <- L

# One numeric vector per BAM file. The vectors start empty: seeding them with 0
# would add a fake read at distance 0 to every result.
Lpositions <- rep(list(numeric(0)), 3)

for (f in files_to_scan) {
  # data files
  file_name.bam <- paste0(file_name[f], ".bam")
  file_name.bam.bai <- paste0(file_name[f], ".bam.bai")

  Lpositions[[f]] <- collect_midpoint_offsets(file_of_interest.df,
                                              file_name.bam, file_name.bam.bai,
                                              d_min, d_max)
}

####### High efficiency
file_of_interest.df <- H

Hpositions <- rep(list(numeric(0)), 3)

for (f in files_to_scan) {
  # data files
  file_name.bam <- paste0(file_name[f], ".bam")
  file_name.bam.bai <- paste0(file_name[f], ".bam.bai")

  Hpositions[[f]] <- collect_midpoint_offsets(file_of_interest.df,
                                              file_name.bam, file_name.bam.bai,
                                              d_min, d_max)
}


# Save the collected distances, one CSV per efficiency class and BAM file
# (high-efficiency files first, then low-efficiency), without row names.
position_exports <- list(
  list(values = Hpositions[[1]], path = "~/Figure4/Hpositions1.csv"),
  list(values = Hpositions[[2]], path = "~/Figure4/Hpositions2.csv"),
  list(values = Lpositions[[1]], path = "~/Figure4/Lpositions1.csv"),
  list(values = Lpositions[[2]], path = "~/Figure4/Lpositions2.csv")
)

for (export in position_exports) {
  write.csv(export$values, export$path, row.names = FALSE)
}

