library(data.table)
library(tidyverse)
library(ggpubr)
library(ggpmisc)
library(IRanges)
library(GenomicRanges)
#Skip to line 66 #######

# Hairpin ("run") intervals, read from a headerless BED-like table: fread names
# the columns V1, V2, ... and V1/V2/V3 are used as seqname/start/end below.
runs <- fread("v5_LP_hairpins_terminal_inverted_repeat_element.bed")

# Genomic ranges of the runs, plus a data.frame view of the same ranges.
IR_runs <- with(runs, IRanges(start = V2, end = V3))
GR_runs <- GRanges(seqnames = runs[["V1"]], ranges = IR_runs)
runs_df <- as.data.frame(GR_runs)

###############
# METHYLATION #
###############

#Filename = "B73_CG_terminal_inverted_repeat_element.BedGraph"

# Terminal inverted repeat (TIR) element annotation; the four columns are
# renamed so that they stay distinguishable from the run columns after merging.
terminal_inverted_repeat_elements <- fread("../Downloads/v5_terminal_inverted_repeat_element.bed")
names(terminal_inverted_repeat_elements) <- c(
  "terminal_inverted_repeat_element_chr",
  "terminal_inverted_repeat_element_min",
  "terminal_inverted_repeat_element_max",
  "V4"
)

# Attach the element coordinates to every run sharing the same V4 key, then
# record the element length (max - min).
runs <- merge(runs, terminal_inverted_repeat_elements, by = "V4", allow.cartesian = TRUE)
runs$terminal_inverted_repeat_elementLength <- with(
  runs,
  terminal_inverted_repeat_element_max - terminal_inverted_repeat_element_min
)

# Keep only the distinct elements that matched at least one run.
terminal_inverted_repeat_elements <- unique(
  runs[, c("terminal_inverted_repeat_element_chr",
           "terminal_inverted_repeat_element_min",
           "terminal_inverted_repeat_element_max"),
       with = FALSE]
)

# Genomic ranges of those elements, plus a data.frame view of the same ranges.
IR_terminal_inverted_repeat_elements <- with(
  terminal_inverted_repeat_elements,
  IRanges(start = terminal_inverted_repeat_element_min,
          end = terminal_inverted_repeat_element_max)
)
GR_terminal_inverted_repeat_elements <- GRanges(
  seqnames = terminal_inverted_repeat_elements[["terminal_inverted_repeat_element_chr"]],
  ranges = IR_terminal_inverted_repeat_elements
)
GR_terminal_inverted_repeat_elements_df <- as.data.frame(GR_terminal_inverted_repeat_elements)

### METAPLOTS
## CG
# Load the genome-wide CG methylation BedGraph and keep only the records whose
# start coordinate falls inside a terminal inverted repeat element.
CG <- fread("./structure/B73_CG.BedGraph")
gc()

# One single-base range per record, anchored on the record start (V2).
# NOTE(review): V2 is used as-is here, whereas the per-base positions built
# later in this script use V2 + 1 -- confirm the intended coordinate base.
IR_CG <- IRanges(start = CG$V2, width = 1)
GR_CG <- GRanges(seqnames = CG$V1, ranges = IR_CG)
IR_CG <- NULL

gc()

# Use a logical "overlaps any element" mask rather than indexing by the
# queryHits of findOverlaps(): queryHits repeats a record once per overlapping
# element, so records inside overlapping elements would be emitted several
# times and counted several times in every downstream sum.
CG <- CG[overlapsAny(GR_CG, GR_terminal_inverted_repeat_elements), ]
gc()

# Cache the filtered table so the genome-wide file need not be re-read.
write_delim(CG, "B73_CG_terminal_inverted_repeat_element.BedGraph", delim = "\t")


#actual
#CG <- fread("B73_CG_terminal_inverted_repeat_element.BedGraph")

# Load the genome-wide CG coverage BedGraph and keep only the records whose
# start coordinate falls inside a terminal inverted repeat element.
CG_cov <- fread("B73_coverage_CG.BedGraph.gz")


# One single-base range per record, anchored on the record start (V2).
IR_CG_cov <- IRanges(start = CG_cov$V2, width = 1)
GR_CG_cov <- GRanges(seqnames = CG_cov$V1, ranges = IR_CG_cov)
IR_CG_cov <- NULL

gc()

# Logical mask instead of findOverlaps()$queryHits: queryHits repeats a record
# once per overlapping element, which would duplicate coverage rows (and later
# multiply the matches against the methylation table) wherever elements overlap.
CG_cov <- CG_cov[overlapsAny(GR_CG_cov, GR_terminal_inverted_repeat_elements), ]
gc()

# Cache the filtered table so the genome-wide file need not be re-read.
write_delim(CG_cov, "B73_CG_cov_terminal_inverted_repeat_element.BedGraph", delim = "\t")

#actual
#CG_cov <- fread("B73_CG_cov_terminal_inverted_repeat_element.BedGraph")

# Full extent of every retained coverage record, spanning V2 + 1 .. V3.
IR_CG_cov <- IRanges(start = CG_cov$V2 + 1, end = CG_cov$V3)
GR_CG_cov <- GRanges(seqnames = CG_cov$V1, ranges = IR_CG_cov)
#CG_cov <- NULL
gc()
#GR_CG_cov_df <- as.data.frame(GR_CG_cov)

# Expand the CG methylation records to one row per base, attach the read
# coverage of each base, and compare methylation inside vs. outside the runs.
setDT(CG)

# A record spanning V2..V3 becomes the positions V2 + 1, ..., V3. sequence()
# restarts the counter for every record, so positions no longer depend on
# grouping by V2 alone (which merged records that share a start coordinate on
# different chromosomes and numbered them with one running counter).
CG <- CG[, .(
  V1  = rep(V1, V3 - V2),
  pos = rep(V2, V3 - V2) + sequence(V3 - V2),
  V4  = rep(V4, V3 - V2)
)]
gc()


IR_CG <- IRanges(start = CG$pos, width = 1)
GR_CG <- GRanges(seqnames = CG$V1, ranges = IR_CG)

IR_CG_cov <- IRanges(start = CG_cov$V2 + 1, end = CG_cov$V3)
GR_CG_cov <- GRanges(seqnames = CG_cov$V1, ranges = IR_CG_cov)

CG$cov <- NA

gc()

# Pair every CG base with the coverage record(s) containing it.
hitbox <- as.data.frame(findOverlaps(GR_CG, GR_CG_cov))

# Snapshot of the per-base table before it is restricted to covered bases.
test <- CG

# Keep only bases that have coverage; V4 * cov is the methylated read count.
CG <- CG[hitbox$queryHits, ]
CG$cov <- CG_cov$V4[hitbox$subjectHits]
CG$meth_reads <- CG$V4 * CG$cov
# The rest of the script sums `meth_reads_int`, which was never created (so
# `$` returned NULL and every numerator was 0). Provide it as the read count
# rounded to a whole number.
# NOTE(review): rounding is inferred from the "_int" suffix -- confirm.
CG$meth_reads_int <- round(CG$meth_reads)

IR_CG <- IRanges(start = CG$pos, width = 1)
GR_CG <- GRanges(seqnames = CG$V1, ranges = IR_CG)

# TRUE for bases overlapping at least one run. A logical mask is needed for the
# complement: `!` applied to the integer hit indices does not select the
# non-hit rows, and indexing by the raw hits repeats a base once per run.
hitbox_runs <- as.data.frame(findOverlaps(GR_CG, GR_runs))
is_in_run <- seq_len(nrow(CG)) %in% hitbox_runs$queryHits
CG_in_runs <- CG[is_in_run, ]
CG_NOTin_runs <- CG[!is_in_run, ]

# Coverage-weighted methylation level inside and outside the runs.
sum(CG_in_runs$meth_reads_int) / sum(CG_in_runs$cov)
sum(CG_NOTin_runs$meth_reads_int) / sum(CG_NOTin_runs$cov)

CG$in_run <- is_in_run

# Working copy of the merged run table, with run-centred coordinate aliases.
runs_df <- runs

runs_df$chr <- runs_df$V1
runs_df$start_chr <- runs_df$V2
runs_df$stop_chr <- runs_df$V3
runs_df$width <- with(runs_df, V3 - V2)

## Random windows
winsize <- 100

# For every run, draw one offset from 0 .. (element length - run width); the
# offset is later added to the element start to place a random window.
runs_df$rand <- with(
  runs_df,
  mapply(
    function(lo, hi) sample(seq(lo, hi), 1),
    0,
    terminal_inverted_repeat_elementLength - width
  )
)

# Random window of the same width as the run, placed at that offset.
runs_df$start_rand <- with(runs_df, terminal_inverted_repeat_element_min + as.numeric(rand))
runs_df$stop_rand <- with(runs_df, start_rand + (width - 1))

IR_runs_rand <- with(runs_df, IRanges(start = start_rand, end = stop_rand))
GR_runs_rand <- GRanges(seqnames = runs_df[["V1"]], ranges = IR_runs_rand)
GR_runs_rand_df <- as.data.frame(GR_runs_rand)

#CG <- CG[CG$perc != 0,]
# Single-base ranges for the CG sites, used for all window overlaps below.
IR_CG <- IRanges(start = CG[["pos"]], width = 1)
GR_CG <- GRanges(seqnames = CG[["V1"]], ranges = IR_CG)
#CG <- NULL
gc()

# Calculate window meth levels
#
# For both the real runs and their random control windows, compute the
# coverage-weighted methylation level in `n_flank` consecutive windows of
# `winsize` bp upstream, in the run itself, and in `n_flank` windows downstream.

# Coverage-weighted methylation level of the sites overlapping `windows`.
#   sites     table of sites with a `cov` column and the column named `meth_col`
#   sites_gr  GRanges with one range per row of `sites`, in the same order
#   windows   GRanges of windows, pooled across all runs
#   meth_col  name of the methylated-read-count column of `sites`
# A site overlapping several windows is counted once per window, so every run
# contributes its own window. Returns NaN when no covered site is hit.
window_meth_level <- function(sites, sites_gr, windows, meth_col) {
  hits <- queryHits(findOverlaps(sites_gr, windows))
  sum(sites[[meth_col]][hits]) / sum(sites[["cov"]][hits])
}

# Methylation profile across flanks and centre, returned as a numeric vector
# ordered: upstream window n_flank ... upstream window 1, centre,
# downstream window 1 ... downstream window n_flank.
#   chr, start, stop  per-run seqname and the coordinates the flanks hang off
#   centre_gr         GRanges of the central (run) intervals
# Upstream window k spans  start - k * winsize ... start - (k - 1) * winsize - 1.
# Downstream window k spans stop + (k - 1) * winsize + 1 ... stop + k * winsize.
metaplot_profile <- function(sites, sites_gr, chr, start, stop, centre_gr,
                             winsize, n_flank, meth_col) {
  level_in <- function(ranges) {
    window_meth_level(sites, sites_gr,
                      GRanges(seqnames = chr, ranges = ranges), meth_col)
  }
  upstream <- vapply(
    rev(seq_len(n_flank)),
    function(k) level_in(IRanges(start = start - winsize * k, width = winsize)),
    numeric(1)
  )
  centre <- window_meth_level(sites, sites_gr, centre_gr, meth_col)
  downstream <- vapply(
    seq_len(n_flank),
    function(k) level_in(IRanges(end = stop + winsize * k, width = winsize)),
    numeric(1)
  )
  c(upstream, centre, downstream)
}

# Number of flanking windows on each side of the run.
n_flank <- 20

# The sums were written against `CG$meth_reads_int`. When that column does not
# exist, `$` yields NULL and every level collapses to 0, so fall back to
# `meth_reads`, the methylated read count column.
meth_col <- if ("meth_reads_int" %in% names(CG)) "meth_reads_int" else "meth_reads"

# Real runs.
# NOTE(review): the centre uses GR_runs, built from the run table before it was
# merged with the element annotation, while the flanks use the merged runs_df.
# Confirm both describe the same set of runs.
vals <- metaplot_profile(
  CG, GR_CG,
  chr = runs_df$chr, start = runs_df$start_chr, stop = runs_df$stop_chr,
  centre_gr = GR_runs,
  winsize = winsize, n_flank = n_flank, meth_col = meth_col
)

# Random

# Random control windows: same width as each run, placed at the random offset
# `rand` from the start of the run's element.
runs_df$start_rand <- runs_df$terminal_inverted_repeat_element_min + as.numeric(runs_df$rand)
runs_df$stop_rand <- runs_df$start_rand + (runs_df$width - 1)

IR_runs_rand <- IRanges(start = runs_df$start_rand, end = runs_df$stop_rand)
GR_runs_rand <- GRanges(seqnames = runs_df$chr, ranges = IR_runs_rand)
GR_runs_rand_df <- as.data.frame(GR_runs_rand)

valsrand <- metaplot_profile(
  CG, GR_CG,
  chr = runs_df$chr, start = runs_df$start_rand, stop = runs_df$stop_rand,
  centre_gr = GR_runs_rand,
  winsize = winsize, n_flank = n_flank, meth_col = meth_col
)

# Combine the real and random profiles into one long table, save it, and draw
# the metaplot (one line per group across the window index).
stopifnot(length(vals) == length(valsrand))

# Window index, derived from the profile length rather than hard-coded.
wins <- seq_along(vals)

# rep() has no `n` argument; the label is repeated explicitly with `times`.
meta <- data.frame(
  wins = wins,
  vals = vals,
  inhp = rep("In hairpin", times = length(wins))
)
metan <- data.frame(
  wins = wins,
  vals = valsrand,
  inhp = rep("Not in hairpin", times = length(wins))
)
meta <- rbind(meta, metan)

write_delim(meta, "CG_terminal_inverted_repeat_element_metadata.txt", delim = "\t")

# Axis ticks at the first window, the central window and the last window.
metaplot <- ggplot(data = meta, aes(x = wins, y = vals, col = as.factor(inhp))) +
  geom_line() +
  #geom_line(aes(x = wins, y = valsrand, col = "red")) +
  theme_bw() +
  #ylim(0,1) +
  xlab("Window") +
  ylab("CG methylation (prop. reads)") +
  scale_x_continuous(
    breaks = c(1, (length(wins) + 1) / 2, length(wins)),
    labels = c("-2 kb", "hairpin", "+2 kb")
  )
print(metaplot)

# Pass the plot explicitly instead of relying on the implicit last plot.
ggsave("hairpin_metaplot_CG_runvsrand_100bpwin_terminal_inverted_repeat_element.png",
       plot = metaplot,
       device = "png",
       scale = 2)
