library(data.table)
library(tidyverse)
library(ggpubr)
library(ggpmisc)
library(IRanges)
library(GenomicRanges)

setwd("~/structure/")


# Load the hairpin intervals. The file is read without named columns, so
# V1 is used as the sequence name and V2/V3 as the interval start/end.
# NOTE(review): BED starts are conventionally 0-based; V2 is used as-is
# as the range start here -- confirm this matches the file's convention.
runs_df <- fread("LP_hairpins_gt20_genes.bed")
IR_runs <- IRanges(
  start = runs_df$V2,
  end = runs_df$V3
)
GR_runs <- GRanges(
  seqnames = runs_df$V1,
  ranges = IR_runs
)
# Plain data.frame view of the hairpin ranges.
GR_runs_df <- as.data.frame(GR_runs)

# Read the variant table without a header row; short rows are padded
# (fill = TRUE). The argument is spelled out as `header = FALSE` instead of
# `h=F`, which relied on partial argument matching and on the reassignable
# shorthand `F`.
vcf <- fread("filtered.genic.NAM.vcf", header = FALSE, fill = TRUE)

# Strip the "chr" prefix from the sequence names in V1.
vcf$V1 <- gsub(pattern = "chr", replacement = "", vcf$V1)
# Strip the "AC=" tag from V3 and convert what is left to a number.
# NOTE(review): any V3 value that is not purely "AC=<number>" becomes NA
# here (with a coercion warning) -- confirm the column holds only AC.
vcf$V3 <- gsub(pattern = "AC=", replacement = "", vcf$V3) %>% as.numeric()

# One single-base range per variant, in the same row order as `vcf`.
IR_vcf <- IRanges(start = vcf$V2, width = 1)
GR_vcf <- GRanges(seqnames = vcf$V1, ranges = IR_vcf)

# Flag variants that overlap a gene and keep only those.
# NOTE(review): GR_genes is not defined anywhere in this script; it must
# already exist in the session -- confirm where it comes from.
# overlapsAny() returns one logical per variant, so this also works when
# there are no hits at all.
vcf$in_gene <- overlapsAny(GR_vcf, GR_genes)
is_genic <- vcf$in_gene
vcf <- vcf[is_genic, ]
# Subset the ranges together with the table. Previously `vcf` was filtered
# but GR_vcf was not, so overlap hits computed from GR_vcf indexed rows of
# the unfiltered table and flagged the wrong variants.
IR_vcf <- IR_vcf[is_genic]
GR_vcf <- GR_vcf[is_genic]

# Flag the remaining (genic) variants that fall inside a hairpin interval.
vcf$in_run <- overlapsAny(GR_vcf, GR_runs)

# Fold the allele counts: counts above 27 are reflected around 54
# (|count - 54|), counts of 27 or less are kept unchanged. The result is
# stored in `MAF` and is a count, not a proportion.
# NOTE(review): 54 is presumably the total number of sampled allele
# copies -- confirm.
vcf$MAF <- ifelse(vcf$V3 > 27,
                  abs(vcf$V3 - 54),
                  vcf$V3)

# Folded site-frequency spectrum, bars split by whether the variant lies in
# a hairpin interval. after_stat(prop) replaces the deprecated `..prop..`
# notation; the plotted proportions are the same.
ggplot(data = vcf, aes(x = MAF, fill = in_run)) +
  geom_bar(
    aes(y = after_stat(prop)),
    col = "black",
    position = position_dodge()
  ) +
  xlim(0, 28) +
  scale_fill_manual(values = c("grey", "black")) +
  xlab("Minor allele frequency") +
  theme_bw()
# Saves the most recent plot.
ggsave("folded_SFS_genic_inhairpin_all.png", device = "png", scale = 2)


# Single-base ranges at the positions in `agg`, keyed by agg$ID, flattened
# to a data.frame so they can be merged with the annotation table.
# NOTE(review): `agg` and `gff3` are not defined in this script; both must
# already exist in the session -- confirm their origin and columns.
GR_agg_df <- as.data.frame(
  GRanges(
    seqnames = agg$ID,
    ranges = IRanges(start = agg$pos, width = 1)
  )
)

# Attach the annotation row whose ID matches each range's sequence name.
agg_df <- merge(GR_agg_df, gff3, by.x = "seqnames", by.y = "ID")

# Shift the 1-based offsets by GeneMin to get chromosome-scale coordinates.
# NOTE(review): presumably GeneMin is the gene's leftmost chromosome
# coordinate -- confirm.
agg_df$start_chr <- agg_df$GeneMin + agg_df$start - 1
agg_df$stop_chr <- agg_df$GeneMin + agg_df$end - 1

# Rebuild the ranges in chromosome coordinates, named by the `chr` column.
IR_agg <- IRanges(
  start = agg_df$start_chr,
  end = agg_df$stop_chr
)
GR_agg <- GRanges(
  seqnames = agg_df$chr,
  ranges = IR_agg
)

# Flag variants that overlap a position in GR_agg.
# The per-variant ranges are rebuilt from the current `vcf` so they are
# guaranteed to be row-aligned with it: GR_vcf was created before `vcf` was
# filtered to genic variants, and hits computed from it can point at the
# wrong rows. overlapsAny() gives one logical per variant and also copes
# with the no-hit case.
vcf_sites <- GRanges(
  seqnames = vcf$V1,
  ranges = IRanges(start = vcf$V2, width = 1)
)
vcf$in_paired <- overlapsAny(vcf_sites, GR_agg)

# Round odd allele counts up to the next even number. This overwrites V3.
# NOTE(review): presumably this is because the samples are expected to be
# homozygous, so odd counts are treated as noise -- confirm.
vcf$V3 <- ifelse(vcf$V3 %% 2 == 1,
                 vcf$V3 + 1,
                 vcf$V3)
# Refold the adjusted counts around 54. This overwrites the MAF column
# computed earlier in the script.
vcf$MAF <- ifelse(vcf$V3 > 27,
                  abs(vcf$V3 - 54),
                  vcf$V3)

# Folded site-frequency spectrum, bars split by the in_paired flag.
# after_stat(prop) replaces the deprecated `..prop..` notation; the plotted
# proportions are the same.
ggplot(data = vcf, aes(x = MAF, fill = in_paired)) +
  geom_bar(
    aes(y = after_stat(prop)),
    col = "black",
    position = position_dodge()
  ) +
  xlim(0, 28) +
  scale_fill_manual(values = c("grey", "black")) +
  xlab("Minor allele frequency") +
  theme_bw()
# Saves the most recent plot.
ggsave("folded_SFS_genic_inpair_all.png", device = "png", scale = 2)

# Two-sample Kolmogorov-Smirnov tests on the folded allele counts.
# Both tests read the MAF column as it stands at this point, i.e. the one
# recomputed after odd counts were rounded up, not the one used for the
# first (in_run) figure.

# Flagged in_paired (x) versus not flagged (y).
ks.test(
  vcf$MAF[which(vcf$in_paired)],
  vcf$MAF[which(!vcf$in_paired)],
  alternative = "two.sided"
)

# Inside a hairpin interval (x) versus outside (y).
ks.test(
  vcf$MAF[which(vcf$in_run)],
  vcf$MAF[which(!vcf$in_run)],
  alternative = "two.sided"
)

