library(dplyr)
library(ggplot2)
library(gridExtra)
# The sections for the 14 strains read their input files relative to this
# directory.
setwd("/home/yiliao/Documents/20200905GRrevision/TAD_SV/pop")

##################  For 14 strains

# ---- 14 strains: 1 to 10bp deletions ----
# Permutation results (column V1 is the value that is plotted and tested) and
# the observed count they are compared against.
perm1 <- read.table(
  "pop.del.bed.uniq.breakpionts.bed1_10bp.permutation.breaks.bed",
  header = FALSE
)
obs1 <- 15135

# Density-scaled histogram of V1 with a kernel density overlay and a vertical
# marker at the observed count.
# NOTE(review): `..density..` and `size` on line geoms are deprecated in
# ggplot2 >= 3.4 (after_stat(density) / linewidth); kept here so the script
# still runs on older ggplot2 versions.
p1 <- ggplot(data = perm1) +
  geom_histogram(
    aes(x = V1, y = ..density..),
    binwidth = 1, color = "black", fill = "gold1"
  ) +
  geom_density(aes(x = V1), color = "red", size = 1) +
  # Constant intercept set outside aes() so a single line is drawn instead of
  # one identical line per row of perm1.
  geom_vline(xintercept = obs1, color = "blue") +
  # The y-axis title is supplied by scale_y_continuous(name = ...) below.
  labs(title = "1 to 10bp deletions", x = "Counts") +
  scale_y_continuous(
    # Primary axis: with binwidth = 1 the density equals the proportion of
    # permutations falling in each bin.
    name = "Proportion",
    breaks = seq(0, 0.006, 0.001),
    # Secondary axis: proportion times the number of permutations gives the
    # per-bin count.
    sec.axis = sec_axis(trans = ~ . * nrow(perm1), name = "Counts")
  ) +
  theme_bw() +
  theme(
    panel.grid = element_line(color = "grey85"),
    plot.title = element_text(size = 18, face = "bold", hjust = .5),
    axis.text.x = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13, hjust = .5, vjust = .5, face = "bold"
    )
  )

# Two-sided empirical p-value: twice the smaller tail count divided by the
# number of permutations. The comparison uses the V1 vector (not the whole
# data frame) so numerator and denominator are based on the same values.
# Evaluates to 0 when obs1 lies outside the range of the permuted values.
pvalue_exp1 <- 2 * min(sum(obs1 < perm1$V1), sum(obs1 > perm1$V1)) /
  length(perm1$V1)

#################
# ---- 14 strains: 11bp to 2kb deletions ----
# Permutation results (column V1 is the value that is plotted and tested) and
# the observed count they are compared against.
perm2 <- read.table(
  "pop.del.bed.uniq.breakpionts.bed11_2000bp.permutation.breaks.bed",
  header = FALSE
)
obs2 <- 3909

# Density-scaled histogram of V1 with a kernel density overlay and a vertical
# marker at the observed count.
# NOTE(review): `..density..` and `size` on line geoms are deprecated in
# ggplot2 >= 3.4 (after_stat(density) / linewidth); kept here so the script
# still runs on older ggplot2 versions.
p2 <- ggplot(data = perm2) +
  geom_histogram(
    aes(x = V1, y = ..density..),
    binwidth = 1, color = "black", fill = "gold1"
  ) +
  geom_density(aes(x = V1), color = "red", size = 1) +
  # Constant intercept set outside aes() so a single line is drawn instead of
  # one identical line per row of perm2.
  geom_vline(xintercept = obs2, color = "blue") +
  # Title unit corrected from "2kp" to "2kb". The y-axis title is supplied by
  # scale_y_continuous(name = ...) below.
  labs(title = "11 to 2kb deletions", x = "Counts") +
  scale_y_continuous(
    # Primary axis: with binwidth = 1 the density equals the proportion of
    # permutations falling in each bin.
    name = "Proportion",
    breaks = seq(0, 0.01, 0.002),
    # Secondary axis: proportion times the number of permutations gives the
    # per-bin count.
    sec.axis = sec_axis(trans = ~ . * nrow(perm2), name = "Counts")
  ) +
  theme_bw() +
  theme(
    panel.grid = element_line(color = "grey85"),
    plot.title = element_text(size = 18, face = "bold", hjust = .5),
    axis.text.x = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13, hjust = .5, vjust = .5, face = "bold"
    )
  )

# Two-sided empirical p-value: twice the smaller tail count divided by the
# number of permutations. The comparison uses the V1 vector (not the whole
# data frame) so numerator and denominator are based on the same values.
# Evaluates to 0 when obs2 lies outside the range of the permuted values.
pvalue_exp2 <- 2 * min(sum(obs2 < perm2$V1), sum(obs2 > perm2$V1)) /
  length(perm2$V1)

#################  3
# ---- 14 strains: 1 to 10bp NonTE insertions ----
# Permutation results (column V1 is the value that is plotted and tested) and
# the observed count they are compared against.
perm3 <- read.table(
  "pop.nonTE.ins.bed.uniq.bed1_10bp.permutation.breaks.bed",
  header = FALSE
)
obs3 <- 2450

# Density-scaled histogram of V1 with a kernel density overlay and a vertical
# marker at the observed count.
# NOTE(review): `..density..` and `size` on line geoms are deprecated in
# ggplot2 >= 3.4 (after_stat(density) / linewidth); kept here so the script
# still runs on older ggplot2 versions.
p3 <- ggplot(data = perm3) +
  geom_histogram(
    aes(x = V1, y = ..density..),
    binwidth = 1, color = "black", fill = "gold1"
  ) +
  geom_density(aes(x = V1), color = "red", size = 1) +
  # Constant intercept set outside aes() so a single line is drawn instead of
  # one identical line per row of perm3.
  geom_vline(xintercept = obs3, color = "blue") +
  # The y-axis title is supplied by scale_y_continuous(name = ...) below.
  labs(title = "1 to 10bp NonTE insertions", x = "Counts") +
  scale_y_continuous(
    # Primary axis: with binwidth = 1 the density equals the proportion of
    # permutations falling in each bin.
    name = "Proportion",
    breaks = seq(0, 0.01, 0.002),
    # Secondary axis: proportion times the number of permutations gives the
    # per-bin count.
    sec.axis = sec_axis(trans = ~ . * nrow(perm3), name = "Counts")
  ) +
  theme_bw() +
  theme(
    panel.grid = element_line(color = "grey85"),
    plot.title = element_text(size = 18, face = "bold", hjust = .5),
    axis.text.x = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13, hjust = .5, vjust = .5, face = "bold"
    )
  )

# Two-sided empirical p-value: twice the smaller tail count divided by the
# number of permutations. The comparison uses the V1 vector (not the whole
# data frame) so numerator and denominator are based on the same values.
# Evaluates to 0 when obs3 lies outside the range of the permuted values.
pvalue_exp3 <- 2 * min(sum(obs3 < perm3$V1), sum(obs3 > perm3$V1)) /
  length(perm3$V1)

################# 
# ---- 14 strains: 11bp to 20kb NonTE insertions ----
# Permutation results (column V1 is the value that is plotted and tested) and
# the observed count they are compared against.
perm4 <- read.table(
  "pop.nonTE.ins.bed.uniq.bed11_20kb.permutation.breaks.bed",
  header = FALSE
)
obs4 <- 1464

# Density-scaled histogram of V1 with a kernel density overlay and a vertical
# marker at the observed count.
# NOTE(review): `..density..` and `size` on line geoms are deprecated in
# ggplot2 >= 3.4 (after_stat(density) / linewidth); kept here so the script
# still runs on older ggplot2 versions.
p4 <- ggplot(data = perm4) +
  geom_histogram(
    aes(x = V1, y = ..density..),
    binwidth = 1, color = "black", fill = "gold1"
  ) +
  geom_density(aes(x = V1), color = "red", size = 1) +
  # Constant intercept set outside aes() so a single line is drawn instead of
  # one identical line per row of perm4.
  geom_vline(xintercept = obs4, color = "blue") +
  # The y-axis title is supplied by scale_y_continuous(name = ...) below.
  labs(title = "11 to 20kb NonTE insertions", x = "Counts") +
  scale_y_continuous(
    # Primary axis: with binwidth = 1 the density equals the proportion of
    # permutations falling in each bin.
    name = "Proportion",
    breaks = seq(0, 0.01, 0.002),
    # Secondary axis: proportion times the number of permutations gives the
    # per-bin count.
    sec.axis = sec_axis(trans = ~ . * nrow(perm4), name = "Counts")
  ) +
  theme_bw() +
  theme(
    panel.grid = element_line(color = "grey85"),
    plot.title = element_text(size = 18, face = "bold", hjust = .5),
    axis.text.x = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13, hjust = .5, vjust = .5, face = "bold"
    )
  )

# Two-sided empirical p-value: twice the smaller tail count divided by the
# number of permutations. The comparison uses the V1 vector (not the whole
# data frame) so numerator and denominator are based on the same values.
# Evaluates to 0 when obs4 lies outside the range of the permuted values.
pvalue_exp4 <- 2 * min(sum(obs4 < perm4$V1), sum(obs4 > perm4$V1)) /
  length(perm4$V1)


##################
##################  For D. sim species

# The remaining sections read their input files relative to this directory.
setwd("/home/yiliao/Documents/20200905GRrevision/TAD_SV/3pecies")

# ---- Species comparison: 1 to 10bp deletions ----
# Permutation results (column V1 is the value that is plotted and tested) and
# the observed count they are compared against.
perm5 <- read.table(
  "3p.del.bed.uniq.breakpionts.bed1_10bp.permutation.breaks.bed",
  header = FALSE
)
obs5 <- 54599

# Density-scaled histogram of V1 with a kernel density overlay and a vertical
# marker at the observed count.
# NOTE(review): `..density..` and `size` on line geoms are deprecated in
# ggplot2 >= 3.4 (after_stat(density) / linewidth); kept here so the script
# still runs on older ggplot2 versions.
p5 <- ggplot(data = perm5) +
  geom_histogram(
    aes(x = V1, y = ..density..),
    binwidth = 1, color = "black", fill = "gold1"
  ) +
  geom_density(aes(x = V1), color = "red", size = 1) +
  # Constant intercept set outside aes() so a single line is drawn instead of
  # one identical line per row of perm5.
  geom_vline(xintercept = obs5, color = "blue") +
  # The y-axis title is supplied by scale_y_continuous(name = ...) below.
  labs(title = "1 to 10bp deletions", x = "Counts") +
  scale_y_continuous(
    # Primary axis: with binwidth = 1 the density equals the proportion of
    # permutations falling in each bin.
    name = "Proportion",
    breaks = seq(0, 0.004, 0.001),
    # Secondary axis: proportion times the number of permutations gives the
    # per-bin count.
    sec.axis = sec_axis(trans = ~ . * nrow(perm5), name = "Counts")
  ) +
  theme_bw() +
  theme(
    panel.grid = element_line(color = "grey85"),
    plot.title = element_text(size = 18, face = "bold", hjust = .5),
    axis.text.x = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13, hjust = .5, vjust = .5, face = "bold"
    )
  )

# Two-sided empirical p-value: twice the smaller tail count divided by the
# number of permutations. The comparison uses the V1 vector (not the whole
# data frame) so numerator and denominator are based on the same values.
# Evaluates to 0 when obs5 lies outside the range of the permuted values.
pvalue_exp5 <- 2 * min(sum(obs5 < perm5$V1), sum(obs5 > perm5$V1)) /
  length(perm5$V1)


#################

# ---- Species comparison: 11bp to 2kb deletions ----
# Permutation results (column V1 is the value that is plotted and tested) and
# the observed count they are compared against.
perm6 <- read.table(
  "3p.del.bed.uniq.breakpionts.bed11_2000bp.permutation.breaks.bed",
  header = FALSE
)
obs6 <- 13494

# Density-scaled histogram of V1 with a kernel density overlay and a vertical
# marker at the observed count.
# NOTE(review): `..density..` and `size` on line geoms are deprecated in
# ggplot2 >= 3.4 (after_stat(density) / linewidth); kept here so the script
# still runs on older ggplot2 versions.
p6 <- ggplot(data = perm6) +
  geom_histogram(
    aes(x = V1, y = ..density..),
    binwidth = 1, color = "black", fill = "gold1"
  ) +
  geom_density(aes(x = V1), color = "red", size = 1) +
  # Constant intercept set outside aes() so a single line is drawn instead of
  # one identical line per row of perm6.
  geom_vline(xintercept = obs6, color = "blue") +
  # The y-axis title is supplied by scale_y_continuous(name = ...) below.
  labs(title = "11 to 2kb deletions", x = "Counts") +
  scale_y_continuous(
    # Primary axis: with binwidth = 1 the density equals the proportion of
    # permutations falling in each bin.
    name = "Proportion",
    breaks = seq(0, 0.007, 0.001),
    # Secondary axis: proportion times the number of permutations gives the
    # per-bin count.
    sec.axis = sec_axis(trans = ~ . * nrow(perm6), name = "Counts")
  ) +
  theme_bw() +
  theme(
    panel.grid = element_line(color = "grey85"),
    plot.title = element_text(size = 18, face = "bold", hjust = .5),
    axis.text.x = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13, hjust = .5, vjust = .5, face = "bold"
    )
  )

# Two-sided empirical p-value: twice the smaller tail count divided by the
# number of permutations. The comparison uses the V1 vector (not the whole
# data frame) so numerator and denominator are based on the same values.
# Evaluates to 0 when obs6 lies outside the range of the permuted values.
pvalue_exp6 <- 2 * min(sum(obs6 < perm6$V1), sum(obs6 > perm6$V1)) /
  length(perm6$V1)

#################
# ---- Species comparison: 1 to 10bp NonTE insertions ----
# Permutation results (column V1 is the value that is plotted and tested) and
# the observed count they are compared against.
perm7 <- read.table(
  "3p.NonTE.ins.bed.uniq.bed1_10bp.permutation.breaks.bed",
  header = FALSE
)
obs7 <- 11287

# Density-scaled histogram of V1 with a kernel density overlay and a vertical
# marker at the observed count.
# NOTE(review): `..density..` and `size` on line geoms are deprecated in
# ggplot2 >= 3.4 (after_stat(density) / linewidth); kept here so the script
# still runs on older ggplot2 versions.
p7 <- ggplot(data = perm7) +
  geom_histogram(
    aes(x = V1, y = ..density..),
    binwidth = 1, color = "black", fill = "gold1"
  ) +
  geom_density(aes(x = V1), color = "red", size = 1) +
  # Constant intercept set outside aes() so a single line is drawn instead of
  # one identical line per row of perm7.
  geom_vline(xintercept = obs7, color = "blue") +
  # The y-axis title is supplied by scale_y_continuous(name = ...) below.
  labs(title = "1 to 10bp NonTE insertions", x = "Counts") +
  scale_y_continuous(
    # Primary axis: with binwidth = 1 the density equals the proportion of
    # permutations falling in each bin.
    name = "Proportion",
    breaks = seq(0, 0.007, 0.001),
    # Secondary axis: proportion times the number of permutations gives the
    # per-bin count.
    sec.axis = sec_axis(trans = ~ . * nrow(perm7), name = "Counts")
  ) +
  theme_bw() +
  theme(
    panel.grid = element_line(color = "grey85"),
    plot.title = element_text(size = 18, face = "bold", hjust = .5),
    axis.text.x = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13, hjust = .5, vjust = .5, face = "bold"
    )
  )

# Two-sided empirical p-value: twice the smaller tail count divided by the
# number of permutations. The comparison uses the V1 vector (not the whole
# data frame) so numerator and denominator are based on the same values.
# Evaluates to 0 when obs7 lies outside the range of the permuted values.
pvalue_exp7 <- 2 * min(sum(obs7 < perm7$V1), sum(obs7 > perm7$V1)) /
  length(perm7$V1)

#################
# ---- Species comparison: 11bp to 20kb NonTE insertions ----
# Permutation results (column V1 is the value that is plotted and tested) and
# the observed count they are compared against.
perm8 <- read.table(
  "3p.NonTE.ins.bed.uniq.bed11_20kb.permutation.breaks.bed",
  header = FALSE
)
obs8 <- 6982

# Density-scaled histogram of V1 with a kernel density overlay and a vertical
# marker at the observed count.
# NOTE(review): `..density..` and `size` on line geoms are deprecated in
# ggplot2 >= 3.4 (after_stat(density) / linewidth); kept here so the script
# still runs on older ggplot2 versions.
p8 <- ggplot(data = perm8) +
  geom_histogram(
    aes(x = V1, y = ..density..),
    binwidth = 1, color = "black", fill = "gold1"
  ) +
  geom_density(aes(x = V1), color = "red", size = 1) +
  # Constant intercept set outside aes() so a single line is drawn instead of
  # one identical line per row of perm8.
  geom_vline(xintercept = obs8, color = "blue") +
  # The y-axis title is supplied by scale_y_continuous(name = ...) below.
  labs(title = "11 to 20kb NonTE insertions", x = "Counts") +
  scale_y_continuous(
    # Primary axis: with binwidth = 1 the density equals the proportion of
    # permutations falling in each bin.
    name = "Proportion",
    breaks = seq(0, 0.007, 0.001),
    # Secondary axis: proportion times the number of permutations gives the
    # per-bin count.
    sec.axis = sec_axis(trans = ~ . * nrow(perm8), name = "Counts")
  ) +
  theme_bw() +
  theme(
    panel.grid = element_line(color = "grey85"),
    plot.title = element_text(size = 18, face = "bold", hjust = .5),
    axis.text.x = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.text.y = element_text(
      colour = "grey20", size = 11, hjust = .5, vjust = .5, face = "plain"
    ),
    axis.title.y = element_text(
      colour = "grey20", size = 13, hjust = .5, vjust = .5, face = "bold"
    )
  )

# Two-sided empirical p-value: twice the smaller tail count divided by the
# number of permutations. The comparison uses the V1 vector (not the whole
# data frame) so numerator and denominator are based on the same values.
# Evaluates to 0 when obs8 lies outside the range of the permuted values.
pvalue_exp8 <- 2 * min(sum(obs8 < perm8$V1), sum(obs8 > perm8$V1)) /
  length(perm8$V1)

############################
# Arrange the eight panels in a 4-row grid; each row pairs a panel built from
# the "pop" directory (p1-p4) with its counterpart from the "3pecies"
# directory (p5-p8).
# The former par(mar = ...) call was dropped: par() configures base graphics
# only and has no effect on grid-based output such as grid.arrange().
grid.arrange(p1, p5, p2, p6, p3, p7, p4, p8, nrow = 4)

# Print explicitly so the p-values are also shown when the script is run via
# source(), where bare top-level expressions are not auto-printed by default.
print(pvalue_exp1)
print(pvalue_exp2)
print(pvalue_exp3)
print(pvalue_exp4)
print(pvalue_exp5)
print(pvalue_exp6)
print(pvalue_exp7)
print(pvalue_exp8)
