library(dplyr)
library(tidyverse)
library(stringi)
library(ggplot2)
# Region lengths used as denominators for the mutation rates below.
# Each file is read without a header and only its first column (V1) is kept.
# `as_tibble()` replaces the deprecated `as.tibble()`, and `FALSE` replaces the
# reassignable shorthand `F`.
gene_len <- read.csv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/intergenic/protein_coding_genic_len.TCN.txt",
  header = FALSE
) %>%
  as_tibble() %>%
  pull(V1)
# NOTE(review): the length is doubled; presumably to count both strands or
# both alleles -- confirm.
gene_len <- gene_len * 2

intergenic_len <- read.csv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/intergenic/protein_coding_intergenic_len.TCN.txt",
  header = FALSE
) %>%
  as_tibble() %>%
  pull(V1)
intergenic_len <- intergenic_len * 2

# Echo both denominators for a quick sanity check.
gene_len
intergenic_len
# Adds a `mut_type` column to a de-duplicated variant table.
#   * REF == "C": the change is written as REF>ALT.
#   * otherwise: REF and ALT are base-complemented first (A<->T, G<->C), so the
#     change is expressed from the complementary strand.
#   * ALT that is not a single A/C/G/T: `mut_type` is taken from characters
#     2-4 of `sig_cont` instead.
# Expects columns REF, ALT and sig_cont; returns the table with `mut_type`.
annotate_mut_type <- function(vcf_df) {
  vcf_df %>%
    unique() %>%
    mutate(
      mut_type = ifelse(
        REF == "C",
        paste(REF, ALT, sep = ">"),
        paste(chartr("ATGC", "TACG", REF), chartr("ATGC", "TACG", ALT), sep = ">")
      )
    ) %>%
    mutate(
      mut_type = ifelse(!ALT %in% c("A", "C", "G", "T"), substr(sig_cont, 2, 4), mut_type)
    )
}

# The intergenic VCF is read once (the original read the same file twice).
inter_raw_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/intergenic/A3A.total.intergenic.raw.rh.vcf")

# Show rows whose ALT is not a single base.
inter_raw_df %>%
  filter(!ALT %in% c("A", "C", "G", "T"))

inter_df <- annotate_mut_type(inter_raw_df)
inter_df$mut_type %>% unique()
inter_df$id %>% unique()

gene_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/intergenic/A3A.total.gene.raw.rh.vcf") %>%
  annotate_mut_type()





##by sample
###########

# Per-sample mutation counts (n) and per-length rates, stacked for the
# intergenic and genic tables. `class` records which table a row came from.
inter_gene_sample_df <- rbind(
  inter_df %>%
    group_by(id, TP53) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    group_by(id, TP53) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)

# Spot-check one sample against the raw tables.
nrow(filter(inter_df, id == "A3A_1st_C3_100ng-1"))
filter(gene_df, id == "A3A_1st_C3_100ng-1")
filter(inter_gene_sample_df, id == "A3A_1st_C3_100ng-1")

# Wide per-sample table: counts per class, rates recomputed from the counts,
# and each class's share of the summed rate.
inter_gene_sample_df %>%
  select(-len, -rate) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len,
    intergenic_rate = intergenic / intergenic_len
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  )


# Same information built as a join: counts and rates on the left, rate shares
# on the right (reduced to id + ratios so the join key is `id`).
left_join(
  inter_gene_sample_df %>%
    select(id, TP53, class, n) %>%
    spread(class, n) %>%
    mutate(
      genic_rate = genic / gene_len,
      intergenic_rate = intergenic / intergenic_len
    ),
  inter_gene_sample_df %>%
    select(id, TP53, rate, class) %>%
    spread(class, rate) %>%
    mutate(
      gene_ratio = genic / (genic + intergenic),
      intergenic_ratio = intergenic / (genic + intergenic)
    ) %>%
    select(id, gene_ratio, intergenic_ratio)
)
# Fix the display order of the grouping variables.
inter_gene_sample_df$TP53 <- factor(inter_gene_sample_df$TP53, levels = c("WT", "KO"))
inter_gene_sample_df$class <- factor(inter_gene_sample_df$class, levels = c("intergenic", "genic"))

# Per TP53 group and class: mean, SD, SE and t-based 95% confidence interval
# of each sample's share of the summed (genic + intergenic) rate.
inter_gene_sample_ci_df <- inter_gene_sample_df %>%
  select(id, TP53, rate, class) %>%
  spread(class, rate) %>%
  mutate(
    genic_ratio = genic / (genic + intergenic),
    intergenic_ratio = intergenic / (genic + intergenic)
  ) %>%
  select(id, TP53, genic_ratio, intergenic_ratio) %>%
  gather(class, ratio, genic_ratio:intergenic_ratio) %>%
  mutate(class = gsub("_ratio", "", class)) %>%
  group_by(TP53, class) %>%
  summarise(
    mean.ratio = mean(ratio, na.rm = TRUE),
    sd.ratio = sd(ratio, na.rm = TRUE),
    n.ratio = n()
  ) %>%
  mutate(
    se.ratio = sd.ratio / sqrt(n.ratio),
    lower.ci.ratio = mean.ratio - qt(1 - (0.05 / 2), n.ratio - 1) * se.ratio,
    upper.ci.ratio = mean.ratio + qt(1 - (0.05 / 2), n.ratio - 1) * se.ratio
  )

# `gather()` produced a character column; restore the plotting order.
inter_gene_sample_ci_df$class <- factor(inter_gene_sample_ci_df$class, levels = c("intergenic", "genic"))
#  filter(TP53=="WT")%>%
# Bar chart of the mean genic / intergenic rate share with 95% CI error bars,
# restricted to TP53 == "WT", saved as a PDF.
#
# Changes from the previous version:
#   * A faceted (WT + KO) `p2` was built first and immediately overwritten by
#     this WT-only plot; that dead assignment is removed. To get the faceted
#     figure back, drop the `filter()` and add `facet_wrap(~TP53)`.
#   * `scale_y_continuous()` was added twice, which makes ggplot2 replace the
#     first scale and emit a message; it is now added once.
#   * `limits =` is spelled out instead of relying on partial matching of `lim`.
p2 <- inter_gene_sample_ci_df %>%
  filter(TP53 == "WT") %>%
  # col = "black" is mapped (not set) on purpose so that the manual colour
  # scale below applies, exactly as in the original figure.
  ggplot(aes(x = class, y = mean.ratio, fill = class, col = "black")) +
  geom_bar(stat = "identity") +
  geom_errorbar(
    aes(y = mean.ratio, ymax = upper.ci.ratio, ymin = lower.ci.ratio),
    width = .5
  ) +
  scale_y_continuous(limits = c(0, 1)) +
  theme_classic() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 30),
    # plot.title = element_text(size = 30),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 40),
    axis.ticks.length = unit(.4, "cm"),
    axis.ticks = element_line(size = 4)
  ) +
  scale_color_manual(values = c("black"))

p2



ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222//intergenic.ci.pdf", p2,
       width = 10, height = 8)



# Within each TP53 group, compare the genic and intergenic rate shares
# (t-test on the per-sample ratios).
inter_gene_sample_df %>%
  select(id, TP53, rate, class) %>%
  spread(class, rate) %>%
  mutate(
    genic_ratio = genic / (genic + intergenic),
    intergenic_ratio = intergenic / (genic + intergenic)
  ) %>%
  select(id, TP53, genic_ratio, intergenic_ratio) %>%
  gather(class, ratio, genic_ratio:intergenic_ratio) %>%
  mutate(class = gsub("_ratio", "", class)) %>%
  group_by(TP53) %>%
  rstatix::t_test(ratio ~ class)

# Recorded output of the call above:
#TP53  .y.   group1 group2        n1    n2 statistic    df        p
#* <fct> <chr> <chr>  <chr>      <int> <int>     <dbl> <dbl>    <dbl>
#  1 WT    ratio genic  intergenic    10    10     -21.2    18 3.56e-14
#2 KO    ratio genic  intergenic     6     6     -15.0    10 3.58e- 8


# Same comparison through the pairwise interface.
inter_gene_sample_df %>%
  select(id, TP53, rate, class) %>%
  spread(class, rate) %>%
  mutate(
    genic_ratio = genic / (genic + intergenic),
    intergenic_ratio = intergenic / (genic + intergenic)
  ) %>%
  select(id, TP53, genic_ratio, intergenic_ratio) %>%
  gather(class, ratio, genic_ratio:intergenic_ratio) %>%
  mutate(class = gsub("_ratio", "", class)) %>%
  group_by(TP53) %>%
  rstatix::pairwise_t_test(ratio ~ class)

# Per TP53 group and class: mean, SD, SE and t-based 95% confidence interval
# of the per-sample mutation rate.
inter_gene_sample_rate_ci_df <- inter_gene_sample_df %>%
  select(id, TP53, rate, class) %>%
  group_by(TP53, class) %>%
  summarise(
    mean.rate = mean(rate, na.rm = TRUE),
    sd.rate = sd(rate, na.rm = TRUE),
    n.rate = n()
  ) %>%
  mutate(
    se.rate = sd.rate / sqrt(n.rate),
    lower.ci.rate = mean.rate - qt(1 - (0.05 / 2), n.rate - 1) * se.rate,
    upper.ci.rate = mean.rate + qt(1 - (0.05 / 2), n.rate - 1) * se.rate
  )
# Set the plotting order on the table built here. The previous line re-levelled
# `inter_gene_sample_ci_df` (the ratio table), which looks like a copy-paste slip.
inter_gene_sample_rate_ci_df$class <- factor(inter_gene_sample_rate_ci_df$class, levels = c("intergenic", "genic"))


# Genic vs. intergenic per-sample rate, tested within each TP53 group.

# t-test
inter_gene_sample_df %>%
  select(id, TP53, rate, class) %>%
  group_by(TP53) %>%
  rstatix::t_test(rate ~ class)

# pairwise t-test
inter_gene_sample_df %>%
  select(id, TP53, rate, class) %>%
  group_by(TP53) %>%
  rstatix::pairwise_t_test(rate ~ class)

# Wilcoxon test
inter_gene_sample_df %>%
  select(id, TP53, rate, class) %>%
  group_by(TP53) %>%
  rstatix::wilcox_test(rate ~ class)

# Wilcoxon test again (optional sample filters are left disabled)
inter_gene_sample_df %>%
  select(id, TP53, rate, class) %>%
  #  filter(grepl("3ug", id)) %>%
  #  filter(TP53 == "WT") %>%
  group_by(TP53) %>%
  rstatix::wilcox_test(rate ~ class)

# Disabled alternatives kept for reference:
#install.packages("DescTools")
#library("DescTools")
#DunnTest(formula=rate~class,data=inter_gene_sample_df%>%select(id,TP53,rate,class)%>%filter(TP53=="WT"))
#pairwise.wilcox.test((inter_gene_sample_df%>%select(id,TP53,rate,class)%>%filter(class=="intergenic")%>%filter(TP53=="WT"))$rate*10000000, (inter_gene_sample_df%>%select(id,TP53,rate,class)%>%filter(class=="genic")%>%filter(TP53=="WT"))$rate*10000000, p.adjust.method="bonferroni", correct=FALSE)

# Recorded t-test output:
#TP53  .y.   group1     group2    n1    n2 statistic    df      p
#* <fct> <chr> <chr>      <chr>  <int> <int>     <dbl> <dbl>  <dbl>
#  1 WT    rate  intergenic genic     10    10     0.914 15.7  0.375 
#2 KO    rate  intergenic genic      6     6     2.16   8.77 0.0596


# Bar chart of the mean per-sample rate (scaled by 1e6) with 95% CI error
# bars for TP53 == "WT", saved as a PDF.
inter_gene_sample_rate_ci_df$class <- factor(
  inter_gene_sample_rate_ci_df$class,
  levels = c("intergenic", "genic")
)

p_intergenic_ci <- inter_gene_sample_rate_ci_df %>%
  filter(TP53 == "WT") %>%
  ggplot(aes(x = class, y = mean.rate * 1000000, col = "black", fill = class)) +
  geom_bar(stat = "identity") +
  geom_errorbar(
    aes(
      y = mean.rate * 1000000,
      ymax = upper.ci.rate * 1000000,
      ymin = lower.ci.rate * 1000000
    ),
    width = .5
  ) +
  # facet_wrap(~TP53) +
  theme_classic() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 30),
    # plot.title = element_text(size = 30),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 40),
    axis.ticks.length = unit(.4, "cm"),
    axis.ticks = element_line(size = 4),
    legend.position = "none"
  ) +
  scale_colour_manual(values = c("black")) +
  scale_y_continuous(limits = c(0, 15))

p_intergenic_ci

ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222//intergenic.rate.ci.pdf",
  p_intergenic_ci,
  height = 10,
  width = 8
)



# Quick look: per-sample rate by TP53 status, coloured by region class.
# The same plot is drawn twice, as in the original script.
inter_gene_sample_df %>%
  ggplot(aes(x = TP53, y = rate, col = class)) +
  geom_boxplot()



inter_gene_sample_df %>%
  ggplot(aes(x = TP53, y = rate, col = class)) +
  geom_boxplot()


##by TP53
#########
# Counts of distinct genic variants (sample id and FORMAT dropped before
# de-duplication) per TP53 group and Func_refGene annotation.
gene_df %>%
  select(-id, -FORMAT) %>%
  unique() %>%
  group_by(TP53, Func_refGene) %>%
  dplyr::summarise(n = n())

# Preview of the intergenic half of the table built below.
inter_df %>%
  select(-id, -FORMAT) %>%
  unique() %>%
  group_by(TP53) %>%
  dplyr::summarise(n = n()) %>%
  mutate(
    len = intergenic_len,
    rate = n / intergenic_len,
    class = "intergenic"
  )

# Distinct-variant counts and per-length rates per TP53 group, stacked for
# the intergenic and genic tables.
inter_gene_group_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)


# Wide table with rates divided by 6 for every group, plus rate shares.
# NOTE(review): the same fixed divisor is applied to all TP53 groups here;
# a later step uses 10 for WT and 6 otherwise -- confirm which is intended.
inter_gene_group_df %>%
  select(TP53, class, n) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len / 6,
    intergenic_rate = intergenic / intergenic_len / 6
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  )

# Printed a second time, as in the original script.
inter_gene_group_df %>%
  select(TP53, class, n) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len / 6,
    intergenic_rate = intergenic / intergenic_len / 6
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  )

# WT genic vs. intergenic rate figure and goodness-of-fit test.
#
# Fixes relative to the previous version:
#   * The summary table was assigned to `inter_df`, overwriting the raw
#     intergenic mutation table that later sections still need (they select
#     and group by `id`, `FORMAT`, `mut_type`). It now lives in
#     `wt_region_rate_df`.
#   * A `left_join(` call was never closed, which made the file unparseable.
#   * The chi-square test used hand-typed counts that disagreed between the
#     printed call (4660, 6731) and the saved call (4660, 6729). The counts are
#     now taken from the table itself, so both use the same numbers.

# Per-TP53 counts in wide form with rates normalised by region length and by
# a per-group divisor (10 for WT, 6 otherwise -- presumably the number of
# samples per group; confirm), plus each region's share of the summed rate.
group_rate_df <- inter_gene_group_df %>%
  select(TP53, class, n) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = ifelse(TP53 == "WT", genic / gene_len / 10, genic / gene_len / 6),
    intergenic_rate = ifelse(TP53 == "WT", intergenic / intergenic_len / 10, intergenic / intergenic_len / 6)
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  )

# Long form (one row per region) for the WT group; this is what gets plotted.
wt_region_rate_df <- group_rate_df %>%
  filter(TP53 == "WT") %>%
  gather(region, rate, genic_rate:intergenic_rate)

# Same view for the KO group (printed only).
group_rate_df %>%
  filter(TP53 == "KO") %>%
  gather(region, rate, genic_rate:intergenic_rate)

wt_region_rate_df

# Observed WT counts per region.
# Recorded values from an earlier run: genic 4660, intergenic 6731.
wt_counts <- wt_region_rate_df %>%
  select(genic, intergenic) %>%
  unique()
wt_counts

# Chi-square goodness-of-fit: are WT mutations split between genic and
# intergenic regions in proportion to the region lengths?
observed_counts <- c(wt_counts$genic, wt_counts$intergenic)
expected_prop <- prop.table(c(gene_len, intergenic_len))
wt_chisq <- rstatix::chisq_test(observed_counts, p = expected_prop)
wt_chisq
# Recorded output from an earlier run:
#n statistic      p    df method          p.signif
#* <int>     <dbl>  <dbl> <dbl> <chr>           <chr>   
#  1     2      3.00 0.0831     1 Chi-square test ns      

wt_chisq %>%
  write.table("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/intergenic/intergenic.chisq.pval.txt",
              sep = "\t",
              quote = FALSE,
              row.names = FALSE)


wt_region_rate_df$region <- factor(wt_region_rate_df$region, levels = c("intergenic_rate", "genic_rate"))
avg_rate_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/mappable/average_mut_rate_TCN_NGA.region.txt")
avg_rate_df

# WT rates next to the average rate used as the reference line in the plot.
# NOTE(review): the original call was left unfinished
# (`avg_rate_df %>% select()` with no closing parenthesis). Joining the
# "mappable" average by TP53 mirrors how `avg_rate_df` is used in the plot
# below -- confirm this is the intended comparison.
left_join(
  group_rate_df %>%
    filter(TP53 == "WT") %>%
    gather(class, rate, genic_rate:intergenic_rate),
  avg_rate_df %>%
    filter(class == "mappable") %>%
    select(TP53, mut_rate),
  by = "TP53"
)

# Bar chart of WT rates (scaled by 1e6) with the WT "mappable" average rate
# drawn as a dashed reference line.
p <- wt_region_rate_df %>%
  ggplot(aes(x = region, y = rate * 1000000, fill = region, col = TP53)) +
  geom_hline(
    yintercept = (avg_rate_df %>% filter(TP53 == "WT") %>% filter(class == "mappable"))$mut_rate * 1000000,
    linetype = "dashed",
    colour = "darkgrey"
  ) +
  geom_bar(stat = "identity") +
  ggtitle("WT_intergenic") +
  theme_classic() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 30),
    # plot.title = element_text(size = 30),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 40),
    axis.ticks.length = unit(.4, "cm"),
    axis.ticks = element_line(size = 4)
  ) +
  scale_color_manual(values = c("black")) +
  scale_y_continuous(limits = c(0, 3))

p
# Older output location, kept disabled.
if (FALSE) {
  ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/230620/intergenic.pdf", p,
         width = 10, height = 8)
}
ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/intergenic.pdf", p,
       width = 10, height = 8)

# NOTE(review): these counts are hard-coded and differ from the WT counts
# tested above; presumably from another run or group -- confirm or remove.
chisq.test(c(3977, 8470), p = prop.table(c(gene_len, intergenic_len)))


wt_region_rate_df

##total
#######
# List the sample ids present in the genic table.
unique(gene_df$id)

# Distinct-variant totals across all samples (id, FORMAT and TP53 dropped
# before de-duplication), with per-length rates, for both region classes.
inter_gene_tot_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)

# Share of raw counts per class.
inter_gene_tot_df %>%
  select(class, n) %>%
  spread(class, n) %>%
  mutate(
    gene_ratio = genic / (genic + intergenic),
    inter_ratio = intergenic / (genic + intergenic)
  )

# Rates divided by 16, and each class's share of the summed rate.
inter_gene_tot_df %>%
  select(n, class) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len / 16,
    intergenic_rate = intergenic / intergenic_len / 16
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  )



##C>T,C>G, separate##
#####################


##by sample
###########
# Show any intergenic rows labelled with the combined "C>G,C>T" type.
filter(inter_df, mut_type == "C>G,C>T")

# Restrict mut_type to the two levels of interest; any other value becomes NA.
inter_df$mut_type <- factor(inter_df$mut_type, levels = c("C>T", "C>G"))
gene_df$mut_type <- factor(gene_df$mut_type, levels = c("C>T", "C>G"))

# Per-sample, per-mutation-type counts and per-length rates for both classes.
inter_gene_sample_df <- rbind(
  inter_df %>%
    group_by(id, TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    group_by(id, TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)


# Per sample: C>T vs. C>G share of the genic counts.
inter_gene_sample_df %>%
  select(-len, -rate) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len,
    intergenic_rate = intergenic / intergenic_len
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  ) %>%
  select(id, TP53, mut_type, genic) %>%
  spread(mut_type, genic) %>%
  mutate(`C>T_ratio` = `C>T` / (`C>T` + `C>G`)) %>%
  mutate(`C>G_ratio` = `C>G` / (`C>T` + `C>G`))


# Per sample: C>T vs. C>G share of the intergenic counts.
inter_gene_sample_df %>%
  select(-len, -rate) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len,
    intergenic_rate = intergenic / intergenic_len
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  ) %>%
  select(id, TP53, mut_type, intergenic) %>%
  spread(mut_type, intergenic) %>%
  mutate(`C>T_ratio` = `C>T` / (`C>T` + `C>G`)) %>%
  mutate(`C>G_ratio` = `C>G` / (`C>T` + `C>G`))

# Per sample and mutation type: counts and rates (left) joined with each
# class's share of the summed rate (right).
#
# Fixes relative to the previous version:
#   * `mut_type` is kept through `select()`. The table has one row per
#     id x mut_type x class, so dropping it made `spread()` fail on
#     duplicate keys.
#   * The right-hand side is reduced to the key columns plus the ratios and
#     the join key is explicit. Previously both sides carried `genic` /
#     `intergenic` (counts on the left, rates on the right), so the natural
#     join also matched on those columns.
#   * `gene_rato` is renamed `gene_ratio`, matching the earlier per-sample join.
left_join(
  inter_gene_sample_df %>%
    select(id, TP53, mut_type, class, n) %>%
    spread(class, n) %>%
    mutate(
      genic_rate = genic / gene_len,
      intergenic_rate = intergenic / intergenic_len
    ),
  inter_gene_sample_df %>%
    select(id, TP53, mut_type, rate, class) %>%
    spread(class, rate) %>%
    mutate(
      gene_ratio = genic / (genic + intergenic),
      intergenic_ratio = intergenic / (genic + intergenic)
    ) %>%
    select(id, TP53, mut_type, gene_ratio, intergenic_ratio),
  by = c("id", "TP53", "mut_type")
)

# Quick look: per-sample rate by TP53 status, coloured by region class.
ggplot(inter_gene_sample_df, aes(x = TP53, y = rate, col = class)) +
  geom_boxplot()


##by TP53
#########


# Distinct-variant counts and per-length rates per TP53 group and mutation
# type, stacked for both region classes.
inter_gene_group_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)


# Per TP53 group: C>T vs. C>G share of the genic counts.
inter_gene_group_df %>%
  select(TP53, mut_type, class, n) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len,
    intergenic_rate = intergenic / intergenic_len
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  ) %>%
  select(TP53, mut_type, genic) %>%
  spread(mut_type, genic) %>%
  mutate(`C>T_ratio` = `C>T` / (`C>T` + `C>G`)) %>%
  mutate(`C>G_ratio` = `C>G` / (`C>T` + `C>G`))


# Per TP53 group: C>T vs. C>G share of the intergenic counts.
# The intermediate rate columns are discarded by the select() below.
inter_gene_group_df %>%
  select(TP53, mut_type, class, n) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len / 6,
    intergenic_rate = intergenic / intergenic_len / 6
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  ) %>%
  select(TP53, mut_type, intergenic) %>%
  spread(mut_type, intergenic) %>%
  mutate(`C>T_ratio` = `C>T` / (`C>T` + `C>G`)) %>%
  mutate(`C>G_ratio` = `C>G` / (`C>T` + `C>G`))






##total
#######

# Distinct-variant totals per mutation type across all samples, with
# per-length rates, for both region classes.
inter_gene_tot_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    group_by(mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    group_by(mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)

# Per mutation type: share of raw counts per class.
inter_gene_tot_df %>%
  select(mut_type, class, n) %>%
  spread(class, n) %>%
  mutate(
    gene_ratio = genic / (genic + intergenic),
    inter_ratio = intergenic / (genic + intergenic)
  )

# Overall C>T vs. C>G share of the genic counts.
inter_gene_tot_df %>%
  select(mut_type, n, class) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len,
    intergenic_rate = intergenic / intergenic_len
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  ) %>%
  select(mut_type, genic) %>%
  spread(mut_type, genic) %>%
  mutate(`C>T_ratio` = `C>T` / (`C>T` + `C>G`)) %>%
  mutate(`C>G_ratio` = `C>G` / (`C>T` + `C>G`))


# Overall C>T vs. C>G share of the intergenic counts.
inter_gene_tot_df %>%
  select(mut_type, n, class) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len,
    intergenic_rate = intergenic / intergenic_len
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  ) %>%
  select(mut_type, intergenic) %>%
  spread(mut_type, intergenic) %>%
  mutate(`C>T_ratio` = `C>T` / (`C>T` + `C>G`)) %>%
  mutate(`C>G_ratio` = `C>G` / (`C>T` + `C>G`))




##`C>T`,`C>G` compare
#####################



##by sample
###########
# Show any intergenic rows labelled with the combined type. The literal was
# "C>G,CT", a typo of the "C>G,C>T" check used earlier in this script.
inter_df %>% filter(mut_type == "C>G,C>T")

# Per-sample, per-mutation-type counts and per-length rates for both classes.
inter_gene_sample_df <- rbind(
  inter_df %>%
    group_by(id, TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(len = intergenic_len) %>%
    mutate(rate = n / intergenic_len) %>%
    mutate(class = "intergenic"),
  gene_df %>%
    group_by(id, TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(len = gene_len) %>%
    mutate(rate = n / gene_len) %>%
    mutate(class = "genic")
)


# Wide table per sample x mutation type: counts, rates and rate shares.
inter_gene_sample_df %>%
  select(-len, -rate) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len,
    intergenic_rate = intergenic / intergenic_len
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  ) %>%
  print(n = 100)


# Counts and rates (left) joined with rate shares (right).
# `mut_type` is kept through `select()`: the table has one row per
# id x mut_type x class, so dropping it made `spread()` fail on duplicate
# keys. The right side is reduced to keys + ratios and the join key is
# explicit, because both sides otherwise carry `genic` / `intergenic` with
# different meanings (counts vs. rates). `gene_rato` is renamed `gene_ratio`.
left_join(
  inter_gene_sample_df %>%
    select(id, TP53, mut_type, class, n) %>%
    spread(class, n) %>%
    mutate(
      genic_rate = genic / gene_len,
      intergenic_rate = intergenic / intergenic_len
    ),
  inter_gene_sample_df %>%
    select(id, TP53, mut_type, rate, class) %>%
    spread(class, rate) %>%
    mutate(
      gene_ratio = genic / (genic + intergenic),
      intergenic_ratio = intergenic / (genic + intergenic)
    ) %>%
    select(id, TP53, mut_type, gene_ratio, intergenic_ratio),
  by = c("id", "TP53", "mut_type")
)

# Quick look: per-sample rate by TP53 status, coloured by region class.
ggplot(inter_gene_sample_df, aes(x = TP53, y = rate, col = class)) +
  geom_boxplot()


##by TP53
#########


# Distinct-variant counts and per-length rates per TP53 group and mutation
# type, stacked for both region classes.
inter_gene_group_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)


# Per TP53 group and mutation type: rates divided by 10 for WT and by 6
# otherwise, plus each class's share of the summed rate.
inter_gene_group_df %>%
  select(TP53, mut_type, class, n) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = ifelse(TP53 == "WT", genic / gene_len / 10, genic / gene_len / 6),
    intergenic_rate = ifelse(TP53 == "WT", intergenic / intergenic_len / 10, intergenic / intergenic_len / 6)
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  )




##total
#######

# Distinct-variant totals per mutation type across all samples, with
# per-length rates, for both region classes.
inter_gene_tot_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    group_by(mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(len = intergenic_len) %>%
    mutate(rate = n / intergenic_len) %>%
    mutate(class = "intergenic"),
  gene_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    group_by(mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(len = gene_len) %>%
    mutate(rate = n / gene_len) %>%
    mutate(class = "genic")
)

# Per mutation type: share of raw counts per class.
inter_gene_tot_df %>%
  select(mut_type, class, n) %>%
  spread(class, n) %>%
  mutate(
    gene_ratio = genic / (genic + intergenic),
    inter_ratio = intergenic / (genic + intergenic)
  )

# Per mutation type: rates averaged over all samples, and rate shares.
# The divisor was 12 here, while the equivalent all-sample step earlier in
# this script divides by 16 and the per-group steps use 10 (WT) + 6 (other).
# It is aligned to 16; the rate shares are unaffected by this divisor.
# NOTE(review): confirm that 16 is the intended total sample count.
inter_gene_tot_df %>%
  select(mut_type, n, class) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len / 16,
    intergenic_rate = intergenic / intergenic_len / 16
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  )



##data saving##


##by sample
###########

# Rebuild the per-sample table without the mutation-type split.
inter_gene_sample_df <- rbind(
  inter_df %>%
    group_by(id, TP53) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    group_by(id, TP53) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)

# Spot-check one sample against the raw tables.
nrow(filter(inter_df, id == "A3A_1st_C3_100ng-1"))
filter(gene_df, id == "A3A_1st_C3_100ng-1")
filter(inter_gene_sample_df, id == "A3A_1st_C3_100ng-1")

# Write the wide per-sample table (counts, rates, rate shares) as
# tab-separated text.
inter_gene_sample_df %>%
  select(-len, -rate) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len,
    intergenic_rate = intergenic / intergenic_len
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  ) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/intergenic/intergenic_sample.txt",
    quote = FALSE,
    row.names = FALSE,
    sep = "\t"
  )


# Counts and rates (left) joined with rate shares (right); the right side
# is reduced to id + ratios so the join key is `id`.
left_join(
  inter_gene_sample_df %>%
    select(id, TP53, class, n) %>%
    spread(class, n) %>%
    mutate(
      genic_rate = genic / gene_len,
      intergenic_rate = intergenic / intergenic_len
    ),
  inter_gene_sample_df %>%
    select(id, TP53, rate, class) %>%
    spread(class, rate) %>%
    mutate(
      gene_ratio = genic / (genic + intergenic),
      intergenic_ratio = intergenic / (genic + intergenic)
    ) %>%
    select(id, gene_ratio, intergenic_ratio)
)

# Fix the display order of the grouping variables.
inter_gene_sample_df$TP53 <- factor(inter_gene_sample_df$TP53, levels = c("WT", "KO"))
inter_gene_sample_df$class <- factor(inter_gene_sample_df$class, levels = c("intergenic", "genic"))


# Genic vs. intergenic per-sample rate within each TP53 group.
# The spread/gather round trip turns `class` back into a character column;
# the gsub() leaves the labels unchanged because they carry no "_rate" suffix.

# t-test
inter_gene_sample_df %>%
  select(id, TP53, rate, class) %>%
  spread(class, rate) %>%
  gather(class, rate, genic:intergenic) %>%
  mutate(class = gsub("_rate", "", class)) %>%
  group_by(TP53) %>%
  rstatix::t_test(rate ~ class)

# Wilcoxon test
inter_gene_sample_df %>%
  select(id, TP53, rate, class) %>%
  spread(class, rate) %>%
  gather(class, rate, genic:intergenic) %>%
  mutate(class = gsub("_rate", "", class)) %>%
  group_by(TP53) %>%
  rstatix::wilcox_test(rate ~ class)
# Recorded output:
#TP53  .y.   group1 group2        n1    n2 statistic    df     p
#* <fct> <chr> <chr>  <chr>      <int> <int>     <dbl> <dbl> <dbl>
#1 WT    rate  genic  intergenic    10    10    -0.896 15.7  0.384
#2 KO    rate  genic  intergenic     6     6    -1.75   8.59 0.116

# Disabled exploratory block: nothing inside `if (FALSE)` is evaluated.
# It summarises per-sample rates and rate shares per TP53 x class as mean with
# a two-sided 95% t interval, and draws the corresponding plots.
if (FALSE) {
  # Mean per-sample rate per TP53 x class with its 95% interval.
  inter_gene_sample_rate_ci_df <- inter_gene_sample_df %>%
    select(id, TP53, rate, class) %>%
    spread(class, rate) %>%
    gather(class, rate, genic:intergenic) %>%
    mutate(class = gsub("_rate", "", class)) %>%
    group_by(TP53, class) %>%
    summarise(
      mean.rate = mean(rate, na.rm = TRUE),
      sd.rate = sd(rate, na.rm = TRUE),
      n.rate = n()
    ) %>%
    mutate(
      se.rate = sd.rate / sqrt(n.rate),
      lower.ci.rate = mean.rate - qt(1 - (0.05 / 2), n.rate - 1) * se.rate,
      upper.ci.rate = mean.rate + qt(1 - (0.05 / 2), n.rate - 1) * se.rate
    )

  # t-test on each class's share of the summed rate, within each TP53 group.
  inter_gene_sample_df %>%
    select(id, TP53, rate, class) %>%
    spread(class, rate) %>%
    mutate(
      genic_ratio = genic / (genic + intergenic),
      intergenic_ratio = intergenic / (genic + intergenic)
    ) %>%
    select(id, TP53, genic_ratio, intergenic_ratio) %>%
    gather(class, ratio, genic_ratio:intergenic_ratio) %>%
    mutate(class = gsub("_ratio", "", class)) %>%
    group_by(TP53) %>%
    rstatix::t_test(ratio ~ class)

  # Mean share per TP53 x class with its 95% interval.
  inter_gene_sample_ci_df <- inter_gene_sample_df %>%
    select(id, TP53, rate, class) %>%
    spread(class, rate) %>%
    mutate(
      genic_ratio = genic / (genic + intergenic),
      intergenic_ratio = intergenic / (genic + intergenic)
    ) %>%
    select(id, TP53, genic_ratio, intergenic_ratio) %>%
    gather(class, ratio, genic_ratio:intergenic_ratio) %>%
    mutate(class = gsub("_ratio", "", class)) %>%
    group_by(TP53, class) %>%
    summarise(
      mean.ratio = mean(ratio, na.rm = TRUE),
      sd.ratio = sd(ratio, na.rm = TRUE),
      n.ratio = n()
    ) %>%
    mutate(
      se.ratio = sd.ratio / sqrt(n.ratio),
      lower.ci.ratio = mean.ratio - qt(1 - (0.05 / 2), n.ratio - 1) * se.ratio,
      upper.ci.ratio = mean.ratio + qt(1 - (0.05 / 2), n.ratio - 1) * se.ratio
    )
  inter_gene_sample_ci_df$class <- factor(
    inter_gene_sample_ci_df$class,
    levels = c("intergenic", "genic")
  )

  # Bar chart of the mean shares with error bars, one facet per TP53 status.
  #  filter(TP53=="WT")%>%
  p2 <- inter_gene_sample_ci_df %>%
    ggplot(aes(x = class, y = mean.ratio, fill = class, col = "black")) +
    geom_bar(stat = "identity") +
    geom_errorbar(
      aes(y = mean.ratio, ymax = upper.ci.ratio, ymin = lower.ci.ratio),
      width = .5
    ) +
    facet_wrap(~TP53) +
    # The y scale is set once; adding the same scale a second time only makes
    # ggplot2 replace the first one.
    scale_y_continuous(limits = c(0, 1)) +
    theme_bw() +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_text(size = 30),
      #plot.title=element_text(size=30),
      axis.text.y = element_text(size = 40),
      axis.title.y = element_text(size = 40),
      axis.ticks.length = unit(.4, "cm"),
      axis.ticks = element_line(size = 4)
    ) +
    scale_color_manual(values = c("black"))
  p2
  ggsave(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/intergenic.ci.pdf",
    p2,
    width = 10, height = 8
  )

  # Bar chart of the mean rate (scaled by 1e6) with its 95% interval.
  # The pipeline ends at facet_wrap(): a trailing `+` here would try to add the
  # separate ggplot() call below onto this plot.
  inter_gene_sample_df %>%
    group_by(TP53, class) %>%
    summarise(
      mean.rate = mean(rate, na.rm = TRUE),
      sd.rate = sd(rate, na.rm = TRUE),
      n.rate = n()
    ) %>%
    mutate(
      se.rate = sd.rate / sqrt(n.rate),
      lower.ci.rate = mean.rate - qt(1 - (0.05 / 2), n.rate - 1) * se.rate,
      upper.ci.rate = mean.rate + qt(1 - (0.05 / 2), n.rate - 1) * se.rate
    ) %>%
    ggplot(aes(x = class, y = mean.rate * 1000000)) +
    geom_bar(stat = "identity") +
    geom_errorbar(aes(
      y = mean.rate * 1000000,
      ymax = upper.ci.rate * 1000000,
      ymin = lower.ci.rate * 1000000
    )) +
    facet_wrap(~TP53)

  # Box plot of per-sample rates by TP53 status, coloured by class.
  ggplot(inter_gene_sample_df, aes(x = TP53, y = rate, col = class)) +
    geom_boxplot()
}
##by TP53
#########
# Genic rows with `id` and `FORMAT` dropped and de-duplicated, counted per
# TP53 status and Func_refGene annotation (console output only).
gene_df %>%
  select(-id, -FORMAT) %>%
  unique() %>%
  group_by(TP53, Func_refGene) %>%
  dplyr::summarise(n = n())

# Pooled counts per TP53 status: rows are de-duplicated after dropping the
# sample-specific columns, counted, and divided by the reference length of the
# matching region class.
inter_gene_group_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)

# Disabled variant of the export below, restricted to WT and reshaped to long
# form. NOTE(review): if re-enabled it would overwrite `inter_df`, which later
# sections still read as the raw intergenic variant table.
if (FALSE) {
  inter_df <- inter_gene_group_df %>%
    select(TP53, class, n) %>%
    spread(class, n) %>%
    mutate(
      genic_rate = ifelse(TP53 == "WT", genic / gene_len / 10, genic / gene_len / 6),
      intergenic_rate = ifelse(
        TP53 == "WT",
        intergenic / intergenic_len / 10,
        intergenic / intergenic_len / 6
      )
    ) %>%
    mutate(
      genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
      intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
    ) %>%
    filter(TP53 == "WT") %>%
    gather(region, rate, genic_rate:intergenic_rate)
}

# Per-TP53 export: pooled counts divided by reference length and by a
# hard-coded divisor (10 for WT, 6 otherwise), plus each rate's share.
# NOTE(review): 10 and 6 are presumably the numbers of WT and KO samples --
# confirm against the current sample list.
inter_gene_group_df %>%
  select(TP53, class, n) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = ifelse(TP53 == "WT", genic / gene_len / 10, genic / gene_len / 6),
    intergenic_rate = ifelse(
      TP53 == "WT",
      intergenic / intergenic_len / 10,
      intergenic / intergenic_len / 6
    )
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  ) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/intergenic/intergenic_TP53.txt",
    quote = FALSE,
    row.names = FALSE,
    sep = "\t"
  )



##total
#######
# List the sample ids present in the genic table.
gene_df$id %>%
  unique()

# Pooled counts over all samples: rows are de-duplicated after dropping `id`,
# `FORMAT` and `TP53`, counted, and divided by the reference length.
inter_gene_tot_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)

# Share of the raw pooled counts that is genic / intergenic.
inter_gene_tot_df %>%
  select(class, n) %>%
  spread(class, n) %>%
  mutate(
    gene_ratio = genic / (genic + intergenic),
    inter_ratio = intergenic / (genic + intergenic)
  )

# Counts divided by reference length and by a hard-coded 16, plus each rate's
# share. NOTE(review): 16 is presumably the total number of samples -- confirm.
inter_gene_tot_df %>%
  select(n, class) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len / 16,
    intergenic_rate = intergenic / intergenic_len / 16
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  )



##C>T,C>G, separate##
#####################


##by sample
###########
# Show intergenic rows labelled with the combined "C>G,C>T" type before the
# label column is restricted to two levels.
inter_df %>%
  filter(mut_type == "C>G,C>T")

# Restrict mut_type to the two levels of interest; any other label becomes NA.
inter_df$mut_type <- factor(inter_df$mut_type, levels = c("C>T", "C>G"))
gene_df$mut_type <- factor(gene_df$mut_type, levels = c("C>T", "C>G"))

# Per-sample counts split by mut_type, divided by the reference length of the
# matching region class.
inter_gene_sample_df <- rbind(
  inter_df %>%
    group_by(id, TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    group_by(id, TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)

# Per-sample export of the C>T / C>G split within each region class.
# For each region the counts are spread into one column per mut_type and each
# type's share of the two counts summed is added. Only the counts feed these
# shares, so no length-normalised rate is computed here.
rbind(
  inter_gene_sample_df %>%
    select(-len, -rate) %>%
    # Spreading by class first keeps one row per sample x mut_type even when a
    # combination is missing from one of the two region classes.
    spread(class, n) %>%
    select(id, TP53, mut_type, genic) %>%
    spread(mut_type, genic) %>%
    mutate(
      `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
      `C>G_ratio` = `C>G` / (`C>T` + `C>G`),
      region = "genic"
    ),
  inter_gene_sample_df %>%
    select(-len, -rate) %>%
    spread(class, n) %>%
    select(id, TP53, mut_type, intergenic) %>%
    spread(mut_type, intergenic) %>%
    mutate(
      `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
      `C>G_ratio` = `C>G` / (`C>T` + `C>G`),
      region = "intergenic"
    )
) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/intergenic/intergenic_sample.ratio.mut_type.txt",
    quote = FALSE,
    row.names = FALSE,
    sep = "\t"
  )


##by TP53
#########


# Pooled counts per TP53 status and mut_type: rows are de-duplicated after
# dropping `id` and `FORMAT`, counted, and divided by the reference length.
inter_gene_group_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)

# Per-TP53 export of the C>T / C>G split within each region class.
# For each region the pooled counts are spread into one column per mut_type
# and each type's share of the two counts summed is added. Only the counts
# feed these shares, so no length-normalised rate is computed here.
rbind(
  inter_gene_group_df %>%
    select(TP53, mut_type, class, n) %>%
    spread(class, n) %>%
    select(TP53, mut_type, genic) %>%
    spread(mut_type, genic) %>%
    mutate(
      `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
      `C>G_ratio` = `C>G` / (`C>T` + `C>G`),
      region = "genic"
    ),
  inter_gene_group_df %>%
    select(TP53, mut_type, class, n) %>%
    spread(class, n) %>%
    select(TP53, mut_type, intergenic) %>%
    spread(mut_type, intergenic) %>%
    mutate(
      `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
      `C>G_ratio` = `C>G` / (`C>T` + `C>G`),
      region = "intergenic"
    )
) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/intergenic/intergenic_TP53.ratio.mut_type.txt",
    quote = FALSE,
    row.names = FALSE,
    sep = "\t"
  )





##total
#######

# Pooled counts over all samples per mut_type: rows are de-duplicated after
# dropping `id`, `FORMAT` and `TP53`, counted, and divided by the reference
# length of the matching region class.
inter_gene_tot_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    group_by(mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    group_by(mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)

# Per mut_type, the share of the raw pooled counts that is genic / intergenic.
inter_gene_tot_df %>%
  select(mut_type, class, n) %>%
  spread(class, n) %>%
  mutate(
    gene_ratio = genic / (genic + intergenic),
    inter_ratio = intergenic / (genic + intergenic)
  )

# C>T / C>G split of the pooled genic counts (console output only).
# Only the counts feed the shares, so no length-normalised rate is computed.
inter_gene_tot_df %>%
  select(mut_type, n, class) %>%
  spread(class, n) %>%
  select(mut_type, genic) %>%
  spread(mut_type, genic) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )

# Same split for the pooled intergenic counts.
inter_gene_tot_df %>%
  select(mut_type, n, class) %>%
  spread(class, n) %>%
  select(mut_type, intergenic) %>%
  spread(mut_type, intergenic) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )




##`C>T`,`C>G` compare
#####################



##by sample
###########
# Look for intergenic rows carrying the combined "C>G,C>T" label.
# NOTE(review): mut_type was converted above to a factor with only the levels
# "C>T" and "C>G", so this filter is expected to print zero rows at this point.
inter_df %>%
  filter(mut_type == "C>G,C>T")

# Per-sample counts split by mut_type, divided by the reference length of the
# matching region class.
inter_gene_sample_df <- rbind(
  inter_df %>%
    group_by(id, TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    group_by(id, TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)


# Per-sample, per-mut_type export: genic / intergenic counts side by side,
# each count divided by its reference length, and each rate's share of the two
# rates summed.
inter_gene_sample_df %>%
  select(-len, -rate) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = genic / gene_len,
    intergenic_rate = intergenic / intergenic_len
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  ) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/intergenic/intergenic_sample.ratio.mut_type.region.txt",
    quote = FALSE,
    row.names = FALSE,
    sep = "\t"
  )


# Print, per sample and mut_type, the genic / intergenic counts and
# count-per-length rates next to each class's share of the summed rate.
# mut_type has to stay in both tables: the summary has one row per
# sample x mut_type x class, so spreading by class without it would hit
# duplicate keys.
left_join(
  inter_gene_sample_df %>%
    select(id, TP53, mut_type, class, n) %>%
    spread(class, n) %>%
    mutate(
      genic_rate = genic / gene_len,
      intergenic_rate = intergenic / intergenic_len
    ),
  inter_gene_sample_df %>%
    select(id, TP53, mut_type, rate, class) %>%
    spread(class, rate) %>%
    mutate(
      gene_ratio = genic / (genic + intergenic),
      intergenic_ratio = intergenic / (genic + intergenic)
    ) %>%
    # Keep only the keys and the shares: `genic` / `intergenic` hold rates
    # here but counts on the left side, so they must not act as join keys.
    select(id, TP53, mut_type, gene_ratio, intergenic_ratio),
  by = c("id", "TP53", "mut_type")
)

# Box plot of the rates by TP53 status, coloured by region class; each point
# is one sample x mut_type row of the summary.
ggplot(inter_gene_sample_df, aes(x = TP53, y = rate, col = class)) +
  geom_boxplot()


##by TP53
#########


# Pooled counts per TP53 status and mut_type: rows are de-duplicated after
# dropping `id` and `FORMAT`, counted, and divided by the reference length.
inter_gene_group_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    select(-id, -FORMAT) %>%
    unique() %>%
    group_by(TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)

# Per-TP53, per-mut_type export: pooled counts divided by reference length and
# by a hard-coded divisor (10 for WT, 6 otherwise), plus each rate's share.
# NOTE(review): 10 and 6 are presumably the numbers of WT and KO samples --
# confirm against the current sample list.
inter_gene_group_df %>%
  select(TP53, mut_type, class, n) %>%
  spread(class, n) %>%
  mutate(
    genic_rate = ifelse(TP53 == "WT", genic / gene_len / 10, genic / gene_len / 6),
    intergenic_rate = ifelse(
      TP53 == "WT",
      intergenic / intergenic_len / 10,
      intergenic / intergenic_len / 6
    )
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  ) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/intergenic/intergenic_TP53.ratio.mut_type.region.txt",
    quote = FALSE,
    row.names = FALSE,
    sep = "\t"
  )




##total
#######

# Pooled counts over all samples per mut_type: rows are de-duplicated after
# dropping `id`, `FORMAT` and `TP53`, counted, and divided by the reference
# length of the matching region class.
inter_gene_tot_df <- rbind(
  inter_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    group_by(mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = intergenic_len,
      rate = n / intergenic_len,
      class = "intergenic"
    ),
  gene_df %>%
    select(-id, -FORMAT, -TP53) %>%
    unique() %>%
    group_by(mut_type) %>%
    dplyr::summarise(n = n()) %>%
    mutate(
      len = gene_len,
      rate = n / gene_len,
      class = "genic"
    )
)

# Per mut_type, the share of the raw pooled counts that is genic / intergenic.
inter_gene_tot_df %>%
  select(mut_type, class, n) %>%
  spread(class, n) %>%
  mutate(
    gene_ratio = genic / (genic + intergenic),
    inter_ratio = intergenic / (genic + intergenic)
  )

# Per mut_type, counts divided by reference length and by the number of
# samples, plus each rate's share. The sample count is taken from the ids
# present in the two variant tables instead of a literal, so it cannot drift
# from the data; it is shown as `n_samples` in the printed table. The share
# columns do not depend on it because the divisor cancels.
inter_gene_tot_df %>%
  select(mut_type, n, class) %>%
  spread(class, n) %>%
  mutate(
    n_samples = length(union(inter_df$id, gene_df$id)),
    genic_rate = genic / gene_len / n_samples,
    intergenic_rate = intergenic / intergenic_len / n_samples
  ) %>%
  mutate(
    genic_rate_ratio = genic_rate / (genic_rate + intergenic_rate),
    intergenic_rate_ratio = intergenic_rate / (intergenic_rate + genic_rate)
  )

