library(dplyr)
library(tidyverse)
library(ggplot2)
# Per-region mutation-type table read from the intron analysis output.
# Rows whose region is "exon" are relabelled "exon_internal"; every other
# region label is kept unchanged.
intron_mut_type_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/intron/intron_TP53.ratio.mut_type.txt"
) %>%
  mutate(region = ifelse(region == "exon", "exon_internal", region))

# Mutation-type table read from the UTR analysis output (used as-is).
UTR_mut_type_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/UTR/UTR_TP53.mut_type.txt"
)





# Exonic variants: duplicate rows are dropped and only single-base
# substitutions (REF and ALT both one of A/C/G/T) are kept.
#
# mut_type is written relative to the pyrimidine of the reference base pair:
# a C or T reference is kept as-is, while a G or A reference is complemented
# (both REF and ALT) so that e.g. G>A is reported as C>T.
# The previous version only kept REF == "C" as-is, which turned T-reference
# calls into "A>x" labels; C/G-reference calls are unaffected by this fix.
exon_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/intron_excl_overlap/A3A.total.exon.excl_overlap.raw.rh.vcf"
) %>%
  unique() %>%
  filter(
    REF %in% c("A", "C", "G", "T"),
    ALT %in% c("A", "C", "G", "T")
  ) %>%
  mutate(
    mut_type = ifelse(
      REF %in% c("C", "T"),
      paste(REF, ALT, sep = ">"),
      paste(
        chartr("ACGT", "TGCA", REF),
        chartr("ACGT", "TGCA", ALT),
        sep = ">"
      )
    )
  )



# Variant tables for three genomic partitions. They are grouped by TP53 and
# mut_type further down, so both columns are expected to be present already.

# Mappable regions.
map_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/mappable/A3A.total.mappable.raw.rh.vcf"
)

# Intergenic regions.
intergenic_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/intergenic/A3A.total.intergenic.raw.rh.vcf"
)

# Genic regions.
genic_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/intergenic/A3A.total.gene.raw.rh.vcf"
)

# Count variants per TP53 genotype and mutation type, spread the counts into
# one column per mutation type, and add the C>G / C>T fractions (each relative
# to C>T + C>G) together with a fixed region label.
#
# vcf_df       : data frame with TP53 and mut_type columns.
# region_label : string stored in the `region` column of the result.
# Returns an ungrouped tibble with one row per TP53 value.
summarise_mut_type_ratio <- function(vcf_df, region_label) {
  vcf_df %>%
    group_by(TP53, mut_type) %>%
    dplyr::summarise(n = n(), .groups = "drop") %>%
    # names_sort keeps the mutation-type columns in sorted order, as spread() did
    pivot_wider(names_from = mut_type, values_from = n, names_sort = TRUE) %>%
    mutate(
      `C>G_ratio` = `C>G` / (`C>T` + `C>G`),
      `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
      region = region_label
    )
}

# Stack the pre-computed intron/UTR tables with freshly summarised exon,
# intergenic and genic tables. rbind() matches columns by name and fails
# loudly if any table carries a different set of columns.
mut_type_merge_df <- rbind(
  intron_mut_type_df,
  UTR_mut_type_df,
  summarise_mut_type_ratio(exon_df, "exon_tot"),
  summarise_mut_type_ratio(intergenic_df, "intergenic"),
  summarise_mut_type_ratio(genic_df, "genic")
)
mut_type_merge_df

# Persist the merged table as a tab-separated file.
mut_type_merge_df %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/UTR/mut_type.strand.merge.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )
# C>T fraction (C>T / (C>T + C>G)) per TP53 genotype over the mappable
# regions. The result is only displayed, not stored.
map_df %>%
  group_by(TP53, mut_type) %>%
  dplyr::summarise(n = n()) %>%
  spread(mut_type, n) %>%
  mutate(`C>T_ratio` = `C>T` / (`C>T` + `C>G`))

# Recorded output of the summary above. The two ratios (KO 0.560, WT 0.537)
# are the values used for the horizontal reference lines in the plots below.
#   TP53  `C>G` `C>T` `C>T_ratio`
#   <chr> <int> <int>       <dbl>
# 1 KO     3811  4844       0.560
# 2 WT     5269  6122       0.537

# Fix the display order of genotypes and regions for every plot below.
mut_type_merge_df$TP53 <- factor(mut_type_merge_df$TP53, levels = c("WT", "KO"))
mut_type_merge_df$region <- factor(
  mut_type_merge_df$region,
  levels = c("intergenic", "genic", "UTR5", "exon_tot", "exon_internal", "intron", "UTR3")
)

# C>T fraction per region (UTR5, exon_tot, intron, UTR3), dodged by TP53.
# Blue line: 0.560 (KO), red line: 0.537 (WT) -- the C>T ratios recorded from
# the mappable-region summary.
p_mut_type_tot <- mut_type_merge_df %>%
  filter(
    region != "exon_internal",
    region != "intergenic",
    region != "genic"
  ) %>%
  select(-`C>T`, -`C>G`) %>%
  # Ratio columns are selected by name rather than by position (was 2:3).
  pivot_longer(
    cols = c(`C>T_ratio`, `C>G_ratio`),
    names_to = "mut_type",
    values_to = "ratio"
  ) %>%
  filter(mut_type == "C>T_ratio") %>%
  # Constant colour group: gives every bar the same outline via the colour scale.
  mutate(col = "col") %>%
  ggplot(aes(x = TP53, y = ratio, fill = region, col = col)) +
  geom_bar(stat = "identity", position = "dodge") +
  # Layers are added with `+`; the previous `theme_classic() %>% geom_hline()`
  # passed the theme object as geom_hline()'s `mapping` argument.
  geom_hline(yintercept = 0.560, color = "blue") +
  geom_hline(yintercept = 0.537, color = "red") +
  theme_classic() +
  ylim(c(0, 1))
p_mut_type_tot

# Inspect the WT rows that feed the plot. filter() has no method for ggplot
# objects, so the plot's underlying data frame is filtered instead.
p_mut_type_tot$data %>% dplyr::filter(TP53 == "WT")

ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/mut_type_ratio_tot.sim.pdf",
  p_mut_type_tot,
  height = 10,
  width = 8
)

# WT rows for the four regions shown in the figure.
mut_type_merge_df %>%
  dplyr::filter(TP53 == "WT") %>%
  filter(
    region != "exon_internal",
    region != "intergenic",
    region != "genic"
  )

# Recorded output:
#   TP53  `C>T` `C>G` `C>T_ratio` `C>G_ratio` region
#   <fct> <dbl> <dbl>       <dbl>       <dbl> <fct>
# 1 WT     1762  1640       0.518       0.482 intron
# 2 WT       49    46       0.516       0.484 UTR3
# 3 WT       17    10       0.630       0.370 UTR5
# 4 WT       74    48       0.607       0.393 exon_tot

# 2 x 4 contingency table (mutation type x region) for WT, with counts copied
# from the recorded output above. The dimnames now describe the data; the
# previous "Survived"/"Class" labels were left over from a template example.
# Labels do not change the test result.
xtab <- as.table(rbind(
  c(17, 74, 1762, 49),
  c(10, 48, 1640, 46)
))
dimnames(xtab) <- list(
  mut_type = c("C>T", "C>G"),
  region = c("UTR5", "exon_tot", "intron", "UTR3")
)
xtab

# Chi-square test of independence between mutation type and region (WT).
rstatix::chisq_test(xtab) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/mut_type_ratio_total.WT.sim.chisq.pval.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )

# Recorded result:
#       n statistic     p    df method          p.signif
# * <dbl>     <dbl> <dbl> <int> <chr>           <chr>
# 1  3646      4.99 0.173     3 Chi-square test ns

# KO rows for the four regions shown in the figure.
mut_type_merge_df %>%
  dplyr::filter(TP53 == "KO") %>%
  filter(
    region != "exon_internal",
    region != "intergenic",
    region != "genic"
  )

# Recorded output:
#   TP53  `C>T` `C>G` `C>T_ratio` `C>G_ratio` region
#   <fct> <dbl> <dbl>       <dbl>       <dbl> <fct>
# 1 KO     1386  1147       0.547       0.453 intron
# 2 KO       32    30       0.516       0.484 UTR3
# 3 KO       12     8       0.6         0.4   UTR5
# 4 KO       50    49       0.505       0.495 exon_tot

# 2 x 4 contingency table (mutation type x region) for KO, with counts copied
# from the recorded output above. The dimnames now describe the data; the
# previous "Survived"/"Class" labels were left over from a template example.
# Labels do not change the test result.
xtab <- as.table(rbind(
  c(12, 50, 1386, 32),
  c(8, 49, 1147, 30)
))
dimnames(xtab) <- list(
  mut_type = c("C>T", "C>G"),
  region = c("UTR5", "exon_tot", "intron", "UTR3")
)
xtab

# Chi-square test of independence between mutation type and region (KO).
rstatix::chisq_test(xtab) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/mut_type_ratio_total.KO.sim.chisq.pval.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )

# Recorded result:
#       n statistic     p    df method          p.signif
# * <dbl>     <dbl> <dbl> <int> <chr>           <chr>
# 1  2714      1.14 0.768     3 Chi-square test ns


# C>T fraction in the UTR regions (region label containing "UTR") for WT.
# The red line at 0.537 is the WT C>T ratio recorded from the mappable-region
# summary.
p_mut_type_UTR_WT <- mut_type_merge_df %>%
  # Matching "UTR" already excludes exon_internal, intergenic and genic.
  filter(TP53 == "WT", grepl("UTR", region)) %>%
  select(-`C>T`, -`C>G`) %>%
  # Ratio columns are selected by name rather than by position (was 2:3).
  pivot_longer(
    cols = c(`C>T_ratio`, `C>G_ratio`),
    names_to = "mut_type",
    values_to = "ratio"
  ) %>%
  filter(mut_type == "C>T_ratio") %>%
  ggplot(aes(x = region, y = ratio, fill = region)) +
  # The outline colour is set as a constant. Mapping a column that holds the
  # string "black" through aes() sends it through the discrete colour scale,
  # so the outline was not actually drawn in black.
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  # Layers are added with `+`; the previous `theme_classic() %>% geom_hline()`
  # passed the theme object as geom_hline()'s `mapping` argument.
  geom_hline(yintercept = 0.537, color = "red") +
  theme_classic() +
  ylim(c(0, 1)) +
  theme(legend.position = "none")

p_mut_type_UTR_WT
ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/mut_type_ratio_UTR.WT.sim.pdf",
  p_mut_type_UTR_WT,
  height = 10,
  width = 8
)

# WT rows for the two UTR regions.
mut_type_merge_df %>%
  filter(region %in% c("UTR3", "UTR5")) %>%
  filter(TP53 == "WT")

# Recorded output (kept as comments: left uncommented, these lines are a
# parse error that stops the whole script from loading):
#   TP53  `C>T` `C>G` `C>T_ratio` `C>G_ratio` region
#   <fct> <dbl> <dbl>       <dbl>       <dbl> <fct>
# 1 WT       49    46       0.516       0.484 UTR3
# 2 WT       17    10       0.630       0.370 UTR5

# 2 x 2 contingency table (region x mutation type) for WT, with counts copied
# from the recorded output above. The UTR3 C>G count is 46; it had been
# entered as 16. The dimnames now describe the data instead of the template
# "Survived"/"Class" labels; labels do not change the test result.
xtab <- as.table(rbind(
  c(49, 46),
  c(17, 10)
))
dimnames(xtab) <- list(
  region = c("UTR3", "UTR5"),
  mut_type = c("C>T", "C>G")
)
xtab

# Chi-square test of UTR3 vs UTR5 mutation-type composition (WT): shown once,
# then written to disk.
rstatix::chisq_test(xtab)

rstatix::chisq_test(xtab) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/mut_type_ratio_UTR.WT.sim.chisq.pval.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )
  


# C>T fraction in the UTR regions (region label containing "UTR") for KO.
# The blue line at 0.560 is the KO C>T ratio recorded from the mappable-region
# summary.
p_mut_type_UTR_KO <- mut_type_merge_df %>%
  # Matching "UTR" already excludes exon_internal, intergenic and genic.
  filter(TP53 == "KO", grepl("UTR", region)) %>%
  select(-`C>T`, -`C>G`) %>%
  # Ratio columns are selected by name rather than by position (was 2:3).
  pivot_longer(
    cols = c(`C>T_ratio`, `C>G_ratio`),
    names_to = "mut_type",
    values_to = "ratio"
  ) %>%
  filter(mut_type == "C>T_ratio") %>%
  ggplot(aes(x = region, y = ratio, fill = region)) +
  # The outline colour is set as a constant. Mapping a column that holds the
  # string "black" through aes() sends it through the discrete colour scale,
  # so the outline was not actually drawn in black.
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  # Layers are added with `+`; the previous `theme_classic() %>% geom_hline()`
  # passed the theme object as geom_hline()'s `mapping` argument.
  geom_hline(yintercept = 0.560, color = "blue") +
  theme_classic() +
  ylim(c(0, 1)) +
  theme(legend.position = "none")
p_mut_type_UTR_KO
ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/mut_type_ratio_UTR.KO.sim.pdf",
  p_mut_type_UTR_KO,
  height = 10,
  width = 8
)

# KO rows for the two UTR regions.
mut_type_merge_df %>%
  filter(region %in% c("UTR3", "UTR5")) %>%
  filter(TP53 == "KO")

# Recorded output:
#   TP53  `C>T` `C>G` `C>T_ratio` `C>G_ratio` region
#   <fct> <dbl> <dbl>       <dbl>       <dbl> <fct>
# 1 KO       32    30       0.516       0.484 UTR3
# 2 KO       12     8       0.6         0.4   UTR5

# 2 x 2 contingency table (region x mutation type) for KO, with counts copied
# from the recorded output above. The dimnames now describe the data instead
# of the template "Survived"/"Class" labels; labels do not change the test
# result.
xtab <- as.table(rbind(
  c(32, 30),
  c(12, 8)
))
dimnames(xtab) <- list(
  region = c("UTR3", "UTR5"),
  mut_type = c("C>T", "C>G")
)
xtab

# Chi-square test of UTR3 vs UTR5 mutation-type composition (KO): shown once,
# then written to disk.
rstatix::chisq_test(xtab)

rstatix::chisq_test(xtab) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/mut_type_ratio_UTR.KO.sim.chisq.pval.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )



# C>T fraction for the non-UTR regions that remain after filtering
# (exon_tot and intron), dodged by TP53. Blue line: 0.560 (KO), red line:
# 0.537 (WT), the C>T ratios recorded from the mappable-region summary.
# NOTE(review): despite the object and file name, "exon_internal" rows are
# filtered OUT here -- confirm that exon_tot is the intended exon set.
p_mut_type_exon_internal <- mut_type_merge_df %>%
  filter(
    region != "exon_internal",
    region != "intergenic",
    region != "genic"
  ) %>%
  select(-`C>T`, -`C>G`) %>%
  # Ratio columns are selected by name rather than by position (was 2:3).
  pivot_longer(
    cols = c(`C>T_ratio`, `C>G_ratio`),
    names_to = "mut_type",
    values_to = "ratio"
  ) %>%
  filter(mut_type == "C>T_ratio") %>%
  filter(!grepl("UTR", region)) %>%
  # Constant colour group: gives every bar the same outline via the colour scale.
  mutate(col = "col") %>%
  ggplot(aes(x = TP53, y = ratio, fill = region, col = col)) +
  geom_bar(stat = "identity", position = "dodge") +
  # Layers are added with `+`; the previous `theme_classic() %>% geom_hline()`
  # passed the theme object as geom_hline()'s `mapping` argument.
  geom_hline(yintercept = 0.560, color = "blue") +
  geom_hline(yintercept = 0.537, color = "red") +
  theme_classic() +
  ylim(c(0, 1))
p_mut_type_exon_internal
ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/mut_type_ratio_exon_internal.sim.pdf",
  p_mut_type_exon_internal,
  height = 10,
  width = 8
)
# WT rows for the regions shown in the figure.
mut_type_merge_df %>%
  filter(
    region != "exon_internal",
    region != "intergenic",
    region != "genic"
  ) %>%
  filter(TP53 == "WT")

# Recorded output:
#   TP53  `C>T` `C>G` `C>T_ratio` `C>G_ratio` region
#   <fct> <dbl> <dbl>       <dbl>       <dbl> <fct>
# 1 WT     1762  1640       0.518       0.482 intron
# 2 WT       49    46       0.516       0.484 UTR3
# 3 WT       17    10       0.630       0.370 UTR5
# 4 WT       74    48       0.607       0.393 exon_tot

# 2 x 2 contingency table (region x mutation type) comparing exon_tot with
# intron in WT, with counts copied from the recorded output above. The
# exon_tot C>T count is 74; it had been entered as 72. The dimnames now
# describe the data instead of the template "Survived"/"Class" labels; labels
# do not change the test result.
xtab <- as.table(rbind(
  c(74, 48),
  c(1762, 1640)
))
dimnames(xtab) <- list(
  region = c("exon_tot", "intron"),
  mut_type = c("C>T", "C>G")
)
xtab

# Chi-square test, exon_tot vs intron (WT). Kept in WT_stat so it can be
# combined with the KO result further down.
WT_stat <- rstatix::chisq_test(xtab)
WT_stat

# KO rows for the regions shown in the figure.
mut_type_merge_df %>%
  filter(
    region != "exon_internal",
    region != "intergenic",
    region != "genic"
  ) %>%
  filter(TP53 == "KO")

# Recorded output:
#   TP53  `C>T` `C>G` `C>T_ratio` `C>G_ratio` region
#   <fct> <dbl> <dbl>       <dbl>       <dbl> <fct>
# 1 KO     1386  1147       0.547       0.453 intron
# 2 KO       32    30       0.516       0.484 UTR3
# 3 KO       12     8       0.6         0.4   UTR5
# 4 KO       50    49       0.505       0.495 exon_tot

# 2 x 2 contingency table (region x mutation type) comparing exon_tot with
# intron in KO, with counts copied from the recorded output above. The
# dimnames now describe the data instead of the template "Survived"/"Class"
# labels; labels do not change the test result.
xtab2 <- as.table(rbind(
  c(50, 49),
  c(1386, 1147)
))
dimnames(xtab2) <- list(
  region = c("exon_tot", "intron"),
  mut_type = c("C>T", "C>G")
)
# Show the KO table just built (the previous code printed the WT `xtab` here).
xtab2

# Chi-square test, exon_tot vs intron (KO).
KO_stat <- rstatix::chisq_test(xtab2)

# Stack the WT result (row 1, from WT_stat computed earlier) and the KO
# result (row 2), then write both to one file.
stat_bind_df <- rbind(
  WT_stat,
  KO_stat
)
stat_bind_df %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/mut_type_ratio_exon.sim.chisq.pval.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )

# Exploratory plot (displayed, not saved): both C>T and C>G fractions for
# every region except exon_internal, with fill = region and outline colour =
# mutation type. Blue line: 0.560 (KO), red line: 0.537 (WT).
mut_type_merge_df %>%
  filter(region != "exon_internal") %>%
  select(-`C>T`, -`C>G`) %>%
  # Ratio columns are selected by name rather than by position (was 2:3);
  # gather() is kept so the stacked row order is unchanged.
  gather("mut_type", "ratio", `C>T_ratio`, `C>G_ratio`) %>%
  ggplot(aes(x = TP53, y = ratio, fill = region, col = mut_type)) +
  geom_bar(stat = "identity", position = position_dodge2()) +
  # Layers are added with `+`; the previous `theme_classic() %>% geom_hline()`
  # passed the theme object as geom_hline()'s `mapping` argument.
  geom_hline(yintercept = 0.560, color = "blue") +
  geom_hline(yintercept = 0.537, color = "red") +
  theme_classic()


# Chi-square goodness-of-fit test: observed counts 46 and 30 against expected
# proportions 0.518 / 0.482.
# NOTE(review): the proportions match the WT intron C>T / C>G ratios recorded
# above; the origin of the observed counts is not stated here -- confirm.
chisq.test(
  x = c(46, 30),
  p = c(0.518, 0.482)
)


# Per-genotype C>T / C>G counts and fractions for the raw intron variant
# table, labelled "intron_tot" (displayed only).
# `intron_df` is not created anywhere in this script, so the summary is only
# run when that object already exists in the session; otherwise it is skipped
# with a message instead of aborting the rest of the script.
if (exists("intron_df")) {
  intron_df %>%
    group_by(TP53, mut_type) %>%
    dplyr::summarise(n = n()) %>%
    spread(mut_type, n) %>%
    mutate(
      `C>G_ratio` = `C>G` / (`C>T` + `C>G`),
      `C>T_ratio` = `C>T` / (`C>T` + `C>G`)
    ) %>%
    mutate(region = "intron_tot")
} else {
  message("intron_df is not defined; skipping the intron_tot summary")
}


# UTR5 variants annotated with gene direction. Tx is assigned from the gene
# direction and the reference base:
#   gene_dir "+": REF C -> "U", otherwise "T"
#   gene_dir "-": REF C -> "T", otherwise "U"
#   any other gene_dir -> "."
# Rows whose gene_dir is "." are then removed.
UTR_gd_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/UTR/A3A.total.UTR5.rh.vcf.gd"
) %>%
  mutate(
    Tx = ifelse(
      gene_dir == "+",
      ifelse(REF == "C", "U", "T"),
      ifelse(
        gene_dir == "-",
        ifelse(REF == "C", "T", "U"),
        "."
      )
    )
  ) %>%
  filter(gene_dir != ".")

# Order Tx as T, U and sort the rows accordingly.
UTR_gd_df$Tx <- factor(UTR_gd_df$Tx, levels = c("T", "U"))
UTR_gd_df <- arrange(UTR_gd_df, Tx)

# Quick look: KO variants with Tx == "T" ...
UTR_gd_df %>%
  filter(Tx == "T", TP53 == "KO")

# ... and, among those, the C>G ones.
UTR_gd_df %>%
  filter(TP53 == "KO", Tx == "T", mut_type == "C>G")

# Counts and C>G / C>T fractions per TP53 genotype and Tx class, written as a
# tab-separated file.
UTR_gd_df %>%
  group_by(TP53, Tx, mut_type) %>%
  dplyr::summarise(n = n()) %>%
  spread(mut_type, n) %>%
  mutate(
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`),
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`)
  ) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/UTR/UTR_mut_type.strand.tsv",
    quote = FALSE,
    row.names = FALSE,
    sep = "\t"
  )

