library(dplyr)
library(tidyverse)
library(ggplot2)
# Reference lengths: each file is read as a header-less, single-column table
# whose column is named "len". File names encode the gene strand (+ / -) and
# the sequence context (TCN / NGA).
len_Tx_p_TCN <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/Tx/Tx.+.TCN.txt",
  col_names = "len"
)
len_Tx_n_TCN <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/Tx/Tx.-.TCN.txt",
  col_names = "len"
)
len_Tx_p_NGA <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/Tx/Tx.+.NGA.txt",
  col_names = "len"
)
len_Tx_n_NGA <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/Tx/Tx.-.NGA.txt",
  col_names = "len"
)

# Denominators for the strand-specific rates used throughout this script:
#   T = ("-" strand, TCN) + ("+" strand, NGA)
#   U = ("+" strand, TCN) + ("-" strand, NGA)
# NOTE(review): the factor of 2 presumably accounts for a diploid genome -
# confirm against how the length files were generated.
tx_ref_T_len <- 2 * (len_Tx_n_TCN[["len"]] + len_Tx_p_NGA[["len"]])
tx_ref_U_len <- 2 * (len_Tx_p_TCN[["len"]] + len_Tx_n_NGA[["len"]])
# Load the mutation table, drop exact duplicate rows, keep only rows with a
# known gene direction, and label each row with a strand class `Tx`:
#   gene_dir "+": REF == "C" -> "U", any other REF -> "T"
#   gene_dir "-": REF == "C" -> "T", any other REF -> "U"
#   any other gene_dir       -> "."
# NOTE(review): "T"/"U" presumably stand for transcribed / untranscribed
# strand - confirm.
# The gene_dir filter runs before the labelling; both steps are row-wise, so
# the order does not affect the result.
tx_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v3/Tx/A3A.total.gene.raw.rh.vcf.gd"
) %>%
  unique() %>%
  filter(gene_dir != ".") %>%
  mutate(
    Tx = ifelse(
      gene_dir == "+",
      ifelse(REF == "C", "U", "T"),
      ifelse(
        gene_dir == "-",
        ifelse(REF == "C", "T", "U"),
        "."
      )
    )
  )


# Per-sample strand table: count rows per (id, TP53, Tx), place the T and U
# counts side by side, scale each count by its reference length, and express
# the two rates as fractions of their sum. Written as a tab-separated file.
tx_df %>%
  group_by(id, TP53, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len,
    U_rate = `U` / tx_ref_U_len,
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  ) %>%
  write.table(
    file = "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/Tx/Tx_sample.txt",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )

# TP53-level strand table. The sample id is dropped and duplicate rows are
# removed before counting, so a row that appears in several samples is counted
# once. Rates are the counts divided by the reference length and then by 10
# for WT or 6 for any other TP53 value.
# NOTE(review): 10 and 6 presumably are the numbers of WT and KO samples (the
# t-test output recorded later in this file lists n = 10 and n = 6) - confirm.
tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len / ifelse(TP53 == "WT", 10, 6),
    U_rate = `U` / tx_ref_U_len / ifelse(TP53 == "WT", 10, 6),
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  ) %>%
  write.table(
    file = "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/Tx/Tx_TP53.txt",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )



# Bar plot of the WT T and U rates (scaled by 1e6) from the TP53-level table.
# The table is recomputed here in the same way as for Tx_TP53.txt, then
# restricted to WT and reshaped to one row per rate column.
p_tx <- tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len / ifelse(TP53 == "WT", 10, 6),
    U_rate = `U` / tx_ref_U_len / ifelse(TP53 == "WT", 10, 6),
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  ) %>%
  filter(TP53 == "WT") %>%
  gather(region, rate, T_rate:U_rate) %>%
  # The colour aesthetic is a constant string; the manual scale below maps it
  # to black so every bar gets a black outline.
  ggplot(aes(x = region, y = rate * 1000000, fill = region, colour = "TP53")) +
  geom_col() +
  # ggtitle("WT_Tx") +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 40),
    axis.text.x = element_text(size = 30),
    axis.text.y = element_text(size = 40),
    # plot.title = element_text(size = 30),
    axis.ticks = element_line(size = 4),
    axis.ticks.length = unit(.4, "cm")
  ) +
  scale_colour_manual(values = "black") +
  ylim(0, 3)


p_tx
ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222//Tx.pdf",
  p_tx,
  height = 10,
  width = 8
)

# Disabled variant of the WT rate plot (never runs because of `if (FALSE)`).
# It differs from p_tx by a wider y range and a dashed horizontal line at the
# WT `mut_rate` read from the average-rate table.
if (FALSE) {
  avg_rate_df <- read_tsv(
    "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/v2/average_mut_rate_TCN_NGA.txt"
  )
  p_tx2 <- tx_df %>%
    select(-id) %>%
    unique() %>%
    group_by(TP53, Tx) %>%
    dplyr::summarise(n = n()) %>%
    spread(Tx, n) %>%
    mutate(
      T_rate = `T` / tx_ref_T_len / ifelse(TP53 == "WT", 10, 6),
      U_rate = `U` / tx_ref_U_len / ifelse(TP53 == "WT", 10, 6),
      T_ratio = T_rate / (T_rate + U_rate),
      U_ratio = U_rate / (T_rate + U_rate)
    ) %>%
    filter(TP53 == "WT") %>%
    gather(region, rate, T_rate:U_rate) %>%
    ggplot(aes(x = region, y = rate * 1000000, fill = region, colour = "TP53")) +
    geom_col() +
    # ggtitle("WT_Tx") +
    theme_classic() +
    theme(
      legend.position = "none",
      axis.title.x = element_blank(),
      axis.title.y = element_text(size = 40),
      axis.text.x = element_text(size = 30),
      axis.text.y = element_text(size = 40),
      # plot.title = element_text(size = 30),
      axis.ticks = element_line(size = 4),
      axis.ticks.length = unit(.4, "cm")
    ) +
    scale_colour_manual(values = "black") +
    ylim(0, 6.3) +
    geom_hline(
      yintercept = (avg_rate_df %>% filter(TP53 == "WT"))$mut_rate * 1000000,
      linetype = "dashed"
    )


  p_tx2
  ggsave(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/231122/Tx.v2.pdf",
    p_tx2,
    height = 10,
    width = 8
  )
}

# Print the T / U counts per TP53 group (sample id dropped, duplicates
# removed).
tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n)

# Chi-square goodness-of-fit test of two hard-coded counts against the
# proportions expected from the T and U reference lengths; the result is
# written as a tab-separated file.
# NOTE(review): 1933 and 2176 are presumably T and U counts copied from a
# table like the one printed above - confirm they match the current data.
rstatix::chisq_test(
  c(1933, 2176),
  p = c(tx_ref_T_len, tx_ref_U_len) / (tx_ref_T_len + tx_ref_U_len)
) %>%
  write.table(
    file = "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/Tx/Tx.chisq.pval.txt",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )


# Per-sample T and U rates in long form (one row per sample and class),
# compared with a t-test (rate ~ class) within each TP53 group.
tx_df %>%
  group_by(id, TP53, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len,
    U_rate = `U` / tx_ref_U_len
  ) %>%
  ungroup() %>%
  select(id, TP53, T_rate, U_rate) %>%
  gather(class, rate, T_rate, U_rate) %>%
  # Strip the "_rate" suffix so the classes read "T" and "U".
  mutate(class = sub("_rate", "", class)) %>%
  group_by(TP53) %>%
  rstatix::t_test(rate ~ class)

# Result table pasted by the author:
#TP53  .y.   group1 group2    n1    n2 statistic    df     p
#* <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl>
#KO    rate  T      U          6     6    -0.294  10.0 0.775
#WT    rate  T      U         10    10    -0.204  17.9 0.841


# Mean per-sample T and U rates with 95% t-based confidence intervals for
# each (TP53, class), followed by a bar plot (with error bars) of the WT rows.
#
# Changes from the previous version:
#   * n.rate counts only non-missing rates. mean() and sd() already drop NA
#     (na.rm = TRUE), so counting every row with n() would give the wrong
#     standard error and degrees of freedom whenever a sample lacks one of
#     the two classes (spread() leaves NA in that case).
#   * `levels =` and `limits =` are spelled out instead of relying on partial
#     argument matching (`level =`, `lim =`).
#   * The T_ratio / U_ratio columns are no longer computed, because they were
#     dropped by the following select().
tx_rate_ci_df <-
  tx_df %>%
  group_by(id, TP53, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(T_rate = `T` / tx_ref_T_len,
         U_rate = `U` / tx_ref_U_len) %>%
  ungroup() %>%
  select(id, TP53, T_rate, U_rate) %>%
  gather(class, rate, 3:4) %>%
  # Strip the "_rate" suffix so the classes read "T" and "U".
  mutate(class = gsub("_rate", "", class)) %>%
  group_by(TP53, class) %>%
  summarise(mean.rate = mean(rate, na.rm = TRUE),
            sd.rate = sd(rate, na.rm = TRUE),
            n.rate = sum(!is.na(rate))) %>%
  mutate(se.rate = sd.rate / sqrt(n.rate),
         lower.ci.rate = mean.rate - qt(1 - (0.05 / 2), n.rate - 1) * se.rate,
         upper.ci.rate = mean.rate + qt(1 - (0.05 / 2), n.rate - 1) * se.rate)
tx_rate_ci_df
tx_rate_ci_df$TP53 <- factor(tx_rate_ci_df$TP53, levels = c("WT", "KO"))
tx_rate_ci_df$class <- factor(tx_rate_ci_df$class, levels = c("T", "U"))
p_tx_rate <- tx_rate_ci_df %>%
  filter(TP53 == "WT") %>%
  # The colour aesthetic is a constant string mapped to black by the manual
  # scale below (black bar outlines and error bars).
  ggplot(aes(x = class, y = mean.rate * 1000000, col = "black", fill = class)) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(y = mean.rate * 1000000,
                    ymax = upper.ci.rate * 1000000,
                    ymin = lower.ci.rate * 1000000),
                width = .5) +
  #facet_wrap(~TP53)+
  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(size = 30),
        #plot.title=element_text(size=30),
        axis.text.y = element_text(size = 40),
        axis.title.y = element_text(size = 40),
        axis.ticks.length = unit(.4, "cm"),
        axis.ticks = element_line(size = 4),
        legend.position = "none"
  ) +
  scale_colour_manual(values = c("black")) +
  scale_y_continuous(limits = c(0, 15))
p_tx_rate
ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/231122/Tx.sample.ci.pdf", p_tx_rate,
       height = 10, width = 8)



# WT rows of the TP53-level rate table in long form (one row per rate
# column), printed for inspection.
tx_fin_df <- tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len / ifelse(TP53 == "WT", 10, 6),
    U_rate = `U` / tx_ref_U_len / ifelse(TP53 == "WT", 10, 6),
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  ) %>%
  filter(TP53 == "WT") %>%
  gather(region, rate, T_rate:U_rate)
tx_fin_df

# Fisher's exact test on a 2x2 table: two hard-coded counts in the first row,
# the T / U reference lengths in the second.
# NOTE(review): 1638 and 1848 are presumably the WT T and U counts - confirm
# they match the current data.
fisher.test(rbind(c(1638, 1848), c(tx_ref_T_len, tx_ref_U_len)))

# Print the two reference lengths for inspection.
tx_ref_T_len
tx_ref_U_len

# Pooled T / U rates across all rows: sample id and TP53 are dropped and
# duplicate rows removed before counting. The result is printed, not stored.
# NOTE(review): the divisor 12 is hard-coded; its meaning is not visible in
# this script (the TP53 groups elsewhere use 10 and 6) - confirm.
tx_df %>%
  select(-id, -TP53) %>%
  unique() %>%
  group_by(Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len / 12,
    U_rate = `U` / tx_ref_U_len / 12,
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  )



# Bar plot of the mean per-sample T and U ratios in WT samples, with 95%
# t-based confidence intervals as error bars.
#
# Changes from the previous version:
#   * n.ratio counts only non-missing ratios, matching the na.rm = TRUE used
#     for the mean and sd. Counting all rows with n() would give the wrong
#     standard error and degrees of freedom if a sample lacks one class.
#   * The count columns are referenced as `T` / `U` in backticks, like the
#     rest of the script.
#   * `limits =` is spelled out instead of the partially matched `lim =`.
#   * The WT filter is applied before grouping; it is a row-wise condition, so
#     the selected rows are the same.
p_tx_ratio <- tx_df %>%
  filter(TP53 == "WT") %>%
  group_by(id, TP53, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(T_rate = `T` / tx_ref_T_len,
         U_rate = `U` / tx_ref_U_len) %>%
  mutate(T_ratio = T_rate / (T_rate + U_rate),
         U_ratio = U_rate / (T_rate + U_rate)) %>%
  select(id, TP53, T_ratio, U_ratio) %>%
  gather(class, ratio, T_ratio:U_ratio) %>%
  group_by(TP53, class) %>%
  summarise(mean.ratio = mean(ratio, na.rm = TRUE),
            sd.ratio = sd(ratio, na.rm = TRUE),
            n.ratio = sum(!is.na(ratio))) %>%
  mutate(se.ratio = sd.ratio / sqrt(n.ratio),
         lower.ci.ratio = mean.ratio - qt(1 - (0.05 / 2), n.ratio - 1) * se.ratio,
         upper.ci.ratio = mean.ratio + qt(1 - (0.05 / 2), n.ratio - 1) * se.ratio) %>%
  # The colour aesthetic is a constant string mapped to black by the manual
  # scale below.
  ggplot(aes(x = class, y = mean.ratio, col = "black", fill = class)) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(y = mean.ratio, ymax = upper.ci.ratio, ymin = lower.ci.ratio), width = .5) +
  #  facet_wrap(~TP53)+
  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(size = 30),
        #plot.title=element_text(size=30),
        axis.text.y = element_text(size = 40),
        axis.title.y = element_text(size = 40),
        axis.ticks.length = unit(.4, "cm"),
        axis.ticks = element_line(size = 4),
        legend.position = "none"
  ) +
  scale_colour_manual(values = c("black")) +
  scale_y_continuous(limits = c(0, 1))

ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/231122/Tx.ratio.pdf", p_tx_ratio,
       height = 10, width = 8)


# Per-sample T and U ratios in long form, compared with a t-test
# (ratio ~ class) within each TP53 group.
# NOTE(review): T_ratio + U_ratio equals 1 for every sample by construction,
# so the two groups are not independent - confirm that an unpaired t-test is
# the intended comparison.
tx_df %>%
  group_by(id, TP53, Tx) %>%
  #  filter(TP53=="WT")%>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len,
    U_rate = `U` / tx_ref_U_len,
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  ) %>%
  select(id, TP53, T_ratio, U_ratio) %>%
  gather(class, ratio, T_ratio:U_ratio) %>%
  group_by(TP53) %>%
  rstatix::t_test(ratio ~ class, p.adjust.method = "BH")

# Result table pasted by the author:
#TP53  .y.   group1  group2     n1    n2 statistic    df        p
#* <chr> <chr> <chr>   <chr>   <int> <int>     <dbl> <dbl>    <dbl>
#  1 KO    ratio T_ratio U_ratio     6     6     -3.35  10.0 0.00738 
#2 WT    ratio T_ratio U_ratio    10    10     -4.22  18   0.000511


## C>T and C>G handled separately ##
####################################

# Per-sample, per-mutation-type strand table written as a tab-separated file.
#
# Fix: the rates were previously divided by 10 (WT) or 6 (otherwise). Each row
# here is a single sample (grouped by id), so that extra division scaled every
# per-sample rate down by the group size. The other per-sample tables in this
# script (Tx_sample.txt and Tx_sample.ratio.mut_type.region.txt) divide by the
# reference length only; this block now does the same. The T_ratio / U_ratio
# columns are unaffected because the divisor cancelled out.
tx_df %>%
  group_by(id, TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(T_rate = `T` / tx_ref_T_len,
         U_rate = `U` / tx_ref_U_len) %>%
  mutate("T_ratio" = `T_rate` / (`T_rate` + `U_rate`),
         "U_ratio" = `U_rate` / (`T_rate` + `U_rate`)) %>%
  write.table("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/Tx/Tx_sample.ratio.mut_type.txt",
              sep = "\t",
              quote = FALSE,
              row.names = FALSE
  )




# TP53-level, per-mutation-type strand table (sample id dropped, duplicate
# rows removed before counting), written as a tab-separated file.
#
# Fix: every TP53 group was previously divided by 6. The other TP53-level
# tables in this script divide by 10 for WT and 6 otherwise, so WT rates were
# computed with the wrong divisor here. The divisor now depends on TP53.
# The T_ratio / U_ratio columns are unaffected because the divisor cancels.
# NOTE(review): 10 and 6 presumably are the numbers of WT and KO samples -
# confirm.
tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(T_rate = ifelse(TP53 == "WT", `T` / tx_ref_T_len / 10, `T` / tx_ref_T_len / 6),
         U_rate = ifelse(TP53 == "WT", `U` / tx_ref_U_len / 10, `U` / tx_ref_U_len / 6)) %>%
  mutate("T_ratio" = `T_rate` / (`T_rate` + `U_rate`),
         "U_ratio" = `U_rate` / (`T_rate` + `U_rate`)) %>%
  write.table("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/Tx/Tx_TP53.ratio.mut_type.txt",
              sep = "\t",
              quote = FALSE,
              row.names = FALSE
  )



# Pooled per-mutation-type T / U rates: sample id and TP53 are dropped and
# duplicate rows removed before counting. The result is printed, not stored.
# NOTE(review): the divisor 12 is hard-coded; its meaning is not visible in
# this script - confirm.
tx_df %>%
  select(-id, -TP53) %>%
  unique() %>%
  group_by(mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len / 12,
    U_rate = `U` / tx_ref_U_len / 12,
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  )





## C>T versus C>G comparison ##
###############################

# From here on mut_type is a factor with levels C>T, C>G (in that order);
# any other value becomes NA.
tx_df <- tx_df %>%
  mutate(mut_type = factor(mut_type, levels = c("C>T", "C>G")))


# Per-sample, per-mutation-type strand table: counts, length-scaled rates and
# their fractions, written as a tab-separated file.
tx_df %>%
  group_by(id, TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len,
    U_rate = `U` / tx_ref_U_len,
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  ) %>%
  write.table(
    file = "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/Tx/Tx_sample.ratio.mut_type.region.txt",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )


# TP53-level, per-mutation-type strand table, written as a tab-separated
# file. Unlike the other TP53-level tables in this script, rows are counted
# across all samples without dropping the id or removing duplicates first.
# Rates are divided by 10 for WT and 6 for any other TP53 value.
tx_df %>%
  group_by(TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len / ifelse(TP53 == "WT", 10, 6),
    U_rate = `U` / tx_ref_U_len / ifelse(TP53 == "WT", 10, 6),
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  ) %>%
  write.table(
    file = "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig4/240222/data/Tx/Tx_TP53.ratio.mut_type.region.txt",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )



# Per-sample split of the T-class counts by mutation type: one row per
# (id, TP53) with the C>T and C>G counts and each one's share of their sum.
# The result is printed, not stored.
#
# Fix: the pipeline used to end with an unfinished `mutate(class=)` (an empty
# argument), which has been removed. The rate / ratio columns that were
# computed and then immediately dropped by select() are no longer computed.
tx_df %>%
  group_by(id, TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  # Keep only the T counts (the grouping columns id, TP53, mut_type stay).
  select(-`U`) %>%
  ungroup() %>%
  spread(mut_type, `T`) %>%
  mutate(`C>T_ratio` = `C>T` / (`C>T` + `C>G`)) %>%
  mutate(`C>G_ratio` = `C>G` / (`C>T` + `C>G`))


# Per-sample split of the U-class counts by mutation type: one row per
# (id, TP53) with the C>T and C>G counts and each one's share of their sum.
# The result is printed, not stored. Rate / ratio columns are not computed
# because only the U counts are used.
tx_df %>%
  group_by(id, TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  # Keep only the U counts (the grouping columns id, TP53, mut_type stay).
  select(-`T`) %>%
  ungroup() %>%
  spread(mut_type, `U`) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )


# TP53-level split of the T-class counts by mutation type (sample id dropped,
# duplicate rows removed before counting): one row per TP53 value with the
# C>T and C>G counts and each one's share of their sum. Printed, not stored.
tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  # Keep only the T counts (the grouping columns TP53, mut_type stay).
  select(-`U`) %>%
  ungroup() %>%
  spread(mut_type, `T`) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )



# TP53-level split of the U-class counts by mutation type (sample id dropped,
# duplicate rows removed before counting): one row per TP53 value with the
# C>T and C>G counts and each one's share of their sum. Printed, not stored.
tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  # Keep only the U counts (the grouping columns TP53, mut_type stay).
  select(-`T`) %>%
  ungroup() %>%
  spread(mut_type, `U`) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )



# Pooled split of the T-class counts by mutation type (sample id and TP53
# dropped, duplicate rows removed before counting): a single row with the
# C>T and C>G counts and each one's share of their sum. Printed, not stored.
tx_df %>%
  select(-id, -TP53) %>%
  unique() %>%
  group_by(mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  # Keep only the T counts (the grouping column mut_type stays).
  select(-`U`) %>%
  ungroup() %>%
  spread(mut_type, `T`) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )


# Pooled split of the U-class counts by mutation type (sample id and TP53
# dropped, duplicate rows removed before counting): a single row with the
# C>T and C>G counts and each one's share of their sum. Printed, not stored.
tx_df %>%
  select(-id, -TP53) %>%
  unique() %>%
  group_by(mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  # Keep only the U counts (the grouping column mut_type stays).
  select(-`T`) %>%
  ungroup() %>%
  spread(mut_type, `U`) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )
