# Reference tables used to normalise the strand-specific mutation counts below.
# Each file is read as a headerless, tab-separated table, so its first column
# is named V1.
# NOTE(review): V1 is presumably the number of TCN / NGA motif sites on
# "+" / "-" strand genes, one row per file -- confirm against the files.
tx_ref_dir <- "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/Tx"

# Read one reference table from tx_ref_dir and return it as a tibble.
# `as_tibble()` replaces the deprecated `as.tibble()`; `FALSE` replaces the
# reassignable shorthand `F`.
read_tx_ref <- function(file_name) {
  read.csv(file.path(tx_ref_dir, file_name), sep = "\t", header = FALSE) %>%
    as_tibble()
}

tx_ref_pos_TCN_df <- read_tx_ref("Tx.+.TCN.txt")
tx_ref_neg_TCN_df <- read_tx_ref("Tx.-.TCN.txt")
tx_ref_pos_NGA_df <- read_tx_ref("Tx.+.NGA.txt")
tx_ref_neg_NGA_df <- read_tx_ref("Tx.-.NGA.txt")

# Denominators for the "T" and "U" rates. The pairing mirrors the Tx labelling
# applied to tx_df: REF == "C" is labelled "U" on "+" genes and "T" on "-"
# genes, so TCN on "+" and NGA on "-" feed the U length, and the other two
# tables feed the T length.
tx_ref_T_len <- tx_ref_pos_NGA_df$V1 + tx_ref_neg_TCN_df$V1
tx_ref_U_len <- tx_ref_neg_NGA_df$V1 + tx_ref_pos_TCN_df$V1

# Load the mutation table and label every row with a strand class `Tx`:
#   gene_dir "+": REF "C" -> "U", any other REF -> "T"
#   gene_dir "-": REF "C" -> "T", any other REF -> "U"
#   any other gene_dir    -> "."
# Rows whose gene_dir is "." are dropped afterwards.
# NOTE(review): "T"/"U" presumably mean transcribed / untranscribed strand, and
# REF is presumably always "C" or "G" in this file -- confirm.
tx_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/Tx/A3A.total.gene.raw.rh.vcf.gd") %>%
  mutate(
    Tx = ifelse(
      gene_dir == "+",
      ifelse(REF == "C", "U", "T"),
      ifelse(
        gene_dir == "-",
        ifelse(REF == "C", "T", "U"),
        "."
      )
    )
  ) %>%
  filter(gene_dir != ".")

# Per-sample strand table: count rows for every id / TP53 / Tx combination,
# spread the Tx labels into columns, divide the `T` and `U` counts by their
# reference lengths, and express each rate as a share of the two rates.
tx_df %>%
  group_by(id, TP53, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(key = Tx, value = n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len,
    U_rate = `U` / tx_ref_U_len,
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  )



# Strand table per TP53 group. Dropping `id` and de-duplicating first means
# rows that differ only by sample are counted once per group.
# NOTE(review): the divisor 6 is presumably the number of samples per TP53
# group -- confirm.
tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(key = Tx, value = n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len / 6,
    U_rate = `U` / tx_ref_U_len / 6,
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  )

# Bar plot of the T and U rates for the TP53 == "WT" group. The table is
# rebuilt the same way as the per-TP53 summary (id dropped, rows de-duplicated,
# counts divided by the reference length and by 6), then the two rate columns
# are stacked into region / rate pairs for plotting.
tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(key = Tx, value = n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len / 6,
    U_rate = `U` / tx_ref_U_len / 6,
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  ) %>%
  filter(TP53 == "WT") %>%
  gather(key = region, value = rate, T_rate:U_rate) %>%
  ggplot(aes(x = region, y = rate, fill = region)) +
  geom_bar(stat = "identity") +
  ggtitle("WT_Tx") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 30)
  )



# Strand table over all samples together. `id` and `TP53` are dropped and the
# rows de-duplicated before counting, so the result is one row with a column
# per Tx label.
# NOTE(review): the divisor 12 is presumably the total number of samples --
# confirm.
tx_df %>%
  select(-id, -TP53) %>%
  unique() %>%
  group_by(Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(key = Tx, value = n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len / 12,
    U_rate = `U` / tx_ref_U_len / 12,
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  )
  


## Strand rates computed separately for C>T and C>G ##
######################################################

# Per-sample strand table split by mutation type: count rows for every
# id / TP53 / mut_type / Tx combination, spread the Tx labels into columns and
# normalise the `T` and `U` counts by their reference lengths.
#
# The counts here are per sample, so they are NOT divided by 6. Elsewhere in
# this script the divisors 6 and 12 are only applied to tables where `id` has
# been dropped, and the other per-sample tables divide by the reference length
# alone. The earlier `/6` changed only the printed T_rate / U_rate values;
# T_ratio and U_ratio are unaffected.
tx_df %>%
  group_by(id, TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len,
    U_rate = `U` / tx_ref_U_len
  ) %>%
  mutate(
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  ) %>%
  print(n = 100)



# Strand table per TP53 group and mutation type. `id` is dropped and the rows
# de-duplicated before counting.
# NOTE(review): the divisor 6 is presumably the number of samples per TP53
# group -- confirm.
tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(key = Tx, value = n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len / 6,
    U_rate = `U` / tx_ref_U_len / 6,
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  )


# Strand table per mutation type over all samples together. `id` and `TP53`
# are dropped and the rows de-duplicated before counting.
# NOTE(review): the divisor 12 is presumably the total number of samples --
# confirm.
tx_df %>%
  select(-id, -TP53) %>%
  unique() %>%
  group_by(mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(key = Tx, value = n) %>%
  mutate(
    T_rate = `T` / tx_ref_T_len / 12,
    U_rate = `U` / tx_ref_U_len / 12,
    T_ratio = T_rate / (T_rate + U_rate),
    U_ratio = U_rate / (T_rate + U_rate)
  )





## C>T versus C>G share within each strand class ##
###################################################

# Fix the order of mut_type to "C>T", "C>G" for everything below. Any other
# value becomes NA because it is not among the listed levels.
tx_df$mut_type <- factor(tx_df$mut_type, levels = c("C>T", "C>G"))

# Per-sample share of C>T versus C>G among the `T` counts.
# The previous version also computed T_rate / U_rate / T_ratio / U_ratio here
# and dropped them in the very next step; that dead computation is removed and
# the resulting table is unchanged.
tx_df %>%
  group_by(id, TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  select(-U) %>%
  # Drop the grouping so the second spread can put mut_type into columns.
  ungroup() %>%
  spread(mut_type, `T`) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  ) %>%
  print(n = 100)
  

# Per-sample share of C>T versus C>G among the `U` counts.
# The previous version also computed T_rate / U_rate / T_ratio / U_ratio here
# and dropped them in the very next step; that dead computation is removed and
# the resulting table is unchanged.
tx_df %>%
  group_by(id, TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  select(-`T`) %>%
  # Drop the grouping so the second spread can put mut_type into columns.
  ungroup() %>%
  spread(mut_type, `U`) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )


# Per-TP53-group share of C>T versus C>G among the `T` counts. `id` is dropped
# and the rows de-duplicated before counting.
# The previous version also computed T_rate / U_rate / T_ratio / U_ratio here
# and dropped them in the very next step; that dead computation is removed and
# the resulting table is unchanged.
tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  select(-`U`) %>%
  # Drop the grouping so the second spread can put mut_type into columns.
  ungroup() %>%
  spread(mut_type, `T`) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )



# Per-TP53-group share of C>T versus C>G among the `U` counts. `id` is dropped
# and the rows de-duplicated before counting.
# The previous version also computed T_rate / U_rate / T_ratio / U_ratio here
# and dropped them in the very next step; that dead computation is removed and
# the resulting table is unchanged.
tx_df %>%
  select(-id) %>%
  unique() %>%
  group_by(TP53, mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  select(-`T`) %>%
  # Drop the grouping so the second spread can put mut_type into columns.
  ungroup() %>%
  spread(mut_type, `U`) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )



# Share of C>T versus C>G among the `T` counts over all samples together.
# `id` and `TP53` are dropped and the rows de-duplicated before counting.
# The previous version also computed T_rate / U_rate / T_ratio / U_ratio here
# and dropped them in the very next step; that dead computation is removed and
# the resulting table is unchanged.
tx_df %>%
  select(-id, -TP53) %>%
  unique() %>%
  group_by(mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  select(-`U`) %>%
  # Drop the grouping so the second spread can put mut_type into columns.
  ungroup() %>%
  spread(mut_type, `T`) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )


# Share of C>T versus C>G among the `U` counts over all samples together.
# `id` and `TP53` are dropped and the rows de-duplicated before counting.
# The previous version also computed T_rate / U_rate / T_ratio / U_ratio here
# and dropped them in the very next step; that dead computation is removed and
# the resulting table is unchanged.
tx_df %>%
  select(-id, -TP53) %>%
  unique() %>%
  group_by(mut_type, Tx) %>%
  dplyr::summarise(n = n()) %>%
  spread(Tx, n) %>%
  select(-`T`) %>%
  # Drop the grouping so the second spread can put mut_type into columns.
  ungroup() %>%
  spread(mut_type, `U`) %>%
  mutate(
    `C>T_ratio` = `C>T` / (`C>T` + `C>G`),
    `C>G_ratio` = `C>G` / (`C>T` + `C>G`)
  )



