library(dplyr)
library(tidyverse)
library(ggplot2)
library(cowplot)



# Sample annotation table, reduced to the four columns used below.
metadata <- dplyr::select(
  read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/13_RNA_editing_bam_process/dna/depth/metadata.v2.txt"),
  APOBEC, id, time, dose
)
metadata

# Locate every per-sample table whose file name ends in
# "corrected.vcf.table.txt".
rna_edit_vcf_files <- list.files(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/19_RNA_editing_corrected/vcf",
  "corrected.vcf.table.txt$",
  full.names = TRUE
)

# Stop here with a clear message: with no files, rbind() below would return
# NULL and the script would only fail later inside the join.
if (length(rna_edit_vcf_files) == 0) {
  stop("No 'corrected.vcf.table.txt' files found in the vcf directory.",
       call. = FALSE)
}

# Read each space-separated table and tag its rows with a sample id, taken
# from the file name with everything from the first "_co" onwards removed.
rna_edit_tmp <- lapply(rna_edit_vcf_files, function(x) {
  read.csv(x, sep = " ") %>%
    mutate(id = gsub("_co.*", "", basename(x)))
})

# Stack the per-sample tables into one long data frame.
rna_edit_df <- do.call(rbind, rna_edit_tmp)



# Attach the sample annotation to every count row. The key is spelled out so
# the join cannot silently pick up extra shared columns.
rna_edit_count_merge_df <- left_join(
  rna_edit_df,
  metadata %>% dplyr::select(id, APOBEC, time, dose),
  by = "id"
)

# Median count per context (Var1) across the samples of each
# APOBEC/time/dose condition. The result is ungrouped so later steps do not
# inherit a hidden grouping, and Var1 is renamed to sig_cont.
rna_edit_count_merge_df <- rna_edit_count_merge_df %>%
  group_by(APOBEC, time, dose, Var1) %>%
  dplyr::summarise(count = median(Freq)) %>%
  ungroup() %>%
  dplyr::rename(sig_cont = Var1)

# Working copy used for the signature plots.
redit_sig_df <- rna_edit_count_merge_df
# Fill colour for each of the 12 substitution types, keyed by substitution.
# The vector is written out with its names because paste0() returns an
# unnamed vector; an unnamed palette is matched to the fill levels by
# position only, whereas a named one is matched by substitution.
palette.COSMIC.SNV.192 <- c(
  `C>A` = "#1DBDF1", `C>G` = "#04080B", `C>T` = "#E82825",
  `T>A` = "#CACACA", `T>C` = "#A3CE62", `T>G` = "#EDC7C6",
  `G>A` = "#94D5E9", `G>C` = "#5E6265", `G>T` = "#E26C14",
  `A>C` = "#EBE7EC", `A>G` = "#D9F677", `A>T` = "#F6DDD9"
)

#redit_sig_df$Substitution%>%unique()
#redit_sig_df$Substitution<-factor(redit_sig_df$Substitution,levels=redit_sig_df$Substitution%>%unique())


# Context labels have the form <first base><ref>><alt><last base>,
# e.g. "AC>AA". Every component is repeated to the full length of 96 so
# nothing depends on paste0() recycling a shorter vector.

# 96 contexts whose reference base is C or T.
contextorder96 <- paste0(
  rep(c("A", "C", "G", "T"), each = 4, times = 6),  # first base
  rep(c("C", "T"), each = 48),                      # reference base
  ">",
  rep(c("A", "G", "T", "A", "C", "G"), each = 16),  # alternate base
  rep(c("A", "C", "G", "T"), times = 24)            # last base
)

# 96 contexts whose reference base is G or A.
contextorder96_rev <- paste0(
  rep(c("A", "C", "G", "T"), each = 4, times = 6),  # first base
  rep(c("G", "A"), each = 48),                      # reference base
  ">",
  rep(c("A", "C", "T", "C", "G", "T"), each = 16),  # alternate base
  rep(c("A", "C", "G", "T"), times = 24)            # last base
)

# Full 192-context ordering: C/T-reference contexts, then G/A-reference ones.
contextorder192 <- c(contextorder96, contextorder96_rev)

# Put the contexts in the fixed 192-context order; any context not listed in
# contextorder192 becomes NA.
redit_sig_df$sig_cont <- factor(redit_sig_df$sig_cont, levels = contextorder192)

# Sort by condition and context, then compute each condition's total count
# and every context's share of it. A grouped mutate() replaces joining the
# table to a summary of itself, which relied on an implicit join key.
redit_sig_df <- redit_sig_df %>%
  arrange(APOBEC, time, dose, sig_cont) %>%
  group_by(APOBEC, time, dose) %>%
  mutate(
    tot_count = sum(count),
    ratio = count / tot_count
  ) %>%
  ungroup()

# Largest ratio (value noted in the original script: 0.1714664).
max(redit_sig_df$ratio)
# Smallest non-zero ratio (value noted in the original script: 0.00001703752).
min(redit_sig_df$ratio[redit_sig_df$ratio > 0])

# Plotting helper columns.
#  - type_3: x position of the context, i.e. its index within the levels of
#    sig_cont. Taking it from the factor avoids hard-coding the number of
#    conditions; the previous rep(1:192, 6) only fits exactly 1152 rows.
#  - Substitution: characters 2-4 of the context label, e.g. "C>A".
redit_sig_df <- redit_sig_df %>%
  mutate(
    type_3 = as.integer(sig_cont),
    Substitution = substr(sig_cont, 2, 4)
  )

# Keep the substitutions in order of first appearance.
redit_sig_df$Substitution <- factor(
  redit_sig_df$Substitution,
  levels = unique(redit_sig_df$Substitution)
)
redit_sig_df$Substitution

#  - axis: alternating "up"/"down" flag, sized to the table. The previous
#    fixed length of 576 could not fit the same table as rep(1:192, 6).
#  - info: condition label "<APOBEC>_<time>_<dose>".
redit_sig_df <- redit_sig_df %>%
  mutate(
    axis = rep_len(c("up", "down"), n()),
    info = paste(APOBEC, time, dose, sep = "_")
  )
unique(redit_sig_df$info)
redit_sig_df %>% arrange(-count)
# Draw the 192-context profile of every APOBEC/time/dose condition and save
# each one as its own PDF. cowplot (plot_grid, save_plot) is expected to be
# attached once at the top of the script, not on every iteration.
for (i in redit_sig_df$info %>% unique()) {
  df <- redit_sig_df %>% filter(info == i)

  # Pair each x break with the context found at that position so breaks and
  # labels always have the same length; labels are passed as plain strings.
  x_breaks <- unique(df$type_3)
  x_labels <- as.character(df$sig_cont[match(x_breaks, df$type_3)])

  # Main panel: one bar per context, height = count.
  h1 <- ggplot(df) +
    # Guide lines on the y-axis breaks. The earlier 0-0.2 positions belonged
    # to a ratio axis and were invisible on this 0-2600 count axis.
    geom_hline(
      yintercept = seq(0, 2500, by = 500),
      linetype = "dotted", col = "grey"
    ) +
    geom_bar(
      mapping = aes(x = type_3, y = count, fill = Substitution),
      stat = "identity",
      position = "dodge",
      width = 0.5
    ) +
    theme(
      axis.ticks.x = element_line(size = 1, colour = "black"),
      axis.ticks.y = element_line(size = 1, colour = "grey"),
      axis.ticks.length.y = unit(-0.25, "cm"),
      panel.spacing.x = unit(0, "mm"),
      axis.title.x = element_blank(),
      strip.background.x = element_blank(),
      strip.text.x = element_blank()
    ) +
    # NOTE(review): hjust = 20 is kept from the original; the stand-alone
    # version of this plot further down uses hjust = 1 - confirm intent.
    theme(
      axis.text.x = element_text(
        angle = 90, vjust = 0.5, hjust = 20, size = 20, family = "Consolas"
      ),
      axis.text.y = element_text(size = 55),
      axis.title.y = element_text(size = 60)
    ) +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      panel.border = element_rect(colour = "grey", fill = NA, size = 1)
    ) +
    scale_x_continuous(
      breaks = x_breaks,
      labels = x_labels,
      expand = c(0.01, 0)
    ) +
    scale_y_continuous(
      limits = c(0, 2600),
      expand = c(0, 0),
      breaks = seq(0, 2500, by = 500)
    ) +
    xlab("") +
    scale_fill_manual(values = palette.COSMIC.SNV.192) +
    theme(legend.position = "none") +
    # NOTE(review): the bars show `count`, yet the title still reads
    # "Composition ratio"; the text is kept unchanged - confirm the label.
    ylab("Composition ratio")

  # Header strip: one coloured, labelled facet per substitution type.
  h2 <- ggplot(df) +
    geom_bar(
      mapping = aes(x = type_3, y = 1, fill = Substitution),
      stat = "identity",
      width = 1
    ) +
    theme_void() +
    theme(panel.spacing.x = unit(0, "mm")) +
    facet_grid(. ~ Substitution, scales = "free_x") +
    scale_fill_manual(values = palette.COSMIC.SNV.192) +
    theme(legend.position = "none") +
    theme(strip.text.x = element_text(size = 40))

  # Stack the strip above the bar chart.
  plot <- plot_grid(
    h2, h1,
    align = "v", ncol = 1, axis = "tblr", rel_heights = c(0.5, 5)
  )

  # Close a leftover graphics device only if one is open; a bare dev.off()
  # raises an error when only the null device exists.
  if (dev.cur() > 1) {
    dev.off()
  }

  # "/" in the condition label is replaced so it cannot act as a path separator.
  save_plot(
    paste0("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/redit.original.sig.", gsub("/", "_", i), ".v2.pdf"),
    plot,
    base_height = 7,
    device = cairo_pdf
  )
}
# Stand-alone bar chart of `ratio` per context position (type_3), coloured by
# substitution, built from a filtered subset of redit_sig_df.
# NOTE(review): no `Sig` column is created for redit_sig_df anywhere in the
# visible part of this script, so this filter is expected to fail unless the
# column is added elsewhere - confirm which subset was intended.
h1<-ggplot(redit_sig_df%>%filter(Sig=="B"))+
  # Dotted guide lines at the y-axis breaks.
  geom_hline(yintercept=seq(0,0.3,by=0.1)
             ,linetype="dotted",col="grey")+
  geom_bar(mapping=aes(x=type_3,y=ratio,fill=Substitution),
           stat="identity",
           position="dodge",
           width=0.5)+
  #  guides(fill = guide_legend(ncol = 2))+
  theme(#axis.text.x.bottom = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y=element_line(size=1,colour="grey"),
    axis.ticks.length.y=unit(-0.25,"cm"),
    panel.spacing.x = unit(0, "mm"),
    axis.title.x = element_blank(),
    strip.background.x = element_blank(),
    strip.text.x = element_blank())+
  #facet_grid(.~Substitution, scales = "free_x")+
  theme(axis.text.x=element_text(angle=90,vjust=0.5,hjust=1,size=20,family="Consolas"),
        axis.text.y=element_text(size=55),
        axis.title.y=element_text(size=60)
  )+
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "grey", fill=NA, size=1),
  )+
  # NOTE(review): `Trinucleotide` is likewise not created in the visible code;
  # the per-condition plots above label the axis with `sig_cont` - confirm.
  scale_x_continuous(
    breaks = redit_sig_df$type_3%>%unique(),
    labels = (redit_sig_df[1:192,])$Trinucleotide,
    expand = c(0.01, 0)
  )+
  scale_y_continuous(
    limits=c(0,0.3),
    expand=c(0,0),
    breaks=seq(0,0.3,by=0.1)
    
  )+
  # The empty y title set here is replaced by the final ylab() call.
  xlab("")+ylab("")+
  scale_fill_manual(values=palette.COSMIC.SNV.192)+theme(legend.position = "none")+
  ylab("Composition ratio")

h1
# Header strip for the stand-alone plot: unit-height bars over every context
# position, faceted by substitution so each type gets a coloured, labelled
# segment. The legend is hidden; the facet strip text acts as the label.
h2 <- ggplot(redit_sig_df) +
  geom_col(aes(x = type_3, y = 1, fill = Substitution), width = 1) +
  facet_grid(. ~ Substitution, scales = "free_x") +
  scale_fill_manual(values = palette.COSMIC.SNV.192) +
  theme_void() +
  theme(
    panel.spacing.x = unit(0, "mm"),
    legend.position = "none",
    strip.text.x = element_text(size = 40)
  )

h2
# Stack the substitution strip (h2) above the bar chart (h1). cowplot is
# expected to be attached at the top of the script. The unused `legend` grid
# was dropped: both plots hide their legend and nothing read the object.
h1 <- h1 + theme(legend.position = "none")
h2 <- h2 + theme(legend.position = "none")

plot_A3A <- plot_grid(
  h2, h1,
  align = "v", ncol = 1, axis = "tblr", rel_heights = c(0.5, 5)
)
# Show the figure that was just built; `plot` here would display whatever the
# per-condition loop assigned last.
plot_A3A

# Total number of C>T edits per sample: keep the contexts whose label
# contains "C>T" and sum their counts within each sample. The former rename
# of Var1 was removed because summarise() dropped that column straight away.
redit_count_df <- left_join(
  rna_edit_df,
  metadata %>% dplyr::select(id, APOBEC, time, dose),
  by = "id"
) %>%
  filter(grepl("C>T", Var1)) %>%
  group_by(id, APOBEC, time, dose) %>%
  dplyr::summarise(tot_C_T_edit = sum(Freq)) %>%
  ungroup() %>%
  # cond = "<time>_<dose>", used as the x axis of the count plots.
  mutate(cond = paste(time, dose, sep = "_")) %>%
  as_tibble()

# Mean, SD, n, standard error and a two-sided 95% t-interval of the per-sample
# totals for each APOBEC/condition. With a single sample, sd() is NA and so
# are the interval bounds.
redit_count_ci_df <- redit_count_df %>%
  group_by(APOBEC, cond) %>%
  dplyr::summarise(
    mean.count = mean(tot_C_T_edit, na.rm = TRUE),
    sd.count = sd(tot_C_T_edit, na.rm = TRUE),
    n.count = n()
  ) %>%
  ungroup() %>%
  mutate(
    se.count = sd.count / sqrt(n.count),
    lower.ci.count = mean.count - qt(1 - (0.05 / 2), n.count - 1) * se.count,
    upper.ci.count = mean.count + qt(1 - (0.05 / 2), n.count - 1) * se.count
  )


# Exploratory plot (not saved): mean C>T edit count per condition for A3A
# with its 95% confidence interval as error bars.
# Two stray fragments that followed this plot were removed: a dangling
# `geom_errorbar(aes(y))`, and a chain of stat_boxplot()/geom_jitter() layers
# added to each other with no ggplot() in front, which ggplot2 rejects. The
# same layers are still used by the per-sample box plots p_A3A and p_A3B.
redit_count_ci_df %>%
  filter(APOBEC == "A3A") %>%
  ggplot(aes(x = cond, y = mean.count, col = "black")) +
  geom_boxplot() +
  geom_errorbar(
    aes(y = mean.count, ymax = upper.ci.count, ymin = lower.ci.count),
    width = .5
  ) +
  # NOTE(review): theme_classic() below is a complete theme, so it replaces
  # the theme_bw() and text/tick settings made here - confirm which styling
  # is wanted.
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 30),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 40),
    axis.ticks.length = unit(.4, "cm"),
    axis.ticks = element_line(size = 4)
  ) +
  scale_color_manual(values = c("black")) +
  geom_jitter(width = 0.1, color = "black", size = 3) +
  theme_classic() +
  theme(legend.position = "none") +
  ylim(c(0, 8000))

# Box plot of the per-sample total C>T edit counts for one APOBEC enzyme.
#
# count_df:    table with columns APOBEC, cond and tot_C_T_edit.
# apobec_name: value of the APOBEC column to plot, e.g. "A3A".
# Returns a ggplot object with one box per `cond`, whisker end caps drawn as
# error bars, and the individual samples overlaid as jittered points.
plot_c_to_t_edit_counts <- function(count_df, apobec_name) {
  count_df %>%
    filter(APOBEC == apobec_name) %>%
    ggplot(aes(x = cond, y = tot_C_T_edit, col = "black")) +
    geom_boxplot() +
    # Second box layer drawn without outlier points.
    stat_boxplot(aes(ymin = ..lower.., ymax = ..upper..), outlier.shape = NA) +
    # Horizontal caps at the upper and lower whisker ends.
    stat_boxplot(geom = "errorbar", aes(ymin = ..ymax..)) +
    stat_boxplot(geom = "errorbar", aes(ymax = ..ymin..)) +
    geom_jitter(width = 0.1, color = "black", size = 3) +
    theme_classic() +
    theme(legend.position = "none") +
    ylim(c(0, 8000))
}

p_A3A <- plot_c_to_t_edit_counts(redit_count_df, "A3A")
p_A3A
ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/RNA_editing_cor_count.tot.A3A.pdf", p_A3A,
       width = 8, height = 10)

p_A3B <- plot_c_to_t_edit_counts(redit_count_df, "A3B")
p_A3B
ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/RNA_editing_cor_count.tot.A3B.pdf", p_A3B,
       width = 8, height = 10)

# Exploratory views of rna_edit_sig_merge_df.
# NOTE(review): rna_edit_sig_merge_df is not created anywhere in the visible
# part of this script; it presumably comes from another script or an earlier
# session - confirm before running this section.

# Editing counts per condition for A3A, one panel per signature.
rna_edit_sig_merge_df %>%
  filter(APOBEC == "A3A") %>%
  #filter(sig=="rC")%>%
  ggplot(aes(x = cond, y = redit_count, fill = sig)) +
  geom_boxplot() +
  facet_wrap(~sig)

# Same view for A3B.
rna_edit_sig_merge_df %>%
  filter(APOBEC == "A3B") %>%
  #filter(sig=="rC")%>%
  ggplot(aes(x = cond, y = redit_count, fill = sig)) +
  geom_boxplot() +
  facet_wrap(~sig)

# Wide table of ratios for A3A, one column per signature.
rna_edit_sig_merge_df %>%
  filter(APOBEC == "A3A") %>%
  dplyr::select(-redit_count) %>%
  spread(sig, ratio)
# NOTE(review): the line below stood alone with no data argument, so it could
# not run. It is kept disabled as the apparent count-based alternative to the
# spread() call above - confirm and wire it into a pipeline if needed.
# spread(sig,redit_count)

# Rows belonging to signatures rB and rC.
rna_edit_sig_merge_df %>%
  #    filter(APOBEC=="A3A")%>%
  filter(sig %in% c("rB", "rC"))