library(dplyr)
library(tidyverse)
library(ggplot2)


# ---- Inputs and reference constants ----------------------------------------

# Reference RTCA / YTCA context counts. The raw `value` column is renamed and
# doubled.
# NOTE(review): the doubling presumably accounts for two strands or two
# haplotypes -- confirm against how the count files were generated.
RTCA_count_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/GRCh37.jolim.RTCA_count.tsv"
) %>%
  dplyr::rename(ref_RTCA = value) %>%
  mutate(ref_RTCA = ref_RTCA * 2)

YTCA_count_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/GRCh37.jolim.YTCA_count.tsv"
) %>%
  dplyr::rename(ref_YTCA = value) %>%
  mutate(ref_YTCA = ref_YTCA * 2)

# NOTE(review): presumably the genome-wide YTCA / RTCA context-count ratio;
# the two integers are hard-coded here -- confirm their source.
YR_ratio <- 133867172 / 87202632
YR_log_ratio <- log10(YR_ratio)

# NOTE(review): presumably YTCA-vs-RTCA slopes from yeast A3A / A3B data --
# confirm the source of these values.
yeast_A3A_count_slope <- 1.552
yeast_A3B_count_slope <- 0.6746

# Derive the log10 values from the constants above so they cannot drift apart.
yeast_A3A_count_log_slope <- log10(yeast_A3A_count_slope)
yeast_A3B_count_log_slope <- log10(yeast_A3B_count_slope)

pcawg_APOBEC_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/36_RTCA_YTCA/pcawg/pcawg.clonal.enrichment.ref_tot.tsv"
)

# Interactive inspection of the PCAWG table.
pcawg_APOBEC_df


org_APOBEC_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/RTCA_YTCA_rate.v4.tsv"
)


# Stack two sources into one table of per-sample YTCA / RTCA mutation counts:
#   * rows of org_APOBEC_df with APOBEC == "A3A", TP53 == "WT" and a dose
#     other than "CTRL", tagged with sub_project_code "A3A";
#   * rows of pcawg_APOBEC_df whose `2and13sum` exceeds 5000.
# Columns are renamed inside select() so both pieces share the Mut_* names.
cont_merge_df <- bind_rows(
  org_APOBEC_df %>%
    filter(APOBEC == "A3A", TP53 == "WT", dose != "CTRL") %>%
    select(id, Mut_YTCA = sum_YTCA, Mut_RTCA = sum_RTCA) %>%
    mutate(sub_project_code = "A3A"),
  pcawg_APOBEC_df %>%
    filter(`2and13sum` > 5000) %>%
    select(
      id,
      sub_project_code,
      Mut_YTCA = sum_Mut_YTCA,
      Mut_RTCA = sum_Mut_RTCA
    )
)
cont_merge_df

# Inspect PCAWG samples where the RTCA rate exceeds the YTCA rate,
# largest `2and13sum` first.
pcawg_APOBEC_df %>%
  filter(RTCA_rate > YTCA_rate) %>%
  arrange(desc(`2and13sum`)) %>%
  select(-APOBEC, -adj.p)

# Label the rows tagged "A3A" above as "org"; everything else is "cancer".
cont_merge_df <- cont_merge_df %>%
  mutate(groups = if_else(sub_project_code == "A3A", "org", "cancer"))

cont_merge_df %>%
  arrange(Mut_YTCA)
# Scatter of YTCA vs RTCA mutation counts on log10 axes, coloured by group,
# with a per-group linear fit and several slope-1 reference lines.
# The abline intercepts are given as log10() values to match the log10 axes.
p_YTCA_RTCA_count <- cont_merge_df %>%
  ggplot(aes(x = Mut_RTCA, y = Mut_YTCA, colour = groups)) +
  geom_point() +
  # Spell out `limits`; the original `lim` relied on partial argument matching.
  scale_y_log10(limits = c(1, 18000)) +
  scale_x_log10(limits = c(1, 18000)) +
  geom_abline(slope = 1, linetype = "dashed") +
  geom_abline(slope = 1, intercept = YR_log_ratio) +
  geom_abline(
    slope = 1, intercept = yeast_A3A_count_log_slope,
    colour = "green", alpha = 0.5, linetype = "dashed"
  ) +
  geom_abline(
    slope = 1, intercept = yeast_A3B_count_log_slope,
    colour = "blue", alpha = 0.5, linetype = "dashed"
  ) +
  # A3A
  geom_abline(
    slope = 1, intercept = log10(2.7),
    colour = "red", linetype = "dashed"
  ) +
  # NOTE(review): the original comment on this line also said "A3A" --
  # confirm what the 2.5 ratio refers to.
  geom_abline(
    slope = 1, intercept = log10(2.5),
    colour = "orange", linetype = "dashed"
  ) +
  # Explicit formula (the default for method = "lm") avoids the runtime message.
  geom_smooth(method = "lm", formula = y ~ x, fullrange = TRUE) +
  theme_classic()
p_YTCA_RTCA_count

ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/YTCA_RTCA_count.pdf",
  plot = p_YTCA_RTCA_count,
  width = 8,
  height = 10
)

#' Plotmath label for the linear fit of Mut_YTCA on Mut_RTCA
#'
#' Fits `lm(Mut_YTCA ~ Mut_RTCA)` on `df` and builds an expression of the form
#' `y == a + b . x, r^2 = r2`, where `a` and `b` are the intercept and slope
#' formatted to 2 significant digits and `r2` is the R-squared formatted to 3.
#'
#' @param df A data frame with numeric columns `Mut_YTCA` and `Mut_RTCA`.
#' @return A length-one `expression` holding the formatted equation.
lm_eqn <- function(df) {
  m <- lm(Mut_YTCA ~ Mut_RTCA, data = df)
  # coef() returns a named vector. Drop the names so the substituted values
  # deparse as plain strings instead of c(`(Intercept)` = "...").
  coefs <- unname(coef(m))
  eq <- substitute(
    italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
    list(
      a = format(coefs[1], digits = 2),
      b = format(coefs[2], digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3)
    )
  )
  as.expression(eq)
}

#' Plotmath label for the linear fit of Mut_log_YTCA on Mut_log_RTCA
#'
#' Fits `lm(Mut_log_YTCA ~ Mut_log_RTCA)` on `df` and builds an expression of
#' the form `y == a + b . x, r^2 = r2`, where `a` and `b` are the intercept and
#' slope formatted to 2 significant digits and `r2` is the R-squared formatted
#' to 3.
#'
#' @param df A data frame with numeric columns `Mut_log_YTCA` and
#'   `Mut_log_RTCA`.
#' @return A length-one `expression` holding the formatted equation.
lm_log_eqn <- function(df) {
  m <- lm(Mut_log_YTCA ~ Mut_log_RTCA, data = df)
  # coef() returns a named vector. Drop the names so the substituted values
  # deparse as plain strings instead of c(`(Intercept)` = "...").
  coefs <- unname(coef(m))
  eq <- substitute(
    italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
    list(
      a = format(coefs[1], digits = 2),
      b = format(coefs[2], digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3)
    )
  )
  as.expression(eq)
}


# Fit inputs, built once and reused for the linear and log10 fits below.
# Organoid rows: APOBEC == "A3A", TP53 == "WT", dose other than "CTRL", with
# the sum_* columns renamed to the Mut_* names the lm helpers expect.
org_A3A_fit_df <- org_APOBEC_df %>%
  filter(APOBEC == "A3A", TP53 == "WT", dose != "CTRL") %>%
  dplyr::rename(Mut_RTCA = sum_RTCA, Mut_YTCA = sum_YTCA)
cancer_fit_df <- cont_merge_df %>%
  filter(groups == "cancer")

# Fits on raw counts.
lm_eqn(org_A3A_fit_df)
lm_eqn(cancer_fit_df)

# Fits on log10-transformed counts.
lm_log_eqn(
  org_A3A_fit_df %>%
    mutate(
      Mut_log_YTCA = log10(Mut_YTCA),
      Mut_log_RTCA = log10(Mut_RTCA)
    )
)
lm_log_eqn(
  cancer_fit_df %>%
    mutate(
      Mut_log_YTCA = log10(Mut_YTCA),
      Mut_log_RTCA = log10(Mut_RTCA)
    )
)

  

# Number of PCAWG samples per sub_project_code at two `2and13sum` cut-offs,
# and with no cut-off. dplyr::count() is namespaced in case another attached
# package also defines count().
pcawg_APOBEC_df %>%
  filter(`2and13sum` > 5000) %>%
  dplyr::count(sub_project_code)

pcawg_APOBEC_df %>%
  filter(`2and13sum` > 10000) %>%
  dplyr::count(sub_project_code)

pcawg_APOBEC_df %>%
  dplyr::count(sub_project_code)

# Export the merged count table with `2and13sum` attached. The join keys are
# spelled out; they are the columns the two tables have in common, so the
# result matches the previous implicit natural join.
left_join(
  cont_merge_df,
  pcawg_APOBEC_df %>% select(id, `2and13sum`, sub_project_code),
  by = c("id", "sub_project_code")
) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/YTCA_RTCA_count.merge.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )

# Export the full PCAWG table unchanged.
pcawg_APOBEC_df %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/pcawg.YTCA_RTCA.pval.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )
  