library(dplyr)
library(tidyverse)

# ---- Indel signature exposures: load, annotate with metadata, export ----
# Directory that holds the per-sample exposure tables, the metadata file and
# the exported summary table.
sig_dir<-"/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/indel/clonal/sig"

# Full paths of every file in sig_dir whose name matches "exposures.tsv".
indel_vcf_list<-list.files(sig_dir,
                           "exposures.tsv",
                           full.names=TRUE)
indel_vcf_list

# Stop here with a clear message instead of failing later inside the join.
if(length(indel_vcf_list)==0){
  stop("No exposures.tsv files found in ",sig_dir,call.=FALSE)
}

# Read each table and tag its rows with an id: the file name with everything
# from the ".mutect2" match onwards removed.
indel_vcf_tmp<-lapply(indel_vcf_list,function(x){
  read_tsv(x)%>%mutate(id=gsub(".mutect2.*","",basename(x)))
})

# bind_rows() stacks the list in one call and matches columns by name.
indel_vcf_merge_df<-bind_rows(indel_vcf_tmp)

metadata<-read_tsv(file.path(sig_dir,"metadata.txt"))

# The join uses every column the two tables share.
# NOTE(review): presumably the key is `id` - confirm and pin it with `by=`.
# Only rows whose `m/d` column equals "D" are kept.
indel_vcf_merge_df<-left_join(indel_vcf_merge_df,metadata)%>%
  filter(`m/d`=="D")

# Round exposures to whole numbers, spread the signatures into columns (one
# row per id/APOBEC/dose/TP53 combination) and write a tab-separated table.
indel_vcf_merge_df%>%mutate(Exposure=round(Exposure,0))%>%
  select(`Signature #`,Exposure,id,APOBEC,dose,TP53)%>%
  spread(`Signature #`,Exposure)%>%
  write.table(file.path(sig_dir,"indel_summary.df"),
              sep="\t",
              quote=FALSE,
              row.names=FALSE)
  

# ---- Boxplots per dose from the combined indel/SNV summary table ----
# Load the summary table and fix the order in which doses are displayed.
merge_sum_df<-read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/indel/clonal/sig/indel_snv_summary.df")
dose_levels<-c("CTRL","100ng/ml","3ug/ml")
merge_sum_df[["dose"]]<-factor(merge_sum_df[["dose"]],levels=dose_levels)

# nTCN by dose, one panel per APOBEC/TP53 combination.
ggplot(merge_sum_df,aes(x=dose,y=nTCN))+
  facet_wrap(~APOBEC+TP53)+
  geom_boxplot()+
  geom_jitter()

# ID9 by dose, same panel layout.
ggplot(merge_sum_df,aes(x=dose,y=ID9))+
  facet_wrap(~APOBEC+TP53)+
  geom_boxplot()+
  geom_jitter()


# nTCN against `2and13sum` for rows with APOBEC "A3A" and TP53 "WT" (all
# doses), with a linear-model smoother on top of the points.
ggplot(filter(merge_sum_df,APOBEC=="A3A",TP53=="WT"),
       aes(x=`2and13sum`,y=nTCN))+
  geom_point()+
  geom_smooth(method="lm")


# Scatter of ID9 against `2and13sum` with a red linear fit on fixed axes.
# Shared by both figures below so their styling cannot drift apart.
plot_ID9_vs_2and13sum<-function(df){
  ggplot(df,aes(x=`2and13sum`,y=ID9))+
    geom_point()+
    geom_smooth(method="lm",col="red")+
    theme_classic()+
    # NOTE(review): with the default out-of-bounds handling, scale limits
    # replace out-of-range values with NA before the smoother is fitted, so
    # the drawn line can differ from an lm() on the full data. Use
    # coord_cartesian() instead if a pure zoom is intended.
    scale_y_continuous(limits=c(0,25))+
    scale_x_continuous(limits=c(0,5000))
}

# A3A rows at non-control doses, for every TP53 value.
a3a_treated_df<-merge_sum_df%>%
  filter(APOBEC=="A3A")%>%
  filter(dose!="CTRL")

# Same rows restricted to TP53 "WT"; printed for inspection.
a3a_wt_treated_df<-a3a_treated_df%>%
  filter(TP53=="WT")
a3a_wt_treated_df

p_ID9<-plot_ID9_vs_2and13sum(a3a_wt_treated_df)
p_ID9
ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/ID9_SNV.pdf",p_ID9,
       height=10,width=10)
# Recorded output of an equation helper, kept for reference (presumably the
# fit for the plot above - confirm):
#[1] "italic(y) == c(`(Intercept)` = \"6.9\") + c(``2and13sum`` = \"0.003\") %.% italic(x) * \",\" ~ ~italic(r)^2 ~ \"=\" ~ \"0.399\""


# Same figure without the TP53 filter.
p_KO_ID9<-plot_ID9_vs_2and13sum(a3a_treated_df)

p_KO_ID9
#' Format the linear fit nTCN ~ `2and13sum` as a plotmath equation string.
#'
#' @param df Data frame with numeric columns `nTCN` and `2and13sum`.
#' @return A length-one character vector holding a deparsed plotmath
#'   expression of the form y = a + b * x, r^2 = r2 (intercept and slope
#'   formatted to 2 significant digits, r^2 to 3).
lm_eqn1 <- function(df){
  m <- lm(nTCN ~ `2and13sum`, df)
  # unname() is required: format() keeps the coefficient name, and a named
  # vector is deparsed as c(`(Intercept)` = "...") inside the equation.
  eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
                   list(a = format(unname(coef(m)[1]), digits = 2),
                        b = format(unname(coef(m)[2]), digits = 2),
                        r2 = format(summary(m)$r.squared, digits = 3)))
  as.character(as.expression(eq))
}

#' Format the linear fit ID9 ~ `2and13sum` as a plotmath equation string.
#'
#' @param df Data frame with numeric columns `ID9` and `2and13sum`.
#' @return A length-one character vector holding a deparsed plotmath
#'   expression of the form y = a + b * x, r^2 = r2 (all three values
#'   formatted to 5 significant digits).
lm_eqn2 <- function(df){
  m <- lm(ID9 ~ `2and13sum`, df)
  # unname() is required: format() keeps the coefficient name, and a named
  # vector is deparsed as c(`(Intercept)` = "...") inside the equation.
  eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
                   list(a = format(unname(coef(m)[1]), digits = 5),
                        b = format(unname(coef(m)[2]), digits = 5),
                        r2 = format(summary(m)$r.squared, digits = 5)))
  as.character(as.expression(eq))
}

# Equation for nTCN ~ `2and13sum` on A3A / TP53-WT rows at every dose.
lm_eqn1(filter(merge_sum_df,APOBEC=="A3A",TP53=="WT"))


# Equation for ID9 ~ `2and13sum` on the same rows, leaving out the CTRL dose.
lm_eqn2(filter(merge_sum_df,APOBEC=="A3A",TP53=="WT",dose!="CTRL"))
