library(dplyr)
library(ggplot2)
library(tidyverse)

# ---- Load per-sample signature exposures and the sample annotation tables ----

# Every file under the signature-fitting directory whose name matches
# "exposures.tsv" (list.files() treats the second argument as a regex).
files_to_read <- list.files(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/new_filter/sig_with_18",
  "exposures.tsv",
  full.names = TRUE
)

# Read each exposure table and tag its rows with a sample id derived from the
# file name (everything from ".mutect2" onwards is stripped).
tmp <- lapply(files_to_read, function(path) {
  sample_id <- gsub(".mutect2.*", "", basename(path))
  mutate(read_tsv(path), id = sample_id)
})

# Stack all samples into one long table and round exposures to whole numbers.
sig_df <- do.call(rbind, tmp)
sig_df$Exposure <- round(sig_df$Exposure)

metadata <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/metadata.txt")
# Inspection only: the result is not assigned, so this just displays the
# metadata rows whose id contains "SC" when the value is auto-printed.
filter(metadata, grepl("SC", id))

# Tables whose `id` column is used further down as factor level orderings.
id <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/id.txt")
id_2 <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/id.2.txt")

# Natural join: no `by` is given, so every column name shared by the two
# tables is used as a key (presumably just `id` -- verify against metadata.txt).
sig_df <- left_join(sig_df, metadata)


# Per-sample sum of the v3_2 and v3_13 exposures, joined back so that every
# row of a sample carries its `2and13sum` total.
sig_df <- sig_df %>%
  filter(Signature %in% c("v3_13", "v3_2")) %>%
  group_by(id) %>%
  dplyr::summarise(`2and13sum` = sum(Exposure)) %>%
  left_join(sig_df, .)

APOBEC_sig_df <- sig_df
#write.table(APOBEC_sig_df%>%select(id,APOBEC,dose,TP53,`m/d`,`2and13sum`)%>%unique(),"/home/users/ayh/Projects/27_A3B/06_Figure_code/indel/APOBEC_sig_df.txt",
#            row.name=F,
#            quote=F,
#            sep="\t"
#)

# Fix the ordering of the grouping variables. Values that are not among the
# listed levels become NA.
APOBEC_sig_df[["dose"]] <- factor(
  APOBEC_sig_df[["dose"]],
  levels = c("CTRL", "100ng", "3ug")
)
APOBEC_sig_df[["TP53"]] <- factor(
  APOBEC_sig_df[["TP53"]],
  levels = c("WT", "KO")
)

# Sample order comes from the `id` column of the id.txt table. This is done
# with `[[<-` rather than mutate() because inside mutate() the name `id` would
# resolve to the data column, not to the `id` table.
APOBEC_sig_df[["id"]] <- factor(APOBEC_sig_df[["id"]], levels = id[["id"]])


# Placeholder ("blank") rows that are appended to the exposure table.
blank_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/blank_df.txt")

# Exposure rows plus blank rows, with samples ordered by the id.2.txt table.
APOBEC_blank_sig_df2 <- rbind(APOBEC_sig_df, blank_df)
# Re-level the id column of the table just built. The previous version read
# the column from `APOBEC_blank_sig_df`, an object that is never created in
# this script, so the line failed with "object not found" in a clean session.
APOBEC_blank_sig_df2$id <- factor(APOBEC_blank_sig_df2$id, levels = id_2$id)



# Build an exposure table with blank rows appended and signatures relabelled.
#
# Steps, in order:
#   1. add a constant `blank = "X"` column and drop the "Unexplained" rows;
#   2. append `blank_rows` (must have the same columns, since rbind() is used);
#   3. order `id` by `id_levels`;
#   4. restrict `Signature` to the seven v3_* signatures below (anything else
#      becomes NA), then rewrite the "v3_" prefix as "SBS";
#   5. rename the FIRST column to "SBS" and turn it into an ordered factor.
#
# Arguments:
#   sig_df     exposure table with at least `Signature` and `id` columns.
#   blank_rows rows to append after step 1.
#   id_levels  vector giving the desired order of `id`.
# Returns the combined table with an `SBS` factor column.
add_blank_rows_and_relabel <- function(sig_df, blank_rows, id_levels) {
  out <- sig_df %>%
    mutate(blank = "X") %>%
    filter(Signature != "Unexplained")
  out <- rbind(out, blank_rows)

  out$id <- factor(out$id, levels = id_levels)
  out$Signature <- factor(
    out$Signature,
    levels = c("v3_2", "v3_13", "v3_1", "v3_5", "v3_17b", "v3_18", "v3_40")
  )

  # gsub() works on the character form of the factor, so Signature is a plain
  # character column again after this step.
  out <- out %>% mutate(Signature = gsub("v3_", "SBS", Signature))

  # Positional rename: assumes `Signature` is the first column -- TODO confirm
  # against the exposures.tsv layout.
  colnames(out)[1] <- "SBS"
  out$SBS <- factor(
    out$SBS,
    levels = c("SBS2", "SBS13", "SBS1", "SBS5", "SBS17b", "SBS18", "SBS40")
  )
  out
}

# Variant 3: blank rows from blank_df.2.txt, sample order from id.3.txt.
blank_df2 <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/blank_df.2.txt")
id_3 <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/id.3.txt")

APOBEC_blank_sig_df3 <- add_blank_rows_and_relabel(
  APOBEC_sig_df,
  blank_rows = blank_df2,
  id_levels = id_3$id
)

# Variant 5: blank rows from blank_df.4.txt, sample order from id.4.txt.
# (The object is called blank_df3 although it is read from blank_df.4.txt.)
blank_df3 <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/blank_df.4.txt")
id_4 <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/id.4.txt")

APOBEC_blank_sig_df5 <- add_blank_rows_and_relabel(
  APOBEC_sig_df,
  blank_rows = blank_df3,
  id_levels = id_4$id
)





# Combined dose/TP53 label, e.g. "<dose>_<TP53>".
APOBEC_blank_sig_df4 <- mutate(
  APOBEC_blank_sig_df3,
  type = paste(dose, TP53, sep = "_")
)

# One row per sample (wide over SBS) with derived signature sums.
APOBEC_sum_df <- APOBEC_blank_sig_df4 %>%
  # Keep rows with `m/d` == "D" and drop the blank placeholder samples, both
  # the four named explicitly and any other id containing "blank".
  filter(
    `m/d` == "D",
    !id %in% c(
      "A3B_1st_C3_CTRL_WT_KO_blank",
      "A3B_1st_C5_CTRL_base_blank",
      "A3B_1st_C5_3ug_base_blank",
      "A3B_1st_C5_3ug_WT_KO_blank"
    ),
    !grepl("blank", id)
  ) %>%
  select(id, APOBEC, dose, TP53, type, SBS, Exposure) %>%
  # Long -> wide: one Exposure column per SBS level.
  spread(SBS, Exposure) %>%
  mutate(
    APOBEC_mutation = SBS2 + SBS13,
    Age_mutation = SBS1 + SBS5,
    SBS5and40sum = SBS5 + SBS40
  ) %>%
  arrange(APOBEC, TP53, dose) %>%
  # Share of SBS5 relative to SBS5 plus the SBS2 + SBS13 total.
  mutate(ratio = SBS5 / (SBS5 + APOBEC_mutation))


# Fit SBS5and40sum ~ APOBEC_mutation on `df` and return the fitted equation as
# a plotmath string ("y = a + b . x, r^2 = ...") suitable for a text layer with
# parse = TRUE.
#
# df: data frame with numeric columns `SBS5and40sum` and `APOBEC_mutation`.
# Returns a length-one character vector; intercept, slope and R squared are
# each formatted to 4 significant digits.
lm_eqn <- function(df) {
  fit <- lm(SBS5and40sum ~ APOBEC_mutation, data = df)
  estimates <- unname(coef(fit))

  # The values are substituted as strings so they are displayed exactly as
  # formatted here.
  pieces <- list(
    a = format(estimates[1], digits = 4),
    b = format(estimates[2], digits = 4),
    r2 = format(summary(fit)$r.squared, digits = 4)
  )
  equation <- substitute(
    italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
    pieces
  )

  as.character(as.expression(equation))
}
# Regression-equation labels for the A3A samples, one per TP53 status.
WT_lm <- APOBEC_sum_df %>%
  filter(APOBEC == "A3A", TP53 == "WT") %>%
  lm_eqn()
KO_lm <- APOBEC_sum_df %>%
  filter(APOBEC == "A3A", TP53 == "KO") %>%
  lm_eqn()
# Alternative kept for reference: store the fitted models instead of labels.
#WT_lm<-lm(SBS5and40sum~APOBEC_mutation,APOBEC_sum_df%>%filter(APOBEC=="A3A"&TP53=="WT"))
#KO_lm<-lm(SBS5and40sum~APOBEC_mutation,APOBEC_sum_df%>%filter(APOBEC=="A3A"&TP53=="KO"))


library(ggpmisc)

# Scatter plot of SBS5+SBS40 against SBS2+SBS13 for the A3A / TP53-WT samples,
# with a linear fit and the fitted equation printed on the panel.
p_sig_WT <- APOBEC_sum_df %>%
  filter(APOBEC == "A3A") %>%
  filter(TP53 == "WT") %>%
  filter(!grepl("Ctrl", id)) %>%
  ggplot(aes(x = APOBEC_mutation, y = SBS5and40sum)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  # annotate() draws the label once; a geom_text() layer with a constant label
  # would redraw it for every row of the plot data.
  # NOTE(review): the label is fitted without the "Ctrl" id filter applied to
  # the plotted data above -- confirm that no id contains "Ctrl", otherwise the
  # printed equation and the drawn line come from different rows.
  annotate(
    "text",
    x = 500, y = 400,
    label = lm_eqn(
      APOBEC_sum_df %>%
        filter(APOBEC == "A3A") %>%
        filter(TP53 == "WT")
    ),
    parse = TRUE,
    size = 5
  ) +
  #stat_poly_eq(size=10) +
  geom_point(size = 5) +
  theme_classic() +
  theme(
    axis.title.x = element_text(size = 40),
    axis.text.x = element_text(size = 30),
    #plot.title=element_text(size=30),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 40),
    axis.ticks.length = unit(.4, "cm"),
    # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
    # line elements; left as-is to stay compatible with the installed version.
    axis.ticks = element_line(size = 4)
  ) +
  scale_y_continuous(
    limits = c(0, 500),
    breaks = seq(0, 500, by = 125)
  ) +
  # `breaks` is its own argument. It used to sit inside the c() call, which
  # turned the limits into a length-8 vector and left the breaks unset.
  scale_x_continuous(
    limits = c(0, 5000),
    breaks = seq(0, 5000, by = 1000)
  )


#ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig5/sig_WT.pdf",p_sig_WT,
#       width=8,height=10)
# Scatter plot of SBS5+SBS40 against SBS2+SBS13 for the A3A / TP53-KO samples,
# with a linear fit and the fitted equation printed on the panel.
p_sig_KO <- APOBEC_sum_df %>%
  filter(APOBEC == "A3A") %>%
  filter(TP53 == "KO") %>%
  filter(!grepl("Ctrl", id)) %>%
  ggplot(aes(x = APOBEC_mutation, y = SBS5and40sum)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  # annotate() draws the label once; a geom_text() layer with a constant label
  # would redraw it for every row of the plot data.
  # NOTE(review): the label is fitted without the "Ctrl" id filter applied to
  # the plotted data above -- confirm that no id contains "Ctrl", otherwise the
  # printed equation and the drawn line come from different rows.
  annotate(
    "text",
    x = 500, y = 400,
    label = lm_eqn(
      APOBEC_sum_df %>%
        filter(APOBEC == "A3A") %>%
        filter(TP53 == "KO")
    ),
    parse = TRUE,
    size = 5
  ) +
  #stat_poly_line() +
  #stat_poly_eq(size=10) +
  geom_point(size = 5) +
  theme_classic() +
  theme(
    axis.title.x = element_text(size = 40),
    axis.text.x = element_text(size = 30),
    #plot.title=element_text(size=30),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 40),
    axis.ticks.length = unit(.4, "cm"),
    # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
    # line elements; left as-is to stay compatible with the installed version.
    axis.ticks = element_line(size = 4)
  ) +
  scale_y_continuous(
    limits = c(0, 500),
    breaks = seq(0, 500, by = 125)
  ) +
  # `breaks` is its own argument. It used to sit inside the c() call, which
  # turned the limits into a malformed vector and left the breaks unset.
  # The break at 4000 lies beyond the upper limit of 3500 and is kept only to
  # preserve the original sequence.
  scale_x_continuous(
    limits = c(0, 3500),
    breaks = seq(0, 4000, by = 1000)
  )
p_sig_KO
#ggtitle("SBS5+40")
#ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig5/sig_KO.pdf",p_sig_KO,
#       width=8,height=10)


# Combined figure: SBS5+SBS40 against SBS2+SBS13 for all A3A samples, coloured
# by TP53 status, with one linear fit per status and both fitted equations
# printed on the panel. The plot is written to sig_merge.v2.pdf.
p_merge <- APOBEC_sum_df %>%
  filter(APOBEC == "A3A") %>%
  #filter(TP53=="WT")%>%
  filter(!grepl("Ctrl", id)) %>%
  ggplot(aes(x = APOBEC_mutation, y = SBS5and40sum, col = TP53)) +
  # fullrange = TRUE extends each fitted line across the whole x axis.
  geom_smooth(method = "lm", formula = y ~ x, fullrange = TRUE) +
  #  geom_text(x = 500, y = 400, label = lm_eqn(APOBEC_sum_df%>%
  #                                               filter(APOBEC=="A3A")%>%
  #                                               filter(TP53=="WT")), parse = TRUE,size=5)+
  #stat_poly_eq(size=10) +
  geom_point(size = 5) +
  theme_classic() +
  theme(
    axis.title.x = element_text(size = 40),
    axis.text.x = element_text(size = 30),
    #plot.title=element_text(size=30),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 40),
    axis.ticks.length = unit(.4, "cm"),
    # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
    # line elements; left as-is to stay compatible with the installed version.
    axis.ticks = element_line(size = 4)
  ) +
  scale_y_continuous(
    limits = c(0, 450),
    breaks = seq(0, 400, by = 100)
  ) +
  # `breaks` is its own argument. It used to sit inside the c() call, which
  # turned the limits into a length-8 vector and left the breaks unset.
  scale_x_continuous(
    limits = c(0, 5000),
    breaks = seq(0, 5000, by = 1000)
  ) +
  ylab("SBS5+SBS40") +
  guides(color = "none") +
  # NOTE(review): these geom_text() layers inherit the plot data and the
  # `col = TP53` mapping, so each label is redrawn for every row in that row's
  # colour. Consider annotate("text", ...) with an explicit colour per label.
  # The labels are also fitted without the "Ctrl" id filter used for the
  # plotted data -- confirm that no id contains "Ctrl".
  geom_text(
    x = 1000, y = 350,
    label = lm_eqn(
      APOBEC_sum_df %>%
        filter(APOBEC == "A3A") %>%
        filter(TP53 == "WT")
    ),
    parse = TRUE,
    size = 5
  ) +
  geom_text(
    x = 1000, y = 100,
    label = lm_eqn(
      APOBEC_sum_df %>%
        filter(APOBEC == "A3A") %>%
        filter(TP53 == "KO")
    ),
    parse = TRUE,
    size = 5
  )

p_merge
ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig5/sig_merge.v2.pdf",
  p_merge,
  width = 8,
  height = 10
)
