library(dplyr)
library(tidyverse)
library(ggplot2)
library(ggprism)

##1. clonal A3A mutations##
###########################

library(dplyr)
library(ggplot2)
library(tidyverse)

# Locate the per-sample signature exposure tables of the clonal WGS calls.
files_to_read <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/new_filter/sig_with_18",
  pattern = "exposures.tsv",
  full.names = TRUE
)

# Read each table and tag its rows with the sample id, i.e. the file name
# with everything from ".mutect2" onwards removed.
tmp <- lapply(files_to_read, function(path) {
  sample_id <- gsub(".mutect2.*", "", basename(path))
  read_tsv(path) %>%
    mutate(id = sample_id)
})

# Stack all samples and round the exposures to whole numbers.
sig_df <- do.call(rbind, tmp)
sig_df <- sig_df %>%
  mutate(Exposure = round(Exposure, 0))

# Sample annotation and id tables.
metadata <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/metadata.txt")
id <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/id.txt")
id_2 <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/id.2.txt")

# Attach the metadata (natural join on the shared columns).
sig_df <- left_join(sig_df, metadata)

# Add the per-sample sum of the v3_2 and v3_13 exposures as `2and13sum`.
sig_df <- sig_df %>%
  left_join(
    sig_df %>%
      filter(Signature %in% c("v3_13", "v3_2")) %>%
      group_by(id) %>%
      dplyr::summarise(`2and13sum` = sum(Exposure))
  )

# Working copy with explicit factor orderings for dose, TP53 status and sample id.
APOBEC_sig_df <- sig_df %>%
  mutate(
    dose = factor(dose, levels = c("CTRL", "100ng", "3ug")),
    TP53 = factor(TP53, levels = c("WT", "KO"))
  )
# `id` is both a column and the id table, so this one stays outside mutate().
APOBEC_sig_df$id <- factor(APOBEC_sig_df$id, levels = id$id)

# Placeholder rows and the matching sample order table.
blank_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/blank_df.txt")

blank_df3 <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/blank_df.4.txt")
id_4 <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/id.4.txt")

# Real samples are flagged blank == "X"; the "Unexplained" component is dropped
# before the placeholder rows from blank_df3 are appended.
APOBEC_blank_sig_df5 <- APOBEC_sig_df %>%
  mutate(blank = "X") %>%
  filter(Signature != "Unexplained")
APOBEC_blank_sig_df5 <- rbind(APOBEC_blank_sig_df5, blank_df3)

APOBEC_blank_sig_df5$id <- factor(APOBEC_blank_sig_df5$id, levels = id_4$id)
APOBEC_blank_sig_df5$Signature <- factor(
  APOBEC_blank_sig_df5$Signature,
  levels = c("v3_2", "v3_13", "v3_1", "v3_5", "v3_17b", "v3_18", "v3_40")
)

# Translate the "v3_" prefix to "SBS". The original code only overwrote
# `Signature` and never created the `SBS` column that the next statement
# (and the later select(id, SBS, ...)) relies on. `Signature` is still
# rewritten exactly as before; `SBS` is added as a copy of it.
APOBEC_blank_sig_df5 <- APOBEC_blank_sig_df5 %>%
  mutate(
    Signature = gsub("v3_", "SBS", Signature),
    SBS = Signature
  )
APOBEC_blank_sig_df5$SBS <- factor(
  APOBEC_blank_sig_df5$SBS,
  levels = c("SBS2", "SBS13", "SBS1", "SBS5", "SBS17b", "SBS18", "SBS40")
)


##2.botseq###
#############


# Satellite intervals (BED-like, no header): keep chromosome, start and end.
sat_df <- read.csv(
  "/home/users/ayh/Projects/99_scripts/annotation/botseq/snv/satellite_hg19_rename.bed",
  header = FALSE,
  sep = "\t"
) %>%
  as_tibble() %>%
  select(V1, V2, V3)
colnames(sat_df) <- c("CHROM", "POS1", "POS2")

# Interval length, then drop contigs whose name contains "GL" or "Y".
# The original condition joined the two negations with `|`, which is TRUE for
# every row that does not contain BOTH patterns, so nothing was ever removed;
# `&` implements the intended exclusion.
sat_df <- sat_df %>%
  mutate(dist = POS2 - POS1 + 1) %>%
  filter(!grepl("GL", CHROM) & !grepl("Y", CHROM))

# Signature exposure tables of the bot-seq samples plus the A3A sample metadata.
bot_vcf <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/15_bot_seq/A3A/integrate/vcf/240321/sig",
  pattern = "exposures.tsv",
  full.names = TRUE
)
bot_meta <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/15_bot_seq/A3A/A3A_conditional_bat1/after_id_swap/04_varscan/snv/annotation/DS/metadata.txt")

# One table per file; the id is the file name with ".snp" and what follows removed.
bot_tmp <- lapply(bot_vcf, function(path) {
  sample_id <- gsub(".snp.*", "", basename(path))
  read_tsv(path) %>%
    mutate(id = sample_id)
})

# Stack the samples and attach the metadata (natural join).
bot_merge_vcf <- do.call(rbind, bot_tmp)
bot_merge_vcf <- bot_merge_vcf %>%
  left_join(bot_meta)


# Table listing one depth file per sample (column `file`).
cov_file <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/15_bot_seq/A3A/integrate/depth/metadata.v3.txt")

# Read every depth file (path echoed as progress); the id is the file name
# with ".fin" and what follows removed.
cov_tmp <- lapply(cov_file$file, function(depth_path) {
  print(depth_path)
  depth_tbl <- read.csv(depth_path, header = FALSE, sep = "\t")
  depth_tbl %>%
    mutate(id = gsub(".fin.*", "", basename(depth_path)))
})

# Second column of the depth files is used as the covered length.
cov_merge_vcf <- do.call(rbind, cov_tmp) %>%
  select(id, covered_length = V2)
cov_merge_vcf <- cov_merge_vcf %>%
  left_join(cov_file, by = c("id" = "new_id")) %>%
  select(-file) %>%
  plyr::rename(c("id.y" = "old_id"))

bot_merge_vcf <- bot_merge_vcf %>%
  mutate(Exposure = round(Exposure, 0))

# Per-sample sum of the v3_2 and v3_13 exposures, joined back onto the long table.
bot_new_merge_vcf <- bot_merge_vcf %>%
  select(id, time, dose, batch, Signature, Exposure) %>%
  left_join(
    bot_merge_vcf %>%
      filter(Signature %in% c("v3_2", "v3_13")) %>%
      group_by(id) %>%
      dplyr::summarise(`2and13sum` = sum(Exposure))
  )

# Rates per 1e6 covered bases; mut_rate additionally scales the covered
# length by (151*2-110)/(151*2). Only batches bat1 and bat3 are kept.
snv_merge_vcf <- left_join(bot_new_merge_vcf, cov_merge_vcf) %>%
  mutate(
    tot_mut_rate = `2and13sum`/covered_length*1000000,
    mut_rate = Exposure/(covered_length*(151*2-110)/(151*2))*1000000,
    APOBEC = "A3A"
  ) %>%
  filter(batch %in% c("bat1", "bat3"))

A3A_snv_merge_vcf <- snv_merge_vcf

##botseq_A3B###
###############

# Same exposure directory as the A3A section, paired with the A3B metadata.
bot_vcf <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/15_bot_seq/A3A/integrate/vcf/240321/sig",
  pattern = "exposures.tsv",
  full.names = TRUE
)

bot_meta <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/15_bot_seq/A3B/integrate/metadata.txt")

# One table per file; the id is the file name with ".snp" and what follows removed.
bot_tmp <- lapply(bot_vcf, function(path) {
  sample_id <- gsub(".snp.*", "", basename(path))
  read_tsv(path) %>%
    mutate(id = sample_id)
})

bot_merge_vcf <- do.call(rbind, bot_tmp)

# Keep only rows that found a metadata match (non-missing DNA_file).
bot_merge_vcf <- bot_merge_vcf %>%
  left_join(bot_meta) %>%
  filter(!is.na(DNA_file))


# Table listing one depth file per A3B sample (column `file`).
cov_file <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/15_bot_seq/A3B/integrate/depth/metadata.v4.txt")

# Read every depth file (path echoed as progress); the id is the file name
# with ".fin" and what follows removed.
cov_tmp <- lapply(cov_file$file, function(depth_path) {
  print(depth_path)
  depth_tbl <- read.csv(depth_path, header = FALSE, sep = "\t")
  depth_tbl %>%
    mutate(id = gsub(".fin.*", "", basename(depth_path)))
})

# Second column of the depth files is used as the covered length.
cov_merge_vcf <- do.call(rbind, cov_tmp) %>%
  select(id, covered_length = V2)

cov_merge_vcf <- cov_merge_vcf %>%
  left_join(cov_file) %>%
  select(-file)

bot_merge_vcf <- bot_merge_vcf %>%
  mutate(Exposure = round(Exposure, 0))

# Per-sample (new_id) sum of the v3_2 and v3_13 exposures, joined back on.
bot_new_merge_vcf <- bot_merge_vcf %>%
  select(new_id, time, dose, batch, Signature, Exposure) %>%
  left_join(
    bot_merge_vcf %>%
      filter(Signature %in% c("v3_2", "v3_13")) %>%
      group_by(new_id) %>%
      dplyr::summarise(`2and13sum` = sum(Exposure))
  )

# Attach the covered length via new_id, then swap the id columns so that
# `id` holds the new id and `old_id` the original one.
snv_merge_vcf <- bot_new_merge_vcf %>%
  left_join(
    cov_merge_vcf %>% select(id, new_id, covered_length),
    by = "new_id"
  ) %>%
  plyr::rename(c("id" = "old_id", "new_id" = "id"))

# Same rate definitions as in the A3A section.
snv_merge_vcf <- snv_merge_vcf %>%
  mutate(
    tot_mut_rate = `2and13sum`/covered_length*1000000,
    mut_rate = Exposure/(covered_length*(151*2-110)/(151*2))*1000000
  )

# A3B set: batch bat1 only, ids containing "TP53" removed.
A3B_snv_merge_vcf <- snv_merge_vcf %>%
  filter(batch %in% c("bat1")) %>%
  mutate(APOBEC = "A3B") %>%
  filter(!grepl("TP53", id))

names(A3B_snv_merge_vcf)[1] <- "id"

# Combined A3A + A3B bot-seq table.
sum_snv_merge_vcf <- rbind(A3A_snv_merge_vcf, A3B_snv_merge_vcf)


# Satellite intervals (BED-like, no header): keep chromosome, start and end.
sat_df <- read.csv(
  "/home/users/ayh/Projects/99_scripts/annotation/botseq/snv/satellite_hg19_rename.bed",
  header = FALSE,
  sep = "\t"
) %>%
  as_tibble() %>%
  select(V1, V2, V3)
colnames(sat_df) <- c("CHROM", "POS1", "POS2")

# Interval length, then drop contigs whose name contains "GL" or "Y".
# The original condition joined the two negations with `|`, which is TRUE for
# every row that does not contain BOTH patterns, so no interval was excluded
# from the subtraction below; `&` implements the intended exclusion.
sat_df <- sat_df %>%
  mutate(dist = POS2 - POS1 + 1) %>%
  filter(!grepl("GL", CHROM) & !grepl("Y", CHROM))

# Genome length used for scaling: fixed total minus the retained satellite bases.
g_length <- 3054815472 - sum(sat_df$dist)

# Scale the per-1e6 rate to g_length, double it, and round to a whole count.
sum_snv_merge_vcf <- sum_snv_merge_vcf %>%
  mutate(cor_mut_rate = mut_rate*g_length/1000000*2) %>%
  mutate(cor_mut_rate = round(cor_mut_rate))

###3. fitting####
#################

# Combine the clonal table with the bot-seq table into one long table.
# Clonal rows: time fixed to "48h", only `m/d` == "D" and non-placeholder rows,
# order 1 for TP53 WT and 3 otherwise. Bot-seq rows: TP53 set to "WT", order 2,
# with cor_mut_rate taking the place of Exposure.
tot_merge_df <- rbind(
  APOBEC_blank_sig_df5 %>%
    mutate(time = "48h") %>%
    filter(`m/d` == "D", blank == "X") %>%
    select(id, SBS, Exposure, APOBEC, dose, TP53, time) %>%
    mutate(order = ifelse(TP53 == "WT", 1, 3)),
  sum_snv_merge_vcf %>%
    mutate(TP53 = "WT") %>%
    select(id, Signature, cor_mut_rate, APOBEC, dose, TP53, time) %>%
    mutate(order = 2) %>%
    plyr::rename(c("Signature" = "SBS", "cor_mut_rate" = "Exposure")) %>%
    mutate(SBS = gsub("v3_", "SBS", SBS))
)

# Inspection: print the v3_2 rows of the bot-seq part (before the prefix swap).
sum_snv_merge_vcf %>%
  mutate(TP53 = "WT") %>%
  select(id, Signature, cor_mut_rate, APOBEC, dose, TP53, time) %>%
  mutate(order = 2) %>%
  plyr::rename(c("Signature" = "SBS", "cor_mut_rate" = "Exposure")) %>%
  filter(SBS == "v3_2") %>%
  print(n = 100)

# Quick overview plot of all samples, faceted by APOBEC, order and dose.
tot_merge_df %>%
  ggplot(aes(x = id, y = Exposure, col = SBS)) +
  geom_bar(stat = "identity") +
  facet_wrap(~APOBEC + order + dose, scales = "free_x")

#id_df<-read_tsv("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/id_list.txt")
id_df <- read_tsv("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/id_list.v2.txt")

# Restrict to the ids listed in id_df and order the samples as listed there.
tot_merge_df <- tot_merge_df %>%
  filter(id %in% id_df$id)
tot_merge_df$id <- factor(tot_merge_df$id, levels = id_df$id)

# Normalise signature names, drop "Unexplained", fix the signature order and
# add a constant outline colour column.
tot_merge_df <- tot_merge_df %>%
  mutate(SBS = gsub("v3_", "SBS", SBS)) %>%
  filter(SBS != "Unexplained")
tot_merge_df <- tot_merge_df %>%
  mutate(
    SBS = factor(SBS, levels = c("SBS2", "SBS13", "SBS1", "SBS18", "SBS5", "SBS40")),
    colo = "black"
  )


#pdf("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/Fig2.1_mutation_count.A3A.pdf",width=20,height=15)

# Signature order used for stacking in p1.
tot_merge_df <- tot_merge_df %>%
  mutate(SBS = factor(SBS, levels = c("SBS2", "SBS13", "SBS1", "SBS5", "SBS18", "SBS40")))

# Stacked bar chart of all six signatures per A3A sample, with dashed guide
# lines every 1000 SNVs and no x-axis labels.
p1 <- tot_merge_df %>%
  filter(APOBEC == "A3A") %>%
  ggplot(aes(x = id, y = Exposure, fill = SBS, col = colo)) +
  geom_hline(yintercept = seq(1000, 5000, by = 1000), linetype = "dashed") +
  geom_bar(stat = "identity", width = 0.8) +
  theme_classic() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 45),
    strip.text = element_text(size = 20),
    strip.background = element_rect(colour = NA, fill = NA),
    axis.ticks.x = element_blank(),
    legend.title = element_blank(),
    legend.position = "top",
    axis.ticks.length = unit(.5, "cm"),
    axis.ticks = element_line(colour = "black", size = 3)
  ) +
  scale_fill_manual(values = c("#DE2E24", "#000000", "#f58231", "#9a6324", "#469990", "#ffd8b1")) +
  scale_color_manual(values = c("black")) +
  scale_y_continuous(
    breaks = seq(0, 5000, by = 1000),
    labels = seq(0, 5000, by = 1000),
    expand = c(0, 0),
    limits = c(0, 5600)
  ) +
  labs(x = "", y = "# of SNVs")

# Signed exposure: SBS2/SBS13 stay positive, every other signature is negated.
tot_merge_df <- tot_merge_df %>%
  mutate(new_Exposure = ifelse(SBS %in% c("SBS2", "SBS13"), Exposure, -Exposure))

# Final signature order for the plots below (the intermediate re-levelling of
# the original used the same set of levels and was immediately overridden).
tot_merge_df$SBS <- factor(
  tot_merge_df$SBS,
  levels = c("SBS40", "SBS18", "SBS5", "SBS1", "SBS2", "SBS13")
)

# SBS2 + SBS13 stacked bars for the A3A samples, rotated sample labels,
# dashed guide lines every 2000 SNVs.
p12 <- tot_merge_df %>%
  filter(APOBEC == "A3A", SBS %in% c("SBS2", "SBS13")) %>%
  ggplot(aes(x = id, y = new_Exposure, fill = SBS, col = colo)) +
  geom_hline(yintercept = seq(0, 8000, by = 2000), linetype = "dashed") +
  geom_bar(stat = "identity", width = 0.8) +
  theme_classic() +
  theme(
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 45),
    axis.text.x = element_text(angle = 90, hjust = 1),
    strip.text = element_text(size = 20),
    strip.background = element_rect(colour = NA, fill = NA),
    axis.ticks.x = element_blank(),
    legend.title = element_blank(),
    legend.position = "top",
    axis.ticks.length = unit(.5, "cm"),
    axis.ticks = element_line(colour = "black", size = 3)
  ) +
  scale_fill_manual(values = c("#DE2E24", "#000000")) +
  scale_color_manual(values = c("black")) +
  scale_y_continuous(
    breaks = seq(0, 10000, by = 2000),
    labels = seq(0, 10000, by = 2000),
    expand = c(0, 0),
    limits = c(0, 10500)
  ) +
  labs(x = "", y = "# of SNVs")

# Preview, then close the device the preview was drawn on.
p12
#p1
dev.off()

# Write the figure to PDF.
pdf("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/Fig2.1_mutation_count.A3A.v7.pdf", width = 20, height = 15)
p12
dev.off()


# SBS2 + SBS13 stacked bars for the A3B samples; same layout as the A3A
# figure but without x-axis labels and without a legend.
p22 <- tot_merge_df %>%
  filter(APOBEC == "A3B", SBS %in% c("SBS2", "SBS13")) %>%
  ggplot(aes(x = id, y = new_Exposure, fill = SBS, col = colo)) +
  geom_hline(yintercept = seq(0, 8000, by = 2000), linetype = "dashed") +
  geom_bar(stat = "identity", width = 0.8) +
  theme_classic() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 45),
    strip.text = element_text(size = 20),
    strip.background = element_rect(colour = NA, fill = NA),
    axis.ticks.x = element_blank(),
    legend.title = element_blank(),
    legend.position = "none",
    axis.ticks.length = unit(.5, "cm"),
    axis.ticks = element_line(colour = "black", size = 3)
  ) +
  scale_fill_manual(values = c("#DE2E24", "#000000")) +
  scale_color_manual(values = c("black")) +
  scale_y_continuous(
    breaks = seq(0, 10000, by = 2000),
    labels = seq(0, 10000, by = 2000),
    expand = c(0, 0),
    limits = c(0, 10500)
  ) +
  labs(x = "", y = "# of SNVs")

# Preview, then write the figure to PDF.
p22
pdf("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/Fig2.1_mutation_count.A3B.v7.pdf", width = 20, height = 15)
p22
dev.off()


##raw count##
# Column order of the wide tables below follows these factor levels.
tot_merge_df <- tot_merge_df %>%
  mutate(SBS = factor(SBS, levels = c("SBS1", "SBS2", "SBS5", "SBS13", "SBS18", "SBS40")))

# Inspection: wide table (one column per signature), printed before and after
# the summed columns and the (SBS5+SBS40)/(SBS2+SBS13) ratio are added.
tot_merge_df %>%
  select(-new_Exposure) %>%
  spread(SBS, Exposure) %>%
  print(n = 100) %>%
  arrange(APOBEC, TP53, dose, time) %>%
  mutate(
    `SBS2+SBS13` = SBS2 + SBS13,
    `SBS5+SBS40` = SBS5 + SBS40,
    rate = `SBS5+SBS40` / `SBS2+SBS13`
  ) %>%
  print(n = 100)

# Samples whose id contains neither "bot" nor "bat" -> clone_snv.txt
tot_merge_df %>%
  select(-new_Exposure) %>%
  spread(SBS, Exposure) %>%
  arrange(APOBEC, TP53, dose, time) %>%
  mutate(
    sum_2_13 = SBS2 + SBS13,
    sum_5_40 = SBS5 + SBS40
  ) %>%
  filter(!grepl("bot|bat", id)) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/clone_snv.txt",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )

tot_merge_df

# Samples whose id contains "bot" or "bat" -> bot_snv.txt
tot_merge_df %>%
  select(-new_Exposure) %>%
  spread(SBS, Exposure) %>%
  arrange(APOBEC, TP53, dose, time) %>%
  mutate(
    sum_2_13 = SBS2 + SBS13,
    sum_5_40 = SBS5 + SBS40
  ) %>%
  filter(grepl("bot|bat", id)) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/bot_snv.txt",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )

##fitting bar_plot##
####################

# One row per sample with SBS2, SBS13 and their sum (`2and13sum`).
tot_merge_bar_df <- tot_merge_df %>%
  filter(SBS %in% c("SBS2", "SBS13")) %>%
  select(-Exposure) %>%
  spread(SBS, new_Exposure) %>%
  mutate(`2and13sum` = SBS2 + SBS13)

# Condition label, and sequencing type derived from the id
# ("bot" when the id contains "bot" or "bat", "clone" otherwise).
tot_merge_bar_df <- tot_merge_bar_df %>%
  mutate(
    cond = paste(dose, TP53, sep = "_"),
    seqtype = ifelse(grepl("bot|bat", id), "bot", "clone")
  )
tot_merge_bar_df <- tot_merge_bar_df %>%
  mutate(
    dose = as.character(dose),
    seqtype = factor(seqtype, levels = c("clone", "bot"))
  )

# Inspection output.
tot_merge_bar_df %>% filter(seqtype == "clone")
tot_merge_bar_df %>% print(n = 100)

# Two 0h samples are relabelled as CTRL.
tot_merge_bar_df <- tot_merge_bar_df %>%
  mutate(dose = ifelse(id == "A3A_C3_1st_bat1_0h_100ng-1", "CTRL", dose)) %>%
  mutate(dose = ifelse(grepl("A3B_1st_C5_0h_100ng/ml", id), "CTRL", dose))
tot_merge_bar_df$dose

# Dose levels in order of first appearance.
tot_merge_bar_df$dose <- factor(
  tot_merge_bar_df$dose,
  levels = unique(tot_merge_bar_df$dose)
)

##t_A3A_clone
# A3A clones: one-sided ("less") t-tests between doses within each TP53 status.
tot_merge_bar_df %>%
  filter(seqtype == "clone", APOBEC == "A3A") %>%
  group_by(APOBEC, TP53) %>%
  rstatix::t_test(`2and13sum` ~ dose, p.adjust.method = "BH", alternative = "less") %>%
  rstatix::add_xy_position()

# A3A non-clone samples: dose comparison with the CTRL rows removed.
tot_merge_bar_df %>%
  filter(seqtype != "clone", APOBEC == "A3A") %>%
  filter(dose != "CTRL") %>%
  group_by(APOBEC) %>%
  rstatix::t_test(`2and13sum` ~ dose, p.adjust.method = "BH") %>%
  rstatix::add_xy_position()

# Clones: TP53 comparison within each APOBEC/dose combination, BH-adjusted
# across all comparisons. The significance flag previously used a 0.5 cut-off,
# which marks almost any result as "*"; the conventional 0.05 is used instead.
t2 <- tot_merge_bar_df %>%
  filter(seqtype == "clone") %>%
  group_by(APOBEC, dose) %>%
  rstatix::t_test(`2and13sum` ~ TP53, p.adjust.method = "BH") %>%
  rstatix::add_xy_position() %>%
  rstatix::adjust_pvalue(method = "BH") %>%
  mutate(p.adj.signif = ifelse(p.adj < 0.05, "*", "ns"))

# A3B non-clone samples: dose comparison (CTRL rows kept).
tot_merge_bar_df %>%
  filter(seqtype != "clone", APOBEC == "A3B") %>%
  #  filter(dose!="CTRL")%>%
  group_by(APOBEC) %>%
  rstatix::t_test(`2and13sum` ~ dose, p.adjust.method = "BH") %>%
  rstatix::add_xy_position()



#  filter(clone==c)
library(ggprism)
# Non-clone samples: one-sided ("less") t-tests between doses per APOBEC/TP53.
# The un-piped `filter(clone==c)` and the bare `t` that followed this pipeline
# were removed: the former stops the script with an error when sourced, the
# latter only prints the base function `t`.
tot_merge_bar_df %>%
  filter(seqtype != "clone") %>%
  group_by(APOBEC, TP53) %>%
  rstatix::t_test(`2and13sum` ~ dose, p.adjust.method = "BH", alternative = "less")
# Box plot of SBS2+SBS13 per dose for the A3A clones, coloured by TP53 status.
# The extra stat_boxplot layers redraw the box and add whisker end caps;
# individual samples are overlaid as jittered points.
p_A3A_bar <- tot_merge_bar_df %>%
  filter(APOBEC == "A3A", seqtype == "clone") %>%
  ggplot(aes(x = dose, y = `2and13sum`, col = TP53)) +
  geom_boxplot(aes(col = TP53), outlier.shape = NA) +
  stat_boxplot(aes(ymin = ..lower.., ymax = ..upper..), outlier.shape = NA) +
  stat_boxplot(geom = "errorbar", aes(ymin = ..ymax..)) +
  stat_boxplot(geom = "errorbar", aes(ymax = ..ymin..)) +
  geom_point(position = position_jitterdodge(), aes(group = TP53), size = 5) +
  theme_classic() +
  scale_y_continuous(limits = c(0, 5000)) +
  theme(
    axis.text.x = element_text(size = 40),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 45),
    legend.position = "none",
    legend.key = element_rect(size = 20),
    legend.text = element_text(size = 20),
    axis.ticks.length.y = unit(.5, "cm"),
    axis.ticks.length.x = unit(.5, "cm"),
    axis.ticks.y = element_line(colour = "black", size = 3),
    axis.ticks.x = element_line(colour = "black", size = 2)
  )

# Preview and save.
p_A3A_bar
ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/FigS/p_A3A_bar.v2.pdf",
  p_A3A_bar,
  width = 10,
  height = 8
)

# Box plot of SBS2+SBS13 per dose for the A3A non-clone samples,
# coloured by TP53 status, with jittered sample points.
p_A3A_bot_bar <- tot_merge_bar_df %>%
  filter(APOBEC == "A3A", seqtype != "clone") %>%
  ggplot(aes(x = dose, y = `2and13sum`, col = TP53)) +
  geom_boxplot(aes(col = TP53), outlier.shape = NA) +
  stat_boxplot(aes(ymin = ..lower.., ymax = ..upper..), outlier.shape = NA) +
  stat_boxplot(geom = "errorbar", aes(ymin = ..ymax..)) +
  stat_boxplot(geom = "errorbar", aes(ymax = ..ymin..)) +
  geom_point(position = position_jitterdodge(), aes(group = TP53), size = 5) +
  theme_classic() +
  scale_y_continuous(limits = c(0, 10500)) +
  theme(
    axis.text.x = element_text(size = 40),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 45),
    legend.position = "none",
    legend.key = element_rect(size = 20),
    legend.text = element_text(size = 20),
    axis.ticks.length.y = unit(.5, "cm"),
    axis.ticks.length.x = unit(.5, "cm"),
    axis.ticks.y = element_line(colour = "black", size = 3),
    axis.ticks.x = element_line(colour = "black", size = 2)
  )

# Preview and save.
p_A3A_bot_bar
ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/FigS/p_A3A_bot_bar.v2.pdf",
  p_A3A_bot_bar,
  width = 10,
  height = 8
)

# Box plot of SBS2+SBS13 per dose for the A3B non-clone samples,
# coloured by TP53 status, with jittered sample points.
p_A3B_bot_bar <- tot_merge_bar_df %>%
  filter(APOBEC == "A3B", seqtype != "clone") %>%
  ggplot(aes(x = dose, y = `2and13sum`, col = TP53)) +
  geom_boxplot(aes(col = TP53), outlier.shape = NA) +
  stat_boxplot(aes(ymin = ..lower.., ymax = ..upper..), outlier.shape = NA) +
  stat_boxplot(geom = "errorbar", aes(ymin = ..ymax..)) +
  stat_boxplot(geom = "errorbar", aes(ymax = ..ymin..)) +
  geom_point(position = position_jitterdodge(), aes(group = TP53), size = 5) +
  theme_classic() +
  scale_y_continuous(limits = c(0, 500)) +
  theme(
    axis.text.x = element_text(size = 40),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 45),
    legend.position = "none",
    legend.key = element_rect(size = 20),
    legend.text = element_text(size = 20),
    axis.ticks.length.y = unit(.5, "cm"),
    axis.ticks.length.x = unit(.5, "cm"),
    axis.ticks.y = element_line(colour = "black", size = 3),
    axis.ticks.x = element_line(colour = "black", size = 2)
  )

# Preview and save.
p_A3B_bot_bar
ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/FigS/p_A3B_bot_bar.v2.pdf",
  p_A3B_bot_bar,
  width = 10,
  height = 8
)

# Box plot of SBS2+SBS13 per dose for the A3B clones, coloured by TP53 status.
# All box layers use position_dodge2(preserve = "single").
p_A3B_bar <- tot_merge_bar_df %>%
  filter(APOBEC == "A3B", seqtype == "clone") %>%
  ggplot(aes(x = dose, y = `2and13sum`, col = TP53)) +
  geom_boxplot(
    aes(col = TP53),
    outlier.shape = NA,
    position = position_dodge2(preserve = "single")
  ) +
  stat_boxplot(
    aes(ymin = ..lower.., ymax = ..upper..),
    outlier.shape = NA,
    position = position_dodge2(preserve = "single")
  ) +
  stat_boxplot(
    geom = "errorbar",
    aes(ymin = ..ymax..),
    position = position_dodge2(preserve = "single")
  ) +
  stat_boxplot(
    geom = "errorbar",
    aes(ymax = ..ymin..),
    position = position_dodge2(preserve = "single")
  ) +
  geom_point(position = position_jitterdodge(), aes(group = TP53), size = 5) +
  theme_classic() +
  scale_y_continuous(limits = c(0, 30)) +
  theme(
    axis.text.x = element_text(size = 40),
    axis.text.y = element_text(size = 40),
    axis.title.y = element_text(size = 45),
    legend.position = "none",
    legend.key = element_rect(size = 20),
    legend.text = element_text(size = 20),
    axis.ticks.length.y = unit(.5, "cm"),
    axis.ticks.length.x = unit(.5, "cm"),
    axis.ticks.y = element_line(colour = "black", size = 3),
    axis.ticks.x = element_line(colour = "black", size = 2)
  )

# Save (no preview for this one).
ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/FigS/p_A3B_bar.v2.pdf",
  p_A3B_bar,
  width = 10,
  height = 8
)






##signature of clonal lines##
############################

# Per-sample signature spectra of the clonal lines.
spec_files_to_read <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/new_filter/sig_with_18",
  pattern = "signature_spectra.tsv",
  full.names = TRUE
)

# Read each spectrum and derive the annotations from the sample id:
#   TP53   - "KO" when the id contains "TP53", otherwise "WT"
#   dose   - first match of "_3", "100", "Ctrl"/"CTRL"; "none" when nothing matches
#   APOBEC - "A3A" when the id contains "A3A", otherwise "A3B"
spec_tmp <- lapply(spec_files_to_read, function(path) {
  read_tsv(path) %>%
    mutate(
      id = gsub(".mutect2_.*", "", basename(path)),
      TP53 = ifelse(grepl("TP53", id), "KO", "WT"),
      dose = case_when(
        grepl("_3", id) ~ "3ug/ml",
        grepl("100", id) ~ "0.1ug/ml",
        grepl("Ctrl", id) | grepl("CTRL", id) ~ "CTRL",
        TRUE ~ "none"
      ),
      APOBEC = ifelse(grepl("A3A", id), "A3A", "A3B")
    )
})

spec_merge_df <- do.call(rbind, spec_tmp)

# Median `Original` value per trinucleotide context within each
# TP53 / dose / APOBEC condition.
spec_sum_merge_df <- spec_merge_df %>%
  group_by(TP53, dose, APOBEC, Substitution, Trinucleotide) %>%
  dplyr::summarise(sum_ori = median(Original)) %>%
  ungroup()

# Per condition: total of the medians, each context's share of that total,
# and a running context index (type_3) used as the x position when plotting.
# The index was previously rep(1:96, 14), which only works while there are
# exactly 14 conditions; numbering the rows within each condition gives the
# same values in that case and keeps working when conditions are added or
# removed.
spec_sum_merge_df <- spec_sum_merge_df %>%
  group_by(TP53, dose, APOBEC) %>%
  mutate(
    tot_ori = sum(sum_ori),
    ratio = sum_ori / tot_ori,
    type_3 = row_number()
  ) %>%
  ungroup()

# Defaults for running the loop body by hand.
i <- "A3A"
j <- "0.1ug/ml"
k <- "WT"

# One colour per substitution class.
palette.COSMIC.SNV.96 <- c("#1EBFF0", "#050708", "#E62725", "#CBCACB", "#A1CF64", "#EDC8C5")
library(cowplot)

# One spectrum figure per APOBEC / dose / TP53 combination; combinations with
# no rows are reported on the console and skipped.
for (i in c("A3A", "A3B")) {
  for (j in c("CTRL", "0.1ug/ml", "3ug/ml")) {
    for (k in c("WT", "KO")) {
      norm_sig_std_df <- spec_sum_merge_df
      norm_sig_df <- norm_sig_std_df %>%
        filter(APOBEC == i, dose == j, TP53 == k)
      if (nrow(norm_sig_df) == 0) {
        print(i)
        print(j)
        print(k)
        next
      }

      # Main panel: composition ratio per trinucleotide context.
      h1 <- ggplot(norm_sig_df) +
        geom_hline(
          yintercept = seq(0, 0.3, by = 0.1),
          linetype = "dotted",
          col = "grey"
        ) +
        geom_bar(
          mapping = aes(x = type_3, y = ratio, fill = Substitution),
          stat = "identity",
          position = "dodge",
          width = 0.5
        ) +
        theme(
          axis.ticks.x = element_blank(),
          axis.ticks.y = element_line(size = 1, colour = "grey"),
          axis.ticks.length.y = unit(-0.25, "cm"),
          panel.spacing.x = unit(0, "mm"),
          axis.title.x = element_blank(),
          strip.background.x = element_blank(),
          strip.text.x = element_blank(),
          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 20, family = "Consolas"),
          axis.text.y = element_text(size = 55),
          axis.title.y = element_text(size = 60),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          panel.border = element_rect(colour = "grey", fill = NA, size = 1)
        ) +
        scale_x_continuous(
          breaks = unique(norm_sig_df$type_3),
          labels = norm_sig_df$Trinucleotide[1:96],
          expand = c(0.01, 0)
        ) +
        scale_y_continuous(
          limits = c(0, 0.3),
          expand = c(0, 0),
          breaks = seq(0, 0.3, by = 0.1)
        ) +
        xlab("") +
        scale_fill_manual(values = palette.COSMIC.SNV.96) +
        theme(legend.position = "none") +
        ylab("Composition ratio")

      # Header strip: one coloured band per substitution class.
      h2 <- ggplot(norm_sig_df) +
        geom_bar(
          mapping = aes(x = type_3, y = 1, fill = Substitution),
          stat = "identity",
          width = 1
        ) +
        theme_void() +
        theme(panel.spacing.x = unit(0, "mm")) +
        facet_grid(. ~ Substitution, scales = "free_x") +
        scale_fill_manual(values = palette.COSMIC.SNV.96) +
        theme(legend.position = "none") +
        theme(strip.text.x = element_text(size = 40))

      legend <- plot_grid(get_legend(h2), get_legend(h1), ncol = 1)
      h1 <- h1 + theme(legend.position = "none")
      h2 <- h2 + theme(legend.position = "none")

      # Stack the strip above the main panel and save as PDF.
      plot <- plot_grid(h2, h1, align = "v", ncol = 1, axis = "tblr", rel_heights = c(0.5, 5))
      cowplot::save_plot(
        paste0(
          "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/sig/",
          i, "_", gsub("/", "_", j), "_", k, ".v2.pdf"
        ),
        plot,
        ncol = 1,
        nrow = 2,
        base_asp = 7,
        device = cairo_pdf
      )
    }
  }
}


##signature of botseq
######################




# Signature spectra of the bot-seq samples.
A3A_bot_sig_tmp <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/15_bot_seq/A3A/integrate/vcf/240321/sig/",
  pattern = "signature_spectra.tsv",
  full.names = TRUE
)

# Read each spectrum and derive the annotations from the sample id:
#   TP53   - "KO" when the id contains "TP53", otherwise "WT"
#   dose   - first match of "_3", "100", "Ctrl"/"CTRL"; "none" when nothing matches
#   APOBEC - "A3A" when the id contains "A3A", otherwise "A3B"
#   batch  - "bat1" or "bat2" when present in the id, otherwise "bat3"
A3A_bot_spec_tmp <- lapply(A3A_bot_sig_tmp, function(path) {
  read_tsv(path) %>%
    mutate(
      id = gsub(".snp.*", "", basename(path)),
      TP53 = ifelse(grepl("TP53", id), "KO", "WT"),
      dose = case_when(
        grepl("_3", id) ~ "3ug/ml",
        grepl("100", id) ~ "0.1ug/ml",
        grepl("Ctrl", id) | grepl("CTRL", id) ~ "CTRL",
        TRUE ~ "none"
      ),
      APOBEC = ifelse(grepl("A3A", id), "A3A", "A3B"),
      batch = case_when(
        grepl("bat1", id) ~ "bat1",
        grepl("bat2", id) ~ "bat2",
        TRUE ~ "bat3"
      )
    )
})

# Keep batches bat1/bat3 and the 48h / 0h samples.
A3A_bot_spec_merge_df <- do.call(rbind, A3A_bot_spec_tmp)
A3A_bot_spec_merge_df <- A3A_bot_spec_merge_df %>%
  filter(batch == "bat1" | batch == "bat3") %>%
  filter(grepl("48h", id) | grepl("0h", id))

# 0h samples count as CTRL; keep A3A only.
A3A_bot_spec_merge_df <- A3A_bot_spec_merge_df %>%
  mutate(dose = ifelse(grepl("0h", id), "CTRL", dose)) %>%
  filter(APOBEC == "A3A")
# Median `Original` value per trinucleotide context within each
# TP53 / dose / APOBEC condition of the A3A bot-seq samples.
A3A_spec_sum_merge_df <- A3A_bot_spec_merge_df %>%
  group_by(TP53, dose, APOBEC, Substitution, Trinucleotide) %>%
  dplyr::summarise(sum_ori = median(Original)) %>%
  ungroup()

# Per condition: total of the medians, each context's share of that total,
# and a running context index (type_3) used as the x position when plotting.
# The index was previously rep(1:96, 3), which only works while there are
# exactly 3 conditions; numbering the rows within each condition gives the
# same values in that case without depending on the condition count.
A3A_spec_sum_merge_df <- A3A_spec_sum_merge_df %>%
  group_by(TP53, dose, APOBEC) %>%
  mutate(
    tot_ori = sum(sum_ori),
    ratio = sum_ori / tot_ori,
    type_3 = row_number()
  ) %>%
  ungroup()


# Same spectra directory as above; the A3B rows are selected after reading.
A3B_bot_sig_tmp <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/15_bot_seq/A3A/integrate/vcf/240321/sig/",
  pattern = "signature_spectra.tsv",
  full.names = TRUE
)

# Read each spectrum and derive the annotations from the sample id:
#   TP53   - "KO" when the id contains "TP53", otherwise "WT"
#   dose   - "-3" -> 3ug/ml; "48h-" or "48h_" -> 0.1ug/ml; "0h" -> CTRL;
#            anything else -> 3ug/ml
#   APOBEC - "A3A" when the id contains "A3A", otherwise "A3B"
A3B_bot_spec_tmp <- lapply(A3B_bot_sig_tmp, function(path) {
  read_tsv(path) %>%
    mutate(
      id = gsub(".snp.*", "", basename(path)),
      TP53 = ifelse(grepl("TP53", id), "KO", "WT"),
      dose = case_when(
        grepl("-3", id) ~ "3ug/ml",
        grepl("48h-", id) | grepl("48h_", id) ~ "0.1ug/ml",
        grepl("0h", id) ~ "CTRL",
        TRUE ~ "3ug/ml"
      ),
      APOBEC = ifelse(grepl("A3A", id), "A3A", "A3B")
    )
})

A3B_bot_spec_merge_df <- do.call(rbind, A3B_bot_spec_tmp)
A3B_bot_spec_merge_df <- A3B_bot_spec_merge_df %>%
  filter(APOBEC == "A3B")

# Median `Original` value per trinucleotide context within each
# TP53 / dose / APOBEC condition of the A3B bot-seq samples.
A3B_spec_sum_merge_df <- A3B_bot_spec_merge_df %>%
  group_by(TP53, dose, APOBEC, Substitution, Trinucleotide) %>%
  dplyr::summarise(sum_ori = median(Original)) %>%
  ungroup()

# Per condition: total of the medians, each context's share of that total,
# and a running context index (type_3) used as the x position when plotting.
# The index was previously rep(1:96, 3), which only works while there are
# exactly 3 conditions; numbering the rows within each condition gives the
# same values in that case without depending on the condition count.
A3B_spec_sum_merge_df <- A3B_spec_sum_merge_df %>%
  group_by(TP53, dose, APOBEC) %>%
  mutate(
    tot_ori = sum(sum_ori),
    ratio = sum_ori / tot_ori,
    type_3 = row_number()
  ) %>%
  ungroup()

# A3A and A3B bot-seq spectra in one table.
bot_spec_sum_merge_df <- rbind(A3A_spec_sum_merge_df, A3B_spec_sum_merge_df)

# Inspection output: largest ratios first, current value of `i`,
# one example condition, and the overall maximum ratio.
bot_spec_sum_merge_df %>%
  arrange(-ratio)
i
i <- "A3A"
bot_spec_sum_merge_df %>%
  filter(APOBEC == "A3A", TP53 == "WT", dose == "3ug/ml") %>%
  print(n = 100)
max(bot_spec_sum_merge_df$ratio)
library(cowplot)

# One spectrum figure per APOBEC / dose combination of the bot-seq data
# (TP53 WT only); combinations with no rows are reported and skipped.
for (i in c("A3A", "A3B")) {
  for (j in c("CTRL", "0.1ug/ml", "3ug/ml")) {
    for (k in c("WT")) {
      norm_sig_std_df <- bot_spec_sum_merge_df
      norm_sig_df <- norm_sig_std_df %>%
        filter(APOBEC == i, dose == j, TP53 == k)
      if (nrow(norm_sig_df) == 0) {
        print(i)
        print(j)
        print(k)
        next
      }

      # Main panel: composition ratio per trinucleotide context.
      h1 <- ggplot(norm_sig_df) +
        geom_hline(
          yintercept = seq(0, 0.3, by = 0.1),
          linetype = "dotted",
          col = "grey"
        ) +
        geom_bar(
          mapping = aes(x = type_3, y = ratio, fill = Substitution),
          stat = "identity",
          position = "dodge",
          width = 0.5
        ) +
        theme(
          axis.ticks.x = element_blank(),
          axis.ticks.y = element_line(size = 1, colour = "grey"),
          axis.ticks.length.y = unit(-0.25, "cm"),
          panel.spacing.x = unit(0, "mm"),
          axis.title.x = element_blank(),
          strip.background.x = element_blank(),
          strip.text.x = element_blank(),
          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 20, family = "Consolas"),
          axis.text.y = element_text(size = 55),
          axis.title.y = element_text(size = 60),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          panel.border = element_rect(colour = "grey", fill = NA, size = 1)
        ) +
        scale_x_continuous(
          breaks = unique(norm_sig_df$type_3),
          labels = norm_sig_df$Trinucleotide[1:96],
          expand = c(0.01, 0)
        ) +
        scale_y_continuous(
          limits = c(0, 0.3),
          expand = c(0, 0),
          breaks = seq(0, 0.3, by = 0.1)
        ) +
        xlab("") +
        scale_fill_manual(values = palette.COSMIC.SNV.96) +
        theme(legend.position = "none") +
        ylab("Composition ratio")

      # Header strip: one coloured band per substitution class.
      h2 <- ggplot(norm_sig_df) +
        geom_bar(
          mapping = aes(x = type_3, y = 1, fill = Substitution),
          stat = "identity",
          width = 1
        ) +
        theme_void() +
        theme(panel.spacing.x = unit(0, "mm")) +
        facet_grid(. ~ Substitution, scales = "free_x") +
        scale_fill_manual(values = palette.COSMIC.SNV.96) +
        theme(legend.position = "none") +
        theme(strip.text.x = element_text(size = 40))

      legend <- plot_grid(get_legend(h2), get_legend(h1), ncol = 1)
      h1 <- h1 + theme(legend.position = "none")
      h2 <- h2 + theme(legend.position = "none")

      # Stack the strip above the main panel and save as PDF.
      plot <- plot_grid(h2, h1, align = "v", ncol = 1, axis = "tblr", rel_heights = c(0.5, 5))
      cowplot::save_plot(
        paste0(
          "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig2/sig/",
          i, "_", gsub("/", "_", j), "_", k, ".bot.v3.pdf"
        ),
        plot,
        ncol = 1,
        nrow = 2,
        base_asp = 7,
        device = cairo_pdf
      )
    }
  }
}








####backup of table###
#######################

# Older version of the A3A stacked-bar figure, faceted by TP53 status and
# annotated with p-value brackets via ggprism::add_pvalue().
# NOTE(review): neither `t` nor `t_pval` is assigned anywhere in the visible
# part of this script. As written, `t` would resolve to the base function t()
# and `t%>%filter(...)` would fail - confirm where these tables are supposed
# to come from before running this section.
# NOTE(review): `margin=10` in facet_grid() presumably relies on partial
# matching of the `margins` argument - verify that this is intended.
p<-ggplot(t%>%filter(APOBEC=="A3A"),aes(x=id,y=Exposure,col="APOBEC"))+
  geom_hline(yintercept=seq(1000,5000,by=1000),linetype="dotted")+
  geom_bar(stat="identity",width=0.8,aes(fill=SBS))+
  facet_grid(~TP53,scales="free_x",space="free",margin=10)+
  theme_test()+
  theme(axis.text.x=element_blank(),
        axis.text.y=element_text(size=40),
        axis.title.y=element_text(size=45),
        strip.text=element_text(size=20),
        strip.background=element_rect(colour=NA,fill=NA),
        panel.background=element_rect(fill=NA,color="black"),
        axis.ticks.x=element_blank(),
        legend.title=element_blank(),
        legend.position="top",
        legend.key=element_rect(size=20,),
        legend.text=element_text(size=20),
        axis.ticks.length=unit(.5, "cm"),
        axis.ticks=element_line(colour="black",size=3)
  )+
  # The colour aesthetic is the constant string "APOBEC", drawn in black.
  scale_color_manual(values=c("black"))+
  # Fill colours are paired with the signatures listed in `breaks`.
  scale_fill_manual(values=c("#f58231","#DE2E24","#9a6324","#000000","#469990","#ffd8b1"),
                    breaks=c("SBS1","SBS2","SBS5","SBS13","SBS18","SBS40"))+
  xlab("")+
  scale_y_continuous(
    breaks=seq(0,5500,by=1000),
    labels=seq(0,5500,by=1000),
    expand=c(0,0),limits=c(0,6000)
  )+
  ylab("# of SNVs")+
  # P-value brackets taken from the A3A rows of t_pval (columns xmin/xmax).
  add_pvalue(t_pval%>%filter(APOBEC=="A3A"),
             xmin="xmin",
             xmax="xmax",
             tip.length = 0.01,
             label.size=10
  )+
  # Single-row fill legend; the colour legend is suppressed.
  guides(fill= guide_legend(nrow = 1),
         col="none")
# Share of SBS2 and SBS13 within their sum, per A3A sample (CTRL excluded),
# compared between samples whose id contains "bat" ("botseq") and the rest ("std").
tot_merge_df %>%
  filter(SBS %in% c("SBS13", "SBS2")) %>%
  select(id, SBS, new_Exposure, APOBEC, dose, TP53, time) %>%
  spread(SBS, new_Exposure) %>%
  mutate(
    sum = SBS2 + SBS13,
    SBS2_ratio = SBS2 / sum,
    SBS13_ratio = SBS13 / sum
  ) %>%
  gather(SBS, ratio, SBS2_ratio, SBS13_ratio) %>%
  mutate(group = ifelse(grepl("bat", id), "botseq", "std")) %>%
  filter(APOBEC == "A3A", dose != "CTRL") %>%
  ggplot(aes(x = group, y = ratio, col = SBS)) +
  geom_boxplot(outlier.shape = NA, linetype = "dashed") +
  stat_boxplot(aes(ymin = ..lower.., ymax = ..upper..), outlier.shape = NA) +
  stat_boxplot(geom = "errorbar", aes(ymin = ..ymax..)) +
  stat_boxplot(geom = "errorbar", aes(ymax = ..ymin..)) +
  geom_point(position = position_jitterdodge(jitter.width = 0.2)) +
  theme_bw() +
  scale_y_continuous(
    breaks = seq(0, 1, by = 0.2),
    labels = seq(0, 1, by = 0.2),
    limits = c(0, 1)
  ) +
  facet_wrap(~APOBEC)

