library(data.table)
library(magrittr)
library(gtools)
library(pbmcapply)
library(BSgenome.Hsapiens.Ensembl.GRCh37.jolim)
library(ggplot2)
library(ggbreak)
# library(ggthemes)
library(lemon)
library(scales)
library(flexmix) # flexmix
library(dplyr)
library(tidyverse)



library(scales)
library(dplyr)
library(tidyverse)

# Read the clustered-mutation table for the A3A clones.
A3A_cl_merge_f_br_sv_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/30_clustered_mutation/A3A_cl_sv_df.v3.txt"
)

# Keep only the columns used downstream, tag every row with the project code
# and drop exact duplicate rows.
A3A_cl_merge_f_br_sv_sim_df <- A3A_cl_merge_f_br_sv_df %>%
  select(id, `#CHROM`, POS, REF, ALT, sig_cont, cluster, cluster_id) %>%
  mutate(Project_Code = "A3A") %>%
  unique()

# Three lists of clusters to discard. Each table is matched on its `info`
# column, a key of the form <id>_<#CHROM>_<cluster_id>.
sv_excl_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/31_exclude_cluster/exclude_cl_mut_df.v2.txt"
)
overlap_excl_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/31_exclude_cluster/overlapped_cluster.v2.txt"
)
miss_phs_excl_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/31_exclude_cluster/miss_phased_cluster.txt"
)

# The mis-phased table does not ship with the key, so build it here.
miss_phs_excl_df <- miss_phs_excl_df %>%
  mutate(info = paste(id, `#CHROM`, cluster_id, sep = "_"))
#sv_excl_df%>%filter(grepl("A3A",info))

# Build the same key on the variant table and drop every variant whose
# cluster appears in any of the three exclusion lists.
A3A_sv_df <- A3A_cl_merge_f_br_sv_sim_df %>%
  mutate(info = paste(id, `#CHROM`, cluster_id, sep = "_")) %>%
  filter(
    !info %in% overlap_excl_df$info,
    !info %in% sv_excl_df$info,
    !info %in% miss_phs_excl_df$info
  )
A3A_sv_df
# Per-cluster tallies, one row per (id, cluster, #CHROM, cluster_id):
#   C_mut / T_mut        - sig_cont contains "C>" / "T>"
#   TCN_mut / nonTCN_mut - "C>" changes with / without a preceding T ("TC>")
#   TCN_C_mut, TCN_G_mut - "TC>" changes split by the REF base as called
#   std_C_mut, std_G_mut - all variants whose REF is C / G
A3A_sv_sum_df <- A3A_sv_df %>%
  group_by(id, cluster, `#CHROM`, cluster_id) %>%
  dplyr::summarise(
    C_mut = sum(grepl("C>", sig_cont)),
    T_mut = sum(grepl("T>", sig_cont)),
    TCN_mut = sum(grepl("TC>", sig_cont)),
    nonTCN_mut = sum(grepl("C>", sig_cont) & !grepl("TC>", sig_cont)),
    TCN_C_mut = sum(grepl("TC>", sig_cont) & REF == "C"),
    TCN_G_mut = sum(grepl("TC>", sig_cont) & REF == "G"),
    std_C_mut = sum(REF == "C"),
    std_G_mut = sum(REF == "G")
  )

# Inspect kataegis clusters with no "T>" change but at least one non-TC "C>" change.
A3A_sv_sum_df %>%
  filter(cluster == "kataegis") %>%
  filter(T_mut == 0) %>%
  filter(nonTCN_mut > 0)



# Export the clustered variants (with their per-cluster tallies attached) as
# tab-delimited tables. The helpers live inside local() so they do not leak
# into the workspace.
local({
  # Variants of the requested cluster classes joined to the matching summary rows.
  with_cluster_counts <- function(cluster_types) {
    left_join(
      A3A_sv_df %>% filter(cluster %in% cluster_types),
      A3A_sv_sum_df %>% filter(cluster %in% cluster_types)
    )
  }

  # Tab-separated, unquoted, header row kept, no row names.
  write_tab_delimited <- function(tbl, path) {
    write.table(tbl, path, sep = "\t", quote = FALSE, row.names = FALSE)
  }

  # kataegis + omikli together
  with_cluster_counts(c("kataegis", "omikli")) %>%
    write_tab_delimited("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/32_cluster_signature/00_original_files/A3A_clsuter_SNV.txt")

  # kataegis only
  with_cluster_counts(c("kataegis")) %>%
    write_tab_delimited("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/32_cluster_signature/00_original_files/A3A_clsuter_SNV.kat.txt")

  # omikli only
  with_cluster_counts(c("omikli")) %>%
    write_tab_delimited("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/32_cluster_signature/00_original_files/A3A_clsuter_SNV.omikli.txt")

  # kataegis variants whose context is not "TC>"
  with_cluster_counts(c("kataegis")) %>%
    filter(!grepl("TC>", sig_cont)) %>%
    write_tab_delimited("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/32_cluster_signature/00_original_files/A3A_clsuter_SNV.kat.nonTCN.txt")

  # Same subset laid out with VCF-style columns; unused fields are filled with "."
  with_cluster_counts(c("kataegis")) %>%
    filter(!grepl("TC>", sig_cont)) %>%
    mutate(ID = ".", QUAL = ".", FILTER = ".", INFO = ".", FORMAT = ".") %>%
    select(`#CHROM`, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, id) %>%
    write_tab_delimited("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/32_cluster_signature/00_original_files/A3A_clsuter_SNV.kat.nonTCN.vcf")

  invisible(NULL)
})


# The original line loaded `string`, which is not a package this script uses
# (and is not an installable package name), so a non-interactive run stopped
# here. stringr is the string package the rest of the script relies on
# (str_count() further down).
library(stringr)

# new_sig_cont: context written on the strand of the called REF base.
#  - REF is C or T: sig_cont is kept unchanged.
#  - otherwise: complement of sig_cont's 5th character, REF, ">", ALT, then the
#    complement of sig_cont's 1st character, i.e. the two flanking bases are
#    complemented and swapped.
# NOTE(review): this assumes sig_cont is a 5-character string of the form
# <5' base><ref>><alt><3' base>; confirm against the upstream table.
A3A_sv_new_sig_df <- A3A_sv_df %>%
  mutate(
    new_sig_cont = ifelse(
      REF %in% c("C", "T"),
      sig_cont,
      paste0(
        chartr("ATGC", "TACG", substr(sig_cont, 5, 5)),
        REF,
        ">",
        ALT,
        chartr("ATGC", "TACG", substr(sig_cont, 1, 1))
      )
    )
  )


###context making------------------------------------------------------------------------------------------------


# 96 substitution contexts, "<5' base><ref>><alt><3' base>", ordered
# C>A, C>G, C>T, T>A, T>C, T>G with 16 flank combinations per substitution.
contextorder96 <- local({
  bases <- c("A", "C", "G", "T")
  five_prime <- rep(rep(bases, each = 4), times = 6)
  ref_base <- rep(c("C", "T"), each = 48)
  alt_base <- rep(c("A", "G", "T", "A", "C", "G"), each = 16)
  three_prime <- rep(bases, times = 24)
  paste0(five_prime, ref_base, ">", alt_base, three_prime)
})

# 192 contexts: the 96 above followed by the same contexts with every base
# complemented and the two flanks swapped.
contextorder192 <- local({
  complement_at <- function(position) {
    chartr("ACGT", "TGCA", substr(contextorder96, position, position))
  }
  c(
    contextorder96,
    paste0(complement_at(5), complement_at(2), ">", complement_at(4), complement_at(1))
  )
})


# Twelve-colour palette used by the base-graphics bar plots (each colour is
# repeated for 16 consecutive bars there).
c12 <- c(
  "#21c4f0", "#525252", "#df3633", "#ecebe8", "#a5ca64", "#e7c1c2",
  "#215cf0", "#020202", "#691211", "#a6a394", "#546f26", "#bd5356"
)
##plot



#sim_df$sig_cont

###

# Kataegis variants (with per-cluster tallies) restricted to clusters that
# contain both C-reference and G-reference calls.
A3A_sv_new_sig_mix_df <- left_join(
  A3A_sv_new_sig_df %>% filter(cluster %in% c("kataegis")),
  A3A_sv_sum_df %>% filter(cluster %in% c("kataegis"))
) %>%
  filter(std_C_mut > 0 & std_G_mut > 0)

# Export every kataegis variant with `main_ref`, the REF base carried by most
# calls in its cluster ("C" or "G"; ties go to "C").
# The original tie test was `std_C_mut == "std_G_mut"`, which compares the
# count with a string literal and is never TRUE, so tied clusters were
# labelled "G". `>=` implements the intended rule.
left_join(
  A3A_sv_new_sig_df %>% filter(cluster %in% c("kataegis")),
  A3A_sv_sum_df %>% filter(cluster %in% c("kataegis"))
) %>%
  mutate(main_ref = ifelse(std_C_mut >= std_G_mut, "C", "G")) %>%
  write.table(
    "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/32_cluster_signature/00_original_files/A3A_clsuter_SNV.kat.mix_strand.vcf",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )
if(FALSE){
# main_ref: REF base carried by most calls of the cluster ("C" or "G", ties
# go to "C"). The original tie test compared std_C_mut with the string
# "std_G_mut" and therefore never matched; `>=` gives the intended result.

# Inspect one cluster. The original chain piped into the data frame
# `A3A_sv_new_sig_mix_df` as if it were a function, which fails at run time;
# that stray step is removed.
A3A_sv_new_sig_mix_df %>%
  mutate(main_ref = ifelse(std_C_mut >= std_G_mut, "C", "G")) %>%
  filter(info == "A3A_C3_TP53_C3_100ng-2_X_4")

# Variants whose REF base differs from their cluster's main_ref.
A3A_sv_new_sig_mix_other_df <- A3A_sv_new_sig_mix_df %>%
  mutate(main_ref = ifelse(std_C_mut >= std_G_mut, "C", "G")) %>%
  filter(REF != main_ref)

A3A_sv_new_sig_mix_other_df$sig_cont %>% unique()


# One row per mixed-strand cluster. The table is built before it is printed;
# the original printed `mix_df` one line before assigning it.
mix_df <- A3A_sv_new_sig_mix_df %>%
  select(id, `#CHROM`, cluster_id, info) %>%
  unique()
mix_df

# Variants whose REF base differs from their cluster's main_ref ("C" or "G",
# ties go to "C"; the original tie test compared against the string
# "std_G_mut" and never matched). new_sig_cont is sig_cont with the ">" and
# the character after it removed, leaving the three-base context.
A3A_sv_new_sig_mix_other_merge_df <- A3A_sv_new_sig_mix_df %>%
  mutate(main_ref = ifelse(std_C_mut >= std_G_mut, "C", "G")) %>%
  filter(REF != main_ref) %>%
  mutate(new_sig_cont = gsub(">.", "", sig_cont))

# Frame of the 16 NCN trinucleotides so that contexts without any
# observation still get a row (as_tibble() replaces the deprecated as.tibble()).
ref_16_df <- data.frame(
  new_sig_cont = paste0(
    rep(c("A", "C", "G", "T"), each = 4),
    "C",
    rep(c("A", "C", "G", "T"), 4)
  )
) %>%
  as_tibble()

# Observed count per trinucleotide; contexts never seen become 0.
other_16_count_df <- left_join(
  ref_16_df,
  A3A_sv_new_sig_mix_other_merge_df %>%
    group_by(new_sig_cont) %>%
    dplyr::summarise(n = n()),
  by = "new_sig_cont"
)
other_16_count_df[is.na(other_16_count_df)] <- 0
# Fix the x-axis order to the order of ref_16_df.
other_16_count_df$new_sig_cont <- factor(
  other_16_count_df$new_sig_cont,
  levels = ref_16_df$new_sig_cont
)

p_other_16_pdf <- other_16_count_df %>%
  mutate(col_type = "col") %>%
  ggplot(aes(x = new_sig_cont, y = n, col = col_type)) +
  geom_bar(stat = "identity") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 40, family = "Consolas")) +
  theme(
    axis.title = element_text(size = 40),
    axis.text.y = element_text(size = 40),
    strip.text = element_text(size = 30),
    axis.ticks.length = unit(.5, "cm"),
    axis.ticks = element_line(colour = "black", size = 3)
  ) +
  guides(colour = "none") +
  ylab("freq")

p_other_16_pdf

# save_plot() comes from cowplot, which this script only attaches further
# down; the namespace prefix makes this call work regardless of that order.
cowplot::save_plot(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/other_16.pdf",
  p_other_16_pdf,
  ncol = 1,
  nrow = 2,
  base_asp = 7,
  device = cairo_pdf
)



# Quick look at the counts per three-base context.
A3A_sv_new_sig_mix_other_merge_df %>%
  group_by(new_sig_cont) %>%
  dplyr::summarise(n = n())

# Counts per 96-channel context; channels with no variant become 0
# (as_tibble() replaces the deprecated as.tibble()).
ref_96_df <- data.frame(sig_cont = contextorder96) %>%
  as_tibble()
other_96_count_df <- left_join(
  ref_96_df,
  A3A_sv_new_sig_mix_other_merge_df %>%
    group_by(sig_cont) %>%
    dplyr::summarise(count = n()),
  by = "sig_cont"
)
other_96_count_df[is.na(other_96_count_df)] <- 0
#other_96_count_df%>%print(n=100)

other_96_count_df$count

# Substitution: the "<ref>><alt>" part of the context (characters 2-4).
# type_3: running row index used as the numeric x position in the plots;
# derived from the row count instead of a hard-coded 1:96.
other_96_count_df <- other_96_count_df %>%
  mutate(Substitution = substr(sig_cont, 2, 4)) %>%
  mutate(type_3 = seq_len(n()))
library(cowplot)

# One fill colour per substitution class.
palette.COSMIC.SNV.96 <- c("#1EBFF0", "#050708", "#E62725", "#CBCACB", "#A1CF64", "#EDC8C5")

other_96_count_df %>%
  filter(grepl("TC>", sig_cont)) %>%
  print(n = 100)

# Main panel: one bar per 96-channel context, y axis fixed to 0-4.25.
h1 <- ggplot(other_96_count_df) +
  geom_hline(yintercept = 0:4, linetype = "dotted", col = "grey") +
  geom_bar(
    mapping = aes(x = type_3, y = count, fill = Substitution),
    stat = "identity",
    position = "dodge",
    width = 0.5
  ) +
  scale_x_continuous(
    breaks = other_96_count_df$type_3 %>% unique(),
    labels = contextorder96,
    expand = c(0.01, 0)
  ) +
  scale_y_continuous(
    limits = c(0, 4.25),
    expand = c(0, 0),
    breaks = 0:4,
    labels = 0:4
  ) +
  scale_fill_manual(values = palette.COSMIC.SNV.96) +
  xlab("") +
  ylab("# of SNVs") +
  theme(
    axis.ticks.y = element_line(size = 1, colour = "grey"),
    axis.ticks.length.y = unit(-0.25, "cm"),
    panel.spacing.x = unit(0, "mm"),
    axis.title.x = element_blank(),
    strip.background.x = element_blank(),
    strip.text.x = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 20, family = "Consolas"),
    axis.text.y = element_blank(),
    axis.title.y = element_text(size = 60),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA, size = 1),
    legend.position = "none"
  )

h1

# Header strip: a unit-height coloured band per substitution class, with the
# class name as the facet label.
h2 <- ggplot(other_96_count_df) +
  geom_bar(
    mapping = aes(x = type_3, y = 1, fill = Substitution),
    stat = "identity",
    width = 1
  ) +
  facet_grid(. ~ Substitution, scales = "free_x") +
  scale_fill_manual(values = palette.COSMIC.SNV.96) +
  theme_void() +
  theme(
    panel.spacing.x = unit(0, "mm"),
    legend.position = "none",
    strip.text.x = element_text(size = 40)
  )

h2

legend <- plot_grid(get_legend(h2), get_legend(h1), ncol = 1)
h1 <- h1 + theme(legend.position = "none")
h2 <- h2 + theme(legend.position = "none")

# Stack the header strip above the main panel and write the figure.
plot <- plot_grid(h2, h1, align = "v", ncol = 1, axis = "tblr", rel_heights = c(0.5, 5))
plot
save_plot(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/mix_other_sig.v2.pdf",
  plot,
  ncol = 1,
  nrow = 2,
  base_asp = 7,
  device = cairo_pdf
)







# Unique three-base contexts obtained by removing ">" and the character after
# it from each of the 96 channel names.
new_contextorder96<-gsub(">.","",contextorder96)%>%unique()



# Tabulate the observed three-base contexts, index the table in the order
# above, relabel it and set the NA entries to 0.
new_contexttable96 <-table(A3A_sv_new_sig_mix_other_merge_df$new_sig_cont)[new_contextorder96]
names(new_contexttable96)<-new_contextorder96
new_contexttable96[is.na(new_contexttable96)]=0



# Base-graphics bar plot of the table, x axis suppressed, colours taken from c12.
# NOTE(review): `contexttable96` (used for ylim) and `mut16` (used for the
# legend labels) are not defined anywhere in the visible part of this script;
# ylim was presumably meant to use `new_contexttable96` - confirm before
# enabling this block.
# NOTE(review): rep(c12, each=16) yields 192 colours; verify that this matches
# the number of bars actually drawn.
x<-barplot(new_contexttable96,las=2,
           col = rep(c12,each=16),
           border = rep(c12,each=16),
           cex.names=0.5,
           xaxt="n",
           beside="T",
           ylim=range(pretty(c(0,max(contexttable96))))
           #          legend=mut16
)
# Colour key, shifted to the left of the plotting region by the negative inset.
legend("topleft",
       legend = mut16,
       fill = c12,
       cex = 0.5,
       ncol=2,
       inset=c(-0.4,0))
# Second, box-less legend whose only label is ".".
legend("topleft",
       legend=".",bty="n",
       cex = 1,
       inset=c(-0.4,0.7))









A3A_sv_new_sig_mix_other_df %>% print(n = 100)

# Genomic span (first to last variant position) of every mixed-strand cluster
# that contains at least one variant off the cluster's main REF base.
range_df <- A3A_sv_new_sig_mix_df %>%
  filter(info %in% A3A_sv_new_sig_mix_other_df$info) %>%
  group_by(id, `#CHROM`, cluster_id) %>%
  dplyr::summarise(min_POS = min(POS), max_POS = max(POS))

# strand is the other strand of the main strand: "-" when main_ref is "C", "+" otherwise.
range_df <- left_join(
  range_df,
  A3A_sv_new_sig_mix_other_df %>%
    mutate(strand = ifelse(main_ref == "C", "-", "+")) %>%
    select(id, `#CHROM`, cluster_id, strand)
)

library(Rsamtools)
fasta_file <- FaFile(file = '/home/users/ayh/Projects/reference/genome/human/GRCh37/A3B/human_g1k_v37.rtTA.A3B_mcherry_vec.fa')
# The join above yields one row per variant; collapse back to one row per cluster.
range_df <- range_df %>% unique()

# Fetch the reference sequence of each span, padded by one base on either
# side so the outermost positions have both neighbours.
kat_gr <- GRanges(
  seqnames = range_df$`#CHROM`,
  IRanges(
    start = (as.numeric(range_df$min_POS - 1)),
    end = (as.numeric(range_df$max_POS) + 1)
  ),
  strand = range_df$strand
)
refbase <- getSeq(fasta_file, kat_gr)
# Convert the returned sequence set to a plain character vector.
refbase <- (as.data.frame(refbase)$x)
ref_df <- data.frame("seq" = refbase, "strand" = range_df$strand) %>%
  as_tibble()
# Spot check on the first sequence. `refbase` is already a character vector
# at this point; the original went through as.data.frame(refbase)$x again,
# which no longer has an `x` column and so checked nothing.
str_count(refbase[1], "TCA")
refbase
#ref_df[1]
#ref_df[1]



# The 32 three-base contexts centred on C or T: the 16 NCN contexts first,
# then the 16 NTN contexts, each ordered by 5' base and then 3' base.
contextorder32 <- local({
  bases <- c("A", "C", "G", "T")
  five_prime <- rep(rep(bases, each = 4), times = 2)
  centre <- rep(c("C", "T"), each = 16)
  three_prime <- rep(bases, times = 8)
  paste0(five_prime, centre, three_prime)
})
library(stringr)
# Reverse complement of every context: complement each base, then reverse
# the string. The original called stri_reverse(), which belongs to stringi;
# library(stringr) does not attach stringi, so the call could not be
# resolved. The reversal is done in base R instead, so no extra package is needed.
rev_contextorder32 <- vapply(
  strsplit(chartr("ATGC", "TACG", contextorder32), "", fixed = TRUE),
  function(bases) paste(rev(bases), collapse = ""),
  character(1),
  USE.NAMES = FALSE
)



# For every fetched reference sequence, count how often each of the 32
# contexts occurs. Sequences labelled "+" are searched with the contexts as
# written, all others with their reverse complements; either way the counts
# are reported under the names in contextorder32.
# Returns a list with one data frame per sequence (columns V1 = count, cont).
# NOTE(review): Rsamtools::getSeq() is documented to reverse-complement
# ranges on the "-" strand already; if that applies here, searching those
# sequences with reverse-complemented patterns flips them twice - confirm.
# NOTE(review): str_count() presumably counts non-overlapping matches, so
# homopolymer contexts such as "TTT" may be under-counted - confirm.
ref_tmp <- lapply(seq_len(nrow(ref_df)), function(i) {
  # seq_len() keeps the loop empty when ref_df has no rows (1:0 would run twice).
  region_seq <- ref_df$seq[i]
  region_strand <- ref_df$strand[i]

  target_cont <- if (region_strand == "+") contextorder32 else rev_contextorder32

  # str_count() is vectorised over the pattern: one count per context.
  data.frame(
    V1 = str_count(region_seq, target_cont),
    cont = contextorder32
  )
})

# Stack the per-sequence counts and total them per context.
ref_tot_df <- do.call(rbind, ref_tmp)

ref_count_df <- ref_tot_df %>%
  group_by(cont) %>%
  dplyr::summarise(tot_count = sum(V1))
ref_count_df$cont <- factor(ref_count_df$cont, levels = contextorder32)

# Expand each three-base context into its three substitution channels,
# repeating the context total for every channel. The three passes give
#   pass 1: ">A" for every context
#   pass 2: ">G" for every context
#   pass 3: ">T" for C-centred contexts, ">C" for T-centred ones
ref_count_sig_df <- local({
  with_alt <- function(alt_for_c, alt_for_t) {
    ref_count_df %>%
      mutate(
        cont = paste0(
          substr(cont, 1, 2),
          ">",
          ifelse(substr(cont, 2, 2) == "C", alt_for_c, alt_for_t),
          substr(cont, 3, 3)
        )
      )
  }
  rbind(
    with_alt("A", "A"),
    with_alt("G", "G"),
    with_alt("T", "C")
  )
})
#ref_count_sig_df%>%arrange(order=contextorder32)
ref_count_sig_df$cont <- factor(ref_count_sig_df$cont, levels = contextorder96)

# Named table of the totals, in the row order of ref_count_sig_df.
t_tbl <- as.table(ref_count_sig_df$tot_count)
names(t_tbl) <- ref_count_sig_df$cont
t_tbl

# Exploratory views of the reference context totals.
# Rows of the C>A channels only.
ref_count_sig_df%>%filter(grepl("C>A",cont))
# Bar chart of the C>A channels.
ref_count_sig_df%>%filter(grepl("C>A",cont))%>%
  ggplot(aes(x=cont,y=tot_count))+
  geom_bar(stat="identity")
# Bar chart of all channels, filled by substitution type (characters 2-4 of cont).
ref_count_sig_df%>%mutate(mut_type=substr(cont,2,4))%>%
  ggplot(aes(x=cont,y=tot_count,fill=mut_type))+
  geom_bar(stat="identity")

ref_count_sig_df

# Base-graphics bar plot of the same totals, x axis suppressed.
# NOTE(review): bars are drawn in the element order of t_tbl while the colours
# are c12 repeated in runs of 16 (192 values) - verify that the bar order and
# the colour runs line up as intended.
barplot(t_tbl,las=2,
        col = rep(c12,each=16),
        border = rep(c12,each=16),
        cex.names=0.5,
        xaxt="n",
        beside="T",
        ylim=range(pretty(c(0,max(t_tbl))))
        #          legend=mut16
)
# NOTE(review): `mut16` is not defined in the visible part of this script.
legend("topleft",
       legend = mut16,
       fill = c12,
       cex = 0.5,
       ncol=2,
       inset=c(-0.4,0))
# Second, box-less legend whose only label is ".".
legend("topleft",
       legend=".",bty="n",
       cex = 1,
       inset=c(-0.4,0.7))
print("doing")


# Frequency of each NCN context in the fetched reference sequences. The C>A
# channels are used as one representative row per NCN context, and each count
# is divided by the fixed denominator 9098.
# NOTE(review): 9098 is hard-coded - presumably the summed count over these
# contexts; confirm it still matches the data.
p_ref_pdf <- ref_count_sig_df %>%
  filter(grepl("C>A", cont)) %>%
  plyr::rename(c("tot_count" = "count")) %>%
  mutate(
    tot_count = 9098,
    freq = count / tot_count,
    new_sig_cont = gsub(">A", "", cont)
  ) %>%
  ggplot(aes(x = new_sig_cont, y = freq, col = tot_count)) +
  geom_bar(stat = "identity") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 40, family = "Consolas")) +
  theme(
    axis.title = element_text(size = 40),
    axis.text.y = element_text(size = 40),
    strip.text = element_text(size = 30),
    axis.ticks.length = unit(.5, "cm"),
    axis.ticks = element_line(colour = "black", size = 3)
  ) +
  #scale_color_manual(values=c("black"))+
  guides(colour = "none")


save_plot(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/ref_16.pdf",
  p_ref_pdf,
  ncol = 1,
  nrow = 2,
  base_asp = 7,
  device = cairo_pdf
)

##merge_ratio##

# Long table with two groups per NCN context:
#   "expected" - context frequency in the reference sequences (count / 9098)
#   "observed" - share of the off-strand variants falling in that context
merge_ratio_df <- rbind(
  ref_count_sig_df %>%
    filter(grepl("C>A", cont)) %>%
    plyr::rename(c("tot_count" = "count")) %>%
    mutate(
      tot_count = 9098,
      freq = count / tot_count,
      new_sig_cont = gsub(">A", "", cont)
    ) %>%
    select(new_sig_cont, freq) %>%
    mutate(group = "expected"),
  other_16_count_df %>%
    mutate(freq = n / sum(other_16_count_df$n)) %>%
    select(new_sig_cont, freq) %>%
    mutate(group = "observed")
)

# Side-by-side bars of expected vs observed frequency, y axis fixed to 0-0.3.
# NOTE(review): col = "black" sits inside aes(), so it is mapped through a
# colour scale rather than used as the literal colour - confirm the outline
# colour is what is wanted.
p_mix <- merge_ratio_df %>%
  ggplot(aes(x = new_sig_cont, y = freq, fill = group, col = "black")) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 40, family = "Consolas")) +
  theme(
    axis.title = element_text(size = 40),
    axis.text.y = element_text(size = 40),
    strip.text = element_text(size = 30),
    axis.ticks.length = unit(.5, "cm"),
    axis.ticks = element_line(colour = "black", size = 3)
  ) +
  scale_y_continuous(
    breaks = seq(0, 0.3, by = 0.1),
    labels = seq(0, 0.3, by = 0.1),
    limits = c(0, 0.3)
  ) +
  #scale_color_manual(values=c("black"))+
  guides(colour = "none")
p_mix
save_plot(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/mix_16.pdf",
  p_mix,
  ncol = 1,
  nrow = 2,
  base_asp = 7,
  device = cairo_pdf
)
##cosim=0.659
# Cosine similarity between the expected (reference) and observed 16-context
# profiles. Both vectors are divided by fixed totals (9098 and 19).
ref_16_sig <- ref_count_sig_df %>%
  filter(grepl("C>A", cont)) %>%
  plyr::rename(c("tot_count" = "count")) %>%
  mutate(tot_count = 9098, freq = count / tot_count) %>%
  pull(freq)
other_16_sig <- other_16_count_df$n / 19

mag1 <- sqrt(sum(ref_16_sig^2))
mag2 <- sqrt(sum(other_16_sig^2))
sig_dot_product <- sum(ref_16_sig * other_16_sig)

cos_sim <- sig_dot_product / (mag1 * mag2)
cos_sim

# Worked toy example of the cosine-similarity formula on two 4-element vectors.
signature1 <- c(0.1, 0.3, 0.2, 0.4)
signature2 <- c(0.2, 0.1, 0.3, 0.5)

# Euclidean length of each vector
magnitude1 <- sqrt(sum(signature1^2))
magnitude2 <- sqrt(sum(signature2^2))

# Inner product of the two vectors
dot_product <- sum(signature1 * signature2)

# cosine similarity = inner product / product of the lengths
similarity <- dot_product / (magnitude1 * magnitude2)

print(similarity)



# Kataegis variants whose context is not "TC>".
A3A_sv_new_sig_df %>%
  filter(!grepl("TC>", sig_cont) & cluster == "kataegis")

A3A_sv_new_sig_mix_other_merge_df %>%
  group_by(new_sig_cont) %>%
  dplyr::summarise(n = n())

# 96-channel counts over all kataegis variants; channels never seen become 0.
# The original first built a different `kat_other_96_count_df` (join first,
# then n() per channel, which counts an unmatched channel as 1) and
# overwrote it on the next statement; that dead assignment is removed.
# as_tibble() replaces the deprecated as.tibble().
ref_96_df <- data.frame(sig_cont = contextorder96) %>%
  as_tibble()
kat_other_96_count_df <- left_join(
  ref_96_df,
  A3A_sv_new_sig_df %>%
    filter(cluster == "kataegis") %>%
    group_by(sig_cont) %>%
    dplyr::summarise(count = n()),
  by = "sig_cont"
)

kat_other_96_count_df[is.na(kat_other_96_count_df)] <- 0
#other_96_count_df%>%print(n=100)

other_96_count_df$count

# Substitution: "<ref>><alt>" (characters 2-4); type_3: row index used as the
# numeric x position in the plots.
kat_other_96_count_df <- kat_other_96_count_df %>%
  mutate(Substitution = substr(sig_cont, 2, 4)) %>%
  mutate(type_3 = seq_len(n()))
library(cowplot)

# One fill colour per substitution class.
palette.COSMIC.SNV.96 <- c("#1EBFF0", "#050708", "#E62725", "#CBCACB", "#A1CF64", "#EDC8C5")

# Main panel: 96-channel counts of all kataegis variants, y axis fixed to 0-160.
h1 <- ggplot(kat_other_96_count_df) +
  geom_hline(
    yintercept = seq(0, 150, by = 50),
    linetype = "dotted",
    col = "grey"
  ) +
  geom_bar(
    mapping = aes(x = type_3, y = count, fill = Substitution),
    stat = "identity",
    position = "dodge",
    width = 0.5
  ) +
  scale_x_continuous(
    breaks = other_96_count_df$type_3 %>% unique(),
    labels = contextorder96,
    expand = c(0.01, 0)
  ) +
  scale_y_continuous(
    limits = c(0, 160),
    expand = c(0, 0),
    breaks = seq(0, 160, by = 50),
    labels = seq(0, 160, by = 50)
  ) +
  scale_fill_manual(values = palette.COSMIC.SNV.96) +
  xlab("") +
  ylab("# of SNVs") +
  theme(
    axis.ticks.y = element_line(size = 1, colour = "grey"),
    axis.ticks.length.y = unit(-0.25, "cm"),
    panel.spacing.x = unit(0, "mm"),
    axis.title.x = element_blank(),
    strip.background.x = element_blank(),
    strip.text.x = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 20, family = "Consolas"),
    #axis.text.y=element_text(size=55),
    axis.text.y = element_blank(),
    axis.title.y = element_text(size = 60),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA, size = 1),
    legend.position = "none"
  )

h1

# Header strip: a unit-height coloured band per substitution class.
h2 <- ggplot(kat_other_96_count_df) +
  geom_bar(
    mapping = aes(x = type_3, y = 1, fill = Substitution),
    stat = "identity",
    width = 1
  ) +
  facet_grid(. ~ Substitution, scales = "free_x") +
  scale_fill_manual(values = palette.COSMIC.SNV.96) +
  theme_void() +
  theme(
    panel.spacing.x = unit(0, "mm"),
    legend.position = "none",
    strip.text.x = element_text(size = 40)
  )

h2

legend <- plot_grid(get_legend(h2), get_legend(h1), ncol = 1)
h1 <- h1 + theme(legend.position = "none")
h2 <- h2 + theme(legend.position = "none")

# Stack the header strip above the main panel and write the figure.
kat_plot <- plot_grid(h2, h1, align = "v", ncol = 1, axis = "tblr", rel_heights = c(0.5, 5))
kat_plot

save_plot(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/kat_96.pdf",
  kat_plot,
  ncol = 1,
  nrow = 2,
  base_asp = 7,
  device = cairo_pdf
)

##enlarge
# Counts for the zoomed-in version of the kataegis figure: same 96-channel
# table as before, but every count of 30 or more is replaced by 32 so tall
# bars run up to the top of a 0-32 axis.

kat_other_mag_96_count_df <- left_join(
  ref_96_df,
  A3A_sv_new_sig_df %>%
    filter(cluster == "kataegis") %>%
    group_by(sig_cont) %>%
    dplyr::summarise(count = n())
)

# Reference listing of the "TC>" channel counts (output recorded below).
A3A_sv_new_sig_df %>%
  filter(cluster == "kataegis") %>%
  group_by(sig_cont) %>%
  dplyr::summarise(count = n()) %>%
  filter(grepl("TC>", sig_cont))

#sig_cont count
#<chr>    <int>
#TC>AA       29
#TC>AC       20
#TC>AT       21
#TC>GA       56
#TC>GC       11
#TC>GG        4
#TC>GT      103
#TC>TA      155
#TC>TC       26
#TC>TG       12
#TC>TT      112
kat_other_mag_96_count_df[is.na(kat_other_mag_96_count_df)] <- 0
#other_96_count_df%>%print(n=100)

#kat_other_mag_96_count_df<-kat_other_mag_96_count_df%>%mutate(count=ifelse(grepl("TC>AG",sig_cont),0,
#                                                ifelse(grepl("TC>GG",sig_cont),4,
#                                                       ifelse(grepl("TC>GC",sig_cont),11,
#                                                              ifelse(grepl("TC>TG",sig_cont),12,
#                                                       ifelse(grepl("TC>",sig_cont),12.4,count))
#                                                )
#)))

# "TC>AG" is forced to 0; any other count stays as is below 30 and becomes 32
# otherwise. Then add the substitution class and the numeric x position.
kat_other_mag_96_count_df <- kat_other_mag_96_count_df %>%
  mutate(
    count = ifelse(
      grepl("TC>AG", sig_cont),
      0,
      ifelse(count < 30, count, 32)
    )
  ) %>%
  mutate(Substitution = substr(sig_cont, 2, 4)) %>%
  mutate(type_3 = c(1:96))
library(cowplot)

# One fill colour per substitution class.
palette.COSMIC.SNV.96 <- c("#1EBFF0", "#050708", "#E62725", "#CBCACB", "#A1CF64", "#EDC8C5")

# Main panel of the zoomed figure: capped counts on a 0-32 axis, no guide lines.
h1 <- ggplot(kat_other_mag_96_count_df) +
  #geom_hline(yintercept=seq(0,150,by=50),
  #           linetype="dotted",col="grey")+
  geom_bar(
    mapping = aes(x = type_3, y = count, fill = Substitution),
    stat = "identity",
    position = "dodge",
    width = 0.5
  ) +
  scale_x_continuous(
    breaks = other_96_count_df$type_3 %>% unique(),
    labels = contextorder96,
    expand = c(0.01, 0)
  ) +
  scale_y_continuous(
    limits = c(0, 32),
    expand = c(0, 0),
    breaks = seq(0, 30, by = 10),
    labels = seq(0, 30, by = 10)
  ) +
  scale_fill_manual(values = palette.COSMIC.SNV.96) +
  xlab("") +
  ylab("# of SNVs") +
  theme(
    axis.ticks.y = element_line(size = 1, colour = "grey"),
    axis.ticks.length.y = unit(-0.25, "cm"),
    panel.spacing.x = unit(0, "mm"),
    axis.title.x = element_blank(),
    strip.background.x = element_blank(),
    strip.text.x = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 20, family = "Consolas"),
    #axis.text.y=element_text(size=55),
    axis.text.y = element_blank(),
    axis.title.y = element_text(size = 60),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA, size = 1),
    legend.position = "none"
  )

h1

# Header strip, drawn from the uncapped table (only the class bands are shown).
h2 <- ggplot(kat_other_96_count_df) +
  geom_bar(
    mapping = aes(x = type_3, y = 1, fill = Substitution),
    stat = "identity",
    width = 1
  ) +
  facet_grid(. ~ Substitution, scales = "free_x") +
  scale_fill_manual(values = palette.COSMIC.SNV.96) +
  theme_void() +
  theme(
    panel.spacing.x = unit(0, "mm"),
    legend.position = "none",
    strip.text.x = element_text(size = 40)
  )

h2

legend <- plot_grid(get_legend(h2), get_legend(h1), ncol = 1)
h1 <- h1 + theme(legend.position = "none")
h2 <- h2 + theme(legend.position = "none")

# Stack the header strip above the main panel and write the figure.
kat_mag_plot <- plot_grid(h2, h1, align = "v", ncol = 1, axis = "tblr", rel_heights = c(0.5, 5))
kat_mag_plot

save_plot(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/kat_mag_96.v2.pdf",
  kat_mag_plot,
  ncol = 1,
  nrow = 2,
  base_asp = 7,
  device = cairo_pdf
)





# Variant of the kataegis figure with y-axis labels shown and the axis fixed to 0-7.
# NOTE(review): with limits = c(0, 7) on the scale, bars taller than 7 are
# presumably dropped rather than clipped - confirm this is the intended view.
h1 <- ggplot(kat_other_96_count_df) +
  geom_hline(
    yintercept = seq(0, 150, by = 50),
    linetype = "dotted",
    col = "grey"
  ) +
  geom_bar(
    mapping = aes(x = type_3, y = count, fill = Substitution),
    stat = "identity",
    position = "dodge",
    width = 0.5
  ) +
  #  guides(fill = guide_legend(ncol = 2))+
  theme(
    #axis.text.x.bottom = element_blank(),
    #axis.ticks.x = element_blank(),
    axis.ticks.y = element_line(size = 1, colour = "grey"),
    axis.ticks.length.y = unit(-0.25, "cm"),
    panel.spacing.x = unit(0, "mm"),
    axis.title.x = element_blank(),
    strip.background.x = element_blank(),
    strip.text.x = element_blank()
  ) +
  #facet_grid(.~Substitution, scales = "free_x")+
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 20, family = "Consolas"),
    axis.text.y = element_text(size = 55),
    axis.title.y = element_text(size = 60)
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA, size = 1)
  ) +
  scale_x_continuous(
    breaks = other_96_count_df$type_3 %>% unique(),
    labels = contextorder96,
    expand = c(0.01, 0)
  ) +
  scale_y_continuous(
    limits = c(0, 7),
    expand = c(0, 0),
    breaks = seq(0, 6, by = 2),
    labels = seq(0, 6, by = 2)
  ) +
  xlab("") +
  scale_fill_manual(values = palette.COSMIC.SNV.96) +
  theme(legend.position = "none") +
  ylab("# of SNVs")

h1

# Header strip: a unit-height coloured band per substitution class.
h2 <- ggplot(kat_other_96_count_df) +
  geom_bar(
    mapping = aes(x = type_3, y = 1, fill = Substitution),
    stat = "identity",
    width = 1
  ) +
  theme_void() +
  theme(panel.spacing.x = unit(0, "mm")) +
  facet_grid(. ~ Substitution, scales = "free_x") +
  scale_fill_manual(values = palette.COSMIC.SNV.96) +
  theme(legend.position = "none") +
  theme(strip.text.x = element_text(size = 40))

h2

legend <- plot_grid(get_legend(h2), get_legend(h1), ncol = 1)
h1 <- h1 + theme(legend.position = "none")
h2 <- h2 + theme(legend.position = "none")

kat_plot <- plot_grid(h2, h1, align = "v", ncol = 1, axis = "tblr", rel_heights = c(0.5, 5))
# Display the combined figure. The original wrote `kat_plot(base)`, calling
# the plot object as a function with an undefined argument, which fails.
kat_plot
}