library(dplyr)
library(tidyverse)
library(ggplot2)
# Reference signature table, reduced to the SBS2 and SBS13 columns and
# reshaped to long form (one row per mutation type x context x signature).
# cont_group buckets each three_bp_cont value by regex:
#   starts with "TC"                  -> "TCN"
#   otherwise matches ".C."           -> "non-TCN"
#   anything else                     -> "NTN"
sig_ori_df <- read_tsv("/home/users/ayh/Projects/reference/cosmic_sig.v3.tsv") %>%
  select(mut_type = Type, three_bp_cont = SubType, SBS2, SBS13) %>%
  mutate(
    cont_group = ifelse(
      grepl("^TC", three_bp_cont),
      "TCN",
      ifelse(grepl(".C.", three_bp_cont), "non-TCN", "NTN")
    )
  ) %>%
  gather("SBS", "rate", SBS2, SBS13)

# Total rate per signature and context group.
# The result stays grouped by SBS (summarise drops only the last grouping level).
sig_ori_sum_df <- dplyr::summarise(
  group_by(sig_ori_df, SBS, cont_group),
  sum_rate = sum(rate)
)
  
  
# Signature exposures for the clustered-mutation run.
# Signature labels have "v3_" replaced by "SBS", only the four listed
# signatures are kept, the label column is renamed to SBS, and Exposure is
# rounded to whole numbers.
kat_sig_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/30_clustered_mutation/vcn_ori_SBS2_SBS13/Count.signature_exposures.tsv") %>%
  mutate(Signature = gsub("v3_", "SBS", Signature)) %>%
  filter(Signature %in% c("SBS2", "SBS13", "SBS18", "SBS40")) %>%
  dplyr::rename(SBS = Signature) %>%
  mutate(Exposure = round(Exposure))


# Expected mutation count per signature and context group:
#   exp_count = summed signature rate * rounded exposure
#   tot_count = exp_count summed over every row of the table
#   rate      = exp_count / tot_count
#
# The total is computed from the column inside the pipeline. The previous code
# read kat_exp_count_df$exp_count while kat_exp_count_df was still being
# defined, which fails in a fresh session and silently reuses stale values on
# a re-run. sig_ori_sum_df is still grouped by SBS, so it is ungrouped first to
# make sum() span the whole table rather than one signature at a time.
# The join key is given explicitly instead of being inferred.
kat_exp_count_df <- sig_ori_sum_df %>%
  ungroup() %>%
  left_join(
    kat_sig_df %>% select(SBS, Exposure),
    by = "SBS"
  ) %>%
  mutate(
    exp_count = sum_rate * Exposure,
    tot_count = sum(exp_count),
    rate = exp_count / tot_count
  )

  

# Reconstructed spectrum for the clustered-mutation run, collapsed to context
# groups. cont_group uses the same regex buckets as the reference table
# ("^TC" -> "TCN", else ".C." -> "non-TCN", else "NTN").
# cont_rate is each group's share of the summed Reconstructed values; every
# row is labelled val_group = "expected".
kat_cont_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/30_clustered_mutation/vcn_ori_SBS2_SBS13/Count.signature_spectra.tsv") %>%
  dplyr::rename(three_bp_cont = Trinucleotide) %>%
  mutate(
    cont_group = ifelse(
      grepl("^TC", three_bp_cont),
      "TCN",
      ifelse(grepl(".C.", three_bp_cont), "non-TCN", "NTN")
    )
  ) %>%
  group_by(cont_group) %>%
  dplyr::summarise(exp_count = sum(Reconstructed)) %>%
  mutate(
    tot_exp_count = sum(exp_count),
    cont_rate = exp_count / tot_exp_count,
    val_group = "expected"
  )



# Per-context summary table; sig_cont is bucketed by its leading bases:
# "^TC" -> "TCN", else "^.C" -> "VCN", else "NTN".
other_kat_df <- read_tsv("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/other_kat_sum_table.v2.txt")
other_kat_df <- mutate(
  other_kat_df,
  cont_group = ifelse(
    grepl("^TC", sig_cont),
    "TCN",
    ifelse(grepl("^.C", sig_cont), "VCN", "NTN")
  )
)

# Same bucketing for the mix table, but a context also counts as TCN when it
# ends in "GA" and as VCN when it matches "G.$".
# NOTE(review): the G-based patterns look like the reverse-complement strand
# of the C-based ones; confirm against how sig_cont is produced.
mix_kat_df <- read_tsv("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/mix_kat_sum_table.v2.txt")
mix_kat_df <- mutate(
  mix_kat_df,
  cont_group = ifelse(
    grepl("^TC", sig_cont) | grepl("GA$", sig_cont),
    "TCN",
    ifelse(grepl("^.C", sig_cont) | grepl("G.$", sig_cont), "VCN", "NTN")
  )
)

# Column-wise totals of the count columns of other_kat_df, one row per
# cont_group. Each output column is the input column name prefixed with "sum_".
other_kat_group_df <- dplyr::summarise(
  group_by(other_kat_df, cont_group),
  sum_kat_mut_count = sum(kat_mut_count),
  sum_kat_ref_cont_count = sum(kat_ref_cont_count),
  sum_tot_ref_cont_count = sum(tot_ref_cont_count),
  sum_kat_out_ref_cont_count = sum(kat_out_ref_cont_count),
  sum_tot_mut_count = sum(tot_mut_count),
  sum_kat_out_mut_count = sum(kat_out_mut_count)
)



# Column-wise totals of the count columns of mix_kat_df, one row per
# cont_group. Note the last output is named "..._exp_count_sum" although the
# source column is spelled "mix_kat_mut_exp_cont_sum".
mix_kat_group_df <- dplyr::summarise(
  group_by(mix_kat_df, cont_group),
  sum_mix_kat_mut_cont_sum = sum(mix_kat_mut_cont_sum),
  sum_mix_kat_ref_cont_count_sum = sum(mix_kat_ref_cont_count_sum),
  sum_mix_tot_ref_cont_count_sum = sum(mix_tot_ref_cont_count_sum),
  sum_mix_kat_out_ref_cont_count_sum = sum(mix_kat_out_ref_cont_count_sum),
  sum_tot_mut_count_sum = sum(tot_mut_count_sum),
  sum_mix_kat_out_mut_count_sum = sum(mix_kat_out_mut_count_sum),
  sum_mix_kat_mut_exp_count_sum = sum(mix_kat_mut_exp_cont_sum)
)


# Per context group: total count (tot_count, from other_kat_group_df) and mix
# count (mix_count, from mix_kat_group_df), plus
#   ref_count = tot_count - mix_count
#   ref_ratio = ref_count / tot_count
#   mix_ratio = mix_count / tot_count
#
# The join key is stated explicitly so it cannot change if either input later
# gains another shared column, and so the join no longer prints a
# "Joining, by = ..." message on every run.
kat_count_df <- other_kat_group_df %>%
  select(cont_group, tot_count = sum_kat_mut_count) %>%
  left_join(
    mix_kat_group_df %>%
      select(cont_group, mix_count = sum_mix_kat_mut_cont_sum),
    by = "cont_group"
  ) %>%
  mutate(
    ref_count = tot_count - mix_count,
    ref_ratio = ref_count / tot_count,
    mix_ratio = mix_count / tot_count
  )


# Long table of counts with each row's share (perc) of the grand total of
# ref_count + mix_count. The ref_count rows are kept per context group; the
# mix_count rows are collapsed into a single row labelled cont_group = "mix_tot".
# local() keeps the intermediate tables out of the global environment.
kat_count_merge_df <- local({
  long_counts <- kat_count_df %>%
    select(cont_group, ref_count, mix_count) %>%
    gather(count_type, n, ref_count, mix_count) %>%
    mutate(perc = n / sum(n))

  ref_rows <- filter(long_counts, count_type == "ref_count")

  mix_total_row <- long_counts %>%
    filter(count_type == "mix_count") %>%
    group_by(count_type) %>%
    dplyr::summarise(
      n = sum(n),
      perc = sum(perc)
    ) %>%
    mutate(cont_group = "mix_tot")

  rbind(ref_rows, mix_total_row)
})


  
  
# Stack the three sources into one plotting table with columns
# val_group / cont_group / cont_rate:
#   1. reference signature sums (val_group = the SBS label)
#   2. reconstructed spectrum   (val_group = "expected")
#   3. counted mutations        (val_group forced to "ref_count" for every row,
#      including the "mix_tot" row; "VCN" is relabelled "non-TCN" so it shares
#      a category with the other two sources)
kat_count_tot_merge_df <- rbind(
  sig_ori_sum_df %>%
    dplyr::rename(val_group = SBS, cont_rate = sum_rate),
  kat_cont_df %>%
    select(cont_group, cont_rate, val_group),
  kat_count_merge_df %>%
    select(cont_group, cont_rate = perc, val_group = count_type) %>%
    mutate(
      val_group = "ref_count",
      cont_group = ifelse(cont_group == "VCN", "non-TCN", cont_group)
    )
)

# Fix the bar order (x axis) and the stacking/legend order (fill).
kat_count_tot_merge_df$val_group <- factor(
  kat_count_tot_merge_df$val_group,
  levels = c("SBS2", "SBS13", "expected", "ref_count")
)
kat_count_tot_merge_df$cont_group <- factor(
  kat_count_tot_merge_df$cont_group,
  levels = c("mix_tot", "non-TCN", "TCN", "NTN")
)

# Stacked bar chart: one bar per val_group, segments filled by cont_group.
# The outline colour is supplied as a constant inside aes() and resolved to
# black through the manual colour scale.
p1 <- ggplot(
  data = kat_count_tot_merge_df,
  mapping = aes(
    x = val_group,
    y = cont_rate,
    fill = cont_group,
    colour = "black"
  )
) +
  geom_bar(stat = "identity", width = 0.4) +
  theme_classic() +
  scale_colour_manual(values = c("black"))

ggsave(
  filename = "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/kataegis_cont.pdf",
  plot = p1,
  width = 10,
  height = 8
)

# Mix counts per context group with cumulative bounds:
#   tot_count = sum of mix_count
#   ratio     = percentage of the total (0-100)
#   ymax/ymin = running total after / before each row
#   ypos      = midpoint between ymin and ymax
dat1 <- mix_kat_group_df %>%
  select(cont_group, mix_count = sum_mix_kat_mut_cont_sum) %>%
  mutate(
    tot_count = sum(mix_count),
    ratio = mix_count * 100 / sum(mix_count),
    ymax = cumsum(mix_count),
    ymin = cumsum(mix_count) - mix_count,
    ypos = ymin + mix_count / 2
  )

# Blank ggplot2 theme: theme_grey() with axis titles, axis text, panel
# background and grid removed, zero-length ticks and zero margins.
#
# base_size: base font size forwarded to theme_grey() (default 12).
# Returns a complete theme object that can be added to a plot.
#
# `panel.margin` is a deprecated theme() argument, so its replacement
# `panel.spacing` is used. The deprecated `axis.ticks.margin` is dropped;
# axis.text is blank here, so there is no text margin left to set.
theme_clean <- function(base_size = 12) {
  theme_grey(base_size) %+replace%
    theme(
      axis.title = element_blank(),
      axis.text = element_blank(),
      panel.background = element_blank(),
      panel.grid = element_blank(),
      axis.ticks.length = unit(0, "cm"),
      panel.spacing = unit(0, "lines"),
      plot.margin = unit(c(0, 0, 0, 0), "lines"),
      complete = TRUE
    )
}

# Partial theme that hides axis ticks, tick labels and axis titles on both
# axes. Takes no arguments; add the result to a plot with `+`.
theme_axis_blank <- function() {
  blank <- element_blank()
  theme(
    axis.ticks = blank,
    axis.text.x = blank,
    axis.text.y = blank,
    axis.title.x = blank,
    axis.title.y = blank
  )
}


# Pie-chart input: mix_count totalled per cont_group, largest slice first.
#   cumsum = running total of `sum` (outer edge of each slice)
#   ymin   = running total before the slice (inner edge)
#   pos    = midpoint of the slice, used to place its label
#   ratio  = slice size as a percentage of the total
data2 <- plyr::ddply(dat1, "cont_group", summarize, sum(mix_count))
names(data2)[2] <- "sum"
data2 <- data2[order(data2[["sum"]], decreasing = TRUE), ]
data2[["cumsum"]] <- cumsum(data2[["sum"]])
data2[["pos"]] <- data2[["cumsum"]] - data2[["sum"]] / 2
data2[["ymin"]] <- data2[["cumsum"]] - data2[["sum"]]
data2[["ratio"]] <- data2[["sum"]] * 100 / sum(data2[["sum"]])

# Pie chart of mix counts per context group: stacked rectangles drawn in
# polar coordinates, each labelled with its cont_group at the slice midpoint.
# The fill legend is suppressed with guides(fill = "none"); passing FALSE to
# guides() is deprecated in ggplot2.
p2 <- ggplot() +
  geom_rect(
    data = data2,
    aes(xmin = 0, xmax = 3, ymin = ymin, ymax = cumsum, fill = cont_group)
  ) +
  geom_text(data = data2, aes(x = 1.5, y = pos, label = cont_group)) +
  coord_polar(theta = "y") +
  guides(fill = "none") +
  theme_clean()

ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/kataegis_cont.mix.pie.pdf",
  p2,
  width = 10,
  height = 8
)



