
library(dplyr)
library(tidyverse)
library(ggplot2)

#A3A_event_df<-read_tsv("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/A3A_event_df.edit.tsv")
# Read the per-sample clustered-event tables for the A3A samples and for PCAWG.
A3A_event_df <- read_tsv("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/A3A_event_df.edit.exclsv.tsv")
pcawg_event_df <- read_tsv("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/pcawg_event_df.edit.tsv")


###


pcawg_event_df
#A3A_event_df$id

# Stack both sources into one table. The A3A table's APOBEC column is renamed
# to sub_project_code and its dose/TP53 columns are dropped so that both inputs
# share the same columns before rbind().
tot_event_df <- rbind(
  pcawg_event_df %>%
    select(-Project_Code),
  A3A_event_df %>%
    plyr::rename(c("APOBEC" = "sub_project_code")) %>%
    select(-dose, -TP53)
)

tot_event_df

# Per-sample APOBEC label for the PCAWG samples.
pcawg_APOBEC_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/30_clustered_mutation/fig/00_cl_ratio/pcawg_APOBEC_table.txt") %>%
  select(id, APOBEC)

tot_event_df <- left_join(tot_event_df, pcawg_APOBEC_df)
# Rows whose sub_project_code is "A3A" get the label "A3A"; all other rows keep
# the label joined in above.
tot_event_df <- tot_event_df %>%
  mutate(APOBEC = ifelse(sub_project_code == "A3A", "A3A", APOBEC))

# facet_wrap() has no default for `facets`, so the original bare call stopped
# with an error. The rows are restricted to the two cluster types, so the plot
# is faceted by cluster.
tot_event_df %>%
  filter(cluster %in% c("kataegis", "omikli")) %>%
  ggplot(aes(x = tot_event, y = sum_n, col = sub_project_code)) +
  geom_point() +
  facet_wrap(~cluster)

A3A_event_df

# Read the two total-count tables; the APOBEC one has its tot_count column
# renamed to tot_snv.
pcawg_tot_count_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/30_clustered_mutation/fig/00_cl_ratio/pcawg_tot_count.txt"
)
APOBEC_tot_count_df <- plyr::rename(
  read_tsv(
    "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/new_filter/APOBEC_tot_count.txt"
  ),
  c("tot_count" = "tot_snv")
)

pcawg_tot_count_df

# Stack the two count tables (only tot_snv and id are taken from the APOBEC
# table) and attach the counts to the event table.
count_merge_df <- rbind(
  pcawg_tot_count_df,
  select(APOBEC_tot_count_df, tot_snv, id)
)
filter(count_merge_df, id == "A3A_1st_C3_100ng-1")
tot_event_df <- left_join(tot_event_df, count_merge_df)
filter(tot_event_df, sub_project_code == "A3A")


#tot_event_df%>%filter(cluster%in%c("kataegis","omikli"))%>%
#  write.table("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/tot_event_df.edit.txt",
#              sep="\t",
#              row.names=F,
#              quote=F)
##3adding clonality###

# Read the two PCAWG "clonal" event tables (the plain one and the
# ".edit.exclsv" one); only the latter is used in the stacked table below.
pcawg_clonal_event_df <- read_tsv("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/pcawg_clonal_event_df.tsv")
pcawg_clonal_event_edit_df <- read_tsv("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/pcawg_clonal_event_df.edit.exclsv.tsv")
pcawg_clonal_event_df
pcawg_clonal_event_edit_df

# APOBEC labels, tagged with the clonality value they are joined on.
pcawg_APOBEC_clonal_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/30_clustered_mutation/fig/00_cl_ratio/pcawg_APOBEC_clonal_table.txt") %>%
  select(id, APOBEC) %>%
  mutate(clonality = "clonal")

# This overwrites the earlier pcawg_APOBEC_df, now with a clonality column.
pcawg_APOBEC_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/30_clustered_mutation/fig/00_cl_ratio/pcawg_APOBEC_table.txt") %>%
  select(id, APOBEC) %>%
  mutate(clonality = "tot")


pcawg_event_df
#A3A_event_df$id

# Stack three sources, each tagged with its clonality:
#   1. PCAWG events (clonality "tot") with their APOBEC label,
#   2. A3A events (clonality "tot", APOBEC fixed to "A3A"),
#   3. PCAWG clonal events (clonality "clonal") with their APOBEC label.
tot_clonality_event_df <- rbind(
  pcawg_event_df %>%
    select(-Project_Code) %>%
    mutate(clonality = "tot") %>%
    left_join(pcawg_APOBEC_df),
  A3A_event_df %>%
    plyr::rename(c("APOBEC" = "sub_project_code")) %>%
    select(-dose, -TP53) %>%
    mutate(clonality = "tot") %>%
    mutate(APOBEC = "A3A")
) %>%
  rbind(
    pcawg_clonal_event_edit_df %>%
      select(-Project_Code) %>%
      mutate(clonality = "clonal") %>%
      left_join(pcawg_APOBEC_clonal_df)
  )



tot_clonality_event_df %>% filter(sub_project_code == "A3A")

tot_clonality_event_df <- tot_clonality_event_df %>%
  mutate(APOBEC = ifelse(sub_project_code == "A3A", "A3A", APOBEC))

# facet_wrap() has no default for `facets`, so the original bare call stopped
# with an error; facet by cluster instead. This plot still reads tot_event_df,
# exactly as in the original.
tot_event_df %>%
  filter(cluster %in% c("kataegis", "omikli")) %>%
  ggplot(aes(x = tot_event, y = sum_n, col = sub_project_code)) +
  geom_point() +
  facet_wrap(~cluster)

A3A_event_df

# Re-read the total-count tables, this time tagging every row with the
# clonality value used as a join key below.
pcawg_tot_count_df <- mutate(
  read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/30_clustered_mutation/fig/00_cl_ratio/pcawg_tot_count.txt"),
  clonality = "tot"
)
APOBEC_tot_count_df <- mutate(
  plyr::rename(
    read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/new_filter/APOBEC_tot_count.v2.txt"),
    c("tot_count" = "tot_snv")
  ),
  clonality = "tot"
)
pcawg_clonal_tot_count_df <- mutate(
  read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/30_clustered_mutation/fig/00_cl_ratio/pcawg_clonal_tot_count.txt"),
  clonality = "clonal"
)

# Attach the stacked counts to the event table.
tot_clonality_event_df <- left_join(
  tot_clonality_event_df,
  rbind(pcawg_tot_count_df, APOBEC_tot_count_df, pcawg_clonal_tot_count_df)
)


# Clustered vs. total events for rows labelled APOBEC == "A3A" that are not
# from the "A3A" sub project, with one linear fit per clonality.
tot_clonality_event_df %>%
  filter(
    APOBEC == "A3A",
    sub_project_code != "A3A",
    cluster %in% c("kataegis", "omikli")
  ) %>%
  ggplot(aes(x = tot_event, y = sum_n, col = clonality)) +
  geom_point() +
  facet_wrap(~ cluster + sub_project_code, ncol = 10) +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    fullrange = TRUE,
    se = FALSE
  )

# Signature-ratio table; joined onto the event table below.
tot_ratio_df <- read_tsv("/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig3/tot_ratio_df.v2.txt")
tot_clonality_event_df
tot_ratio_df
tot_ratio_df %>% filter(id == "00db4dc2-3ec7-4ff9-9233-d69c8c8a607f")

# The event table's own `ratio` column is renamed to cl_ratio first, so that the
# `ratio` column coming from tot_ratio_df does not take part in the natural join
# and is added as a separate column.
tot_event_ratio_df <- left_join(
  tot_clonality_event_df %>% plyr::rename(c("ratio" = "cl_ratio")),
  tot_ratio_df %>% select(id, sub_project_code, Signature, ratio, APOBEC, clonality)
)

# ids of BRCA samples with cl_ratio > 0.07 among the SBS2 rows.
# The original read A3A_tot_event_ratio_df here, but that object is only created
# further down the script, so a top-to-bottom run failed. The same two filters
# that define it (APOBEC == "A3A", kataegis/omikli clusters) are applied to
# tot_event_ratio_df directly instead.
brca_target_df <- tot_event_ratio_df %>%
  filter(APOBEC == "A3A") %>%
  filter(cluster %in% c("kataegis", "omikli")) %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  filter(Signature == "SBS2") %>%
  filter(sub_project_code == "BRCA" & cl_ratio > 0.07) %>%
  select(id) %>%
  unique()

##sv_correlation##

# Per-sample structural-variant counts, one column per SV type (DEL ... TRA).
pcawg_sv_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig3/pcawg_sv_sum.txt"
)

# Total SV count per sample: reshape to long form, then sum over SV types.
pcawg_sv_df %>%
  gather(sv_type, n, DEL:TRA) %>%
  group_by(Project_Code, id) %>%
  dplyr::summarise(tot_sv_n = sum(n))

# Same totals, plotted per sub project; samples listed in brca_target_df are
# flagged "O", all others "X".
pcawg_sv_df %>%
  gather(sv_type, n, DEL:TRA) %>%
  group_by(Project_Code, id) %>%
  dplyr::summarise(tot_sv_n = sum(n)) %>%
  mutate(
    sub_project_code = gsub("-.*", "", Project_Code),
    sub_class = ifelse(id %in% brca_target_df$id, "O", "X")
  ) %>%
  ggplot(aes(x = sub_project_code, y = tot_sv_n, col = sub_class)) +
  geom_jitter() +
  scale_colour_manual(values = c("red", "black"))

# Deletion count against cl_ratio for the seven listed sub projects.
tot_event_ratio_df %>%
  left_join(select(pcawg_sv_df, -Project_Code)) %>%
  filter(
    cluster %in% c("kataegis", "omikli"),
    APOBEC == "A3A",
    sub_project_code %in% c("BLCA", "BRCA", "ESAD", "HNSC", "LUAD", "LUSC", "UCEC")
  ) %>%
  ggplot(aes(x = DEL, y = cl_ratio, col = sub_project_code)) +
  geom_point() +
  facet_wrap(~ cluster + sub_project_code, ncol = 7) +
  theme_bw()


# NOTE(review): tot_sv_n is joined in here but the plot maps x to sum_n, so the
# SV total is never shown -- confirm whether x = tot_sv_n was intended.
tot_event_ratio_df %>%
  left_join(
    pcawg_sv_df %>%
      gather(sv_type, n, DEL:TRA) %>%
      group_by(Project_Code, id) %>%
      dplyr::summarise(tot_sv_n = sum(n))
  ) %>%
  filter(
    APOBEC == "A3A",
    sub_project_code %in% c("BLCA", "BRCA", "ESAD", "HNSC", "LUAD", "LUSC", "UCEC"),
    cluster %in% c("kataegis", "omikli")
  ) %>%
  ggplot(aes(x = sum_n, y = cl_ratio, col = sub_project_code)) +
  geom_point() +
  facet_wrap(~ cluster + sub_project_code, ncol = 7) +
  theme_bw()

# Per-sample IMD cutoff tables.
pcawg_imd_cutoffs <- read_tsv('/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/timing_clonality/v2/vcf/separate/imds/APOBEC_clustered_mutations_imd_cutoff.tsv')
# NOTE(review): this reads exactly the same file as pcawg_imd_cutoffs above, so
# the "clonal" rows below carry the same cutoffs as the "tot" rows -- confirm
# whether a separate clonal cutoff file was intended.
pcawg_clonal_imd_cutoffs <- read_tsv('/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/timing_clonality/v2/vcf/separate/imds/APOBEC_clustered_mutations_imd_cutoff.tsv')
APOBEC_imd_cutoffs <- read_tsv('/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/25_simulation/jolim_rerun/v2/imds/APOBEC_clustered_mutations_imd_cutoff.tsv')

# Stack the three tables, tagging each with the clonality it is joined on, and
# rename Sample -> id to match the event table.
total_imd_cutoffs <- rbind(
  pcawg_imd_cutoffs %>% mutate(clonality = "tot"),
  pcawg_clonal_imd_cutoffs %>% mutate(clonality = "clonal"),
  APOBEC_imd_cutoffs %>% mutate(clonality = "tot")
)
total_imd_cutoffs <- total_imd_cutoffs %>% plyr::rename(c("Sample" = "id"))

# Attach the cutoffs, then list the rows that did not get one. The original
# also ran this NA check before the join, where no IMD_cutoff column is
# visible yet; the check now runs once, after the join.
tot_event_ratio_df <- tot_event_ratio_df %>% left_join(total_imd_cutoffs)
tot_event_ratio_df %>%
  filter(is.na(IMD_cutoff)) %>%
  print(n = 100)

tot_event_ratio_df %>%
  filter(APOBEC == "A3A") %>%
  filter(cluster %in% c("kataegis", "omikli")) %>%
  ggplot(aes(x = tot_snv, y = IMD_cutoff)) +
  geom_point()


# Working table for everything below: rows labelled APOBEC == "A3A", restricted
# to the kataegis and omikli clusters.
A3A_tot_event_ratio_df <- tot_event_ratio_df %>%
  filter(APOBEC == "A3A") %>%
  filter(cluster %in% c("kataegis", "omikli"))

# target_type is used by the plots below but is never assigned in this script,
# so a clean top-to-bottom run stopped with "object 'target_type' not found".
# The fallback is only used when the session does not already define it.
# NOTE(review): the seven types are copied from the explicit list used in the
# SV plots earlier in the script -- confirm this is the intended set (for
# example, whether "A3A" should be included).
if (!exists("target_type")) {
  target_type <- c("BLCA", "BRCA", "ESAD", "HNSC", "LUAD", "LUSC", "UCEC")
}

# All plots below use the clonal rows plus every row of the "A3A" sub project.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  ggplot(aes(x = tot_event, y = sum_n, col = tot_snv)) +
  geom_point() +
  facet_wrap(~ cluster + sub_project_code, ncol = 11)#+
#scale_x_log10()

A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  mutate(sig_ratio = `2and13sum` / tot_snv) %>%
  ggplot(aes(x = sig_ratio, y = cl_ratio)) +
  geom_point() +
  facet_wrap(~ cluster + sub_project_code, ncol = 11)

A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  ggplot(aes(x = IMD_cutoff)) +
  geom_histogram() +
  facet_wrap(~ cluster + sub_project_code, ncol = 11)

A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  filter(sub_project_code %in% target_type) %>%
  ggplot(aes(x = IMD_cutoff, y = cl_ratio)) +
  geom_point() +
  facet_wrap(~ cluster + sub_project_code, ncol = 7)


A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  filter(sub_project_code %in% target_type) %>%
  ggplot(aes(x = `2and13sum`, y = cl_ratio)) +
  geom_point() +
  facet_wrap(~ cluster + sub_project_code, ncol = 7)

library(plotly)
# 3D scatter of IMD_cutoff, 2and13sum and cl_ratio. The original repeated the
# same filter pipeline four times (once per axis and once for the colour); it
# is evaluated once here and the columns are passed as plain vectors, as before.
with(
  A3A_tot_event_ratio_df %>%
    filter(clonality == "clonal" | sub_project_code == "A3A") %>%
    filter(sub_project_code %in% target_type),
  plot_ly(
    x = IMD_cutoff,
    y = `2and13sum`,
    z = cl_ratio,
    type = "scatter3d",
    mode = "markers",
    color = sub_project_code
  )
)


# Every statement in this section starts from the same subset: clonal rows plus
# all rows of the "A3A" sub project, restricted to Signature == "SBS2".

# cl_ratio per sub project, one panel per cluster type.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(sub_project_code %in% target_type) %>%
  ggplot(aes(x = sub_project_code, y = cl_ratio)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter() +
  facet_wrap(~cluster, ncol = 7)



A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(sub_project_code %in% target_type, cluster == "kataegis") %>%
  ggplot(aes(x = tot_event, y = sum_n, size = IMD_cutoff)) +
  geom_point() +
  facet_wrap(~ cluster + sub_project_code, ncol = 7)

A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(sub_project_code %in% target_type, cluster == "omikli", IMD_cutoff > 500) %>%
  ggplot(aes(x = tot_event, y = sum_n, size = IMD_cutoff, col = sub_project_code)) +
  geom_point() +
  theme_bw()


# The three-colour manual scale was already disconnected from this plot (its
# `+` was commented out), which left a bare scale_color_manual() call that only
# printed a scale object. It is commented out completely here.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(cluster == "omikli", IMD_cutoff > 255) %>%
  ggplot(aes(x = tot_event, y = sum_n)) +
  geom_point(size = 5, alpha = 0.7, aes(colour = sub_project_code)) +
  theme_bw() +
  geom_smooth(method = "lm", colour = "black")#+
#scale_color_manual(values=c("#abddfb","#9a6324","#DC8D8D"))


A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(sub_project_code %in% c("A3A", "BLCA", "BRCA"), cluster == "omikli", IMD_cutoff > 505) %>%
  ggplot(aes(x = IMD_cutoff, y = cl_ratio)) +
  geom_point(size = 5, alpha = 0.7, aes(colour = sub_project_code)) +
  theme_bw() +
  geom_smooth(method = "lm", colour = "black") +
  scale_color_manual(values = c("#abddfb", "#9a6324", "#DC8D8D"))


# Same dangling-scale cleanup as for the omikli plot above.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(cluster == "kataegis", IMD_cutoff > 255) %>%
  ggplot(aes(x = tot_event, y = sum_n)) +
  geom_point(size = 5, alpha = 0.7, aes(colour = sub_project_code)) +
  theme_bw() +
  geom_smooth(method = "lm", colour = "black")#+
#scale_color_manual(values=c("#abddfb","#9a6324","#DC8D8D"))

# Number of omikli rows per sub project that pass IMD_cutoff > 255.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(sub_project_code %in% target_type, cluster == "omikli", IMD_cutoff > 255) %>%
  group_by(sub_project_code) %>%
  dplyr::summarise(n = n())

# Omikli SBS2 rows (clonal rows plus the "A3A" sub project) with a positive
# 2and13sum, a sub project in target_type and a non-missing IMD cutoff; used to
# look at the quantiles of IMD_cutoff.
tot_quant_df <- A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(`2and13sum` > 0, cluster == "omikli") %>%
  filter(sub_project_code %in% target_type, !is.na(IMD_cutoff))
quantile(tot_quant_df$IMD_cutoff)
filter(tot_quant_df, is.na(IMD_cutoff))

# Rows above an IMD cutoff of 2197.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(IMD_cutoff > 2197) %>%
  print(n = 100)

# Clustered vs. total events for rows above an IMD cutoff of 1758.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(IMD_cutoff > 1758) %>%
  ggplot(aes(x = tot_event, y = sum_n, col = sub_project_code)) +
  geom_point() +
  facet_wrap(~cluster, scales = "free_y")#+
#  geom_smooth(method="lm")

##figure###

# Every statement in this section starts from the same subset: clonal rows plus
# all rows of the "A3A" sub project, restricted to Signature == "SBS2".

A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(IMD_cutoff > 1758) %>%
  ggplot(aes(x = tot_event, y = sum_n, col = sub_project_code)) +
  geom_point() +
  facet_wrap(~cluster, scales = "free_y")#+





A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  ggplot(aes(x = sub_project_code, y = IMD_cutoff, col = sub_project_code)) +
  geom_point() +
  facet_wrap(~cluster, scales = "free_y")#+


A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(IMD_cutoff > 1758) %>%
  ggplot(aes(x = sub_project_code, y = cl_ratio, col = sub_project_code)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter()

A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(IMD_cutoff > 1758) %>%
  filter(`2and13sum` > 200) %>%
  ggplot(aes(x = sub_project_code, y = cl_ratio, col = sub_project_code)) +
  facet_wrap(~cluster) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter()


A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(IMD_cutoff > 1758) %>%
  filter(`2and13sum` > 300) %>%
  ggplot(aes(x = tot_event, y = sum_n, col = sub_project_code)) +
  facet_wrap(~cluster, scales = "free_y") +
  geom_point()#+
#  geom_boxplot(outlier.shape=NA)+
#  geom_jitter()

# The original had a bare geom_boxplot() on the line after this plot, with no
# `+` joining it, so it was never a layer of the plot and only printed a layer
# object. It is commented out, matching the statement above.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(IMD_cutoff > 1758) %>%
  filter(`2and13sum` > 200) %>%
  ggplot(aes(x = tot_event, y = sum_n, col = sub_project_code)) +
  facet_wrap(~cluster, scales = "free_y") +
  geom_jitter()
#geom_boxplot()

# Median cl_ratio per sub project, one column per cluster type.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(IMD_cutoff > 1758) %>%
  filter(`2and13sum` > 200) %>%
  group_by(sub_project_code, cluster) %>%
  dplyr::summarise(median_ratio = median(cl_ratio)) %>%
  spread(cluster, median_ratio)

# Every statement in this section starts from the same subset: clonal rows plus
# all rows of the "A3A" sub project, restricted to Signature == "SBS2".

A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(IMD_cutoff > 1758, `2and13sum` > 300) %>%
  ggplot(aes(x = tot_event, y = sum_n, col = sub_project_code)) +
  facet_wrap(~cluster) +
  #  geom_boxplot(outlier.shape=NA)+
  geom_jitter()



A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(IMD_cutoff > 2832) %>%
  ggplot(aes(x = sub_project_code, y = cl_ratio, col = sub_project_code)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter() +
  facet_wrap(~cluster, scales = "free_y")
#  facet_wrap(~cluster,scales="free_y")#+
#  geom_smooth(method="lm")

A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(IMD_cutoff > 1758) %>%
  ggplot(aes(x = IMD_cutoff, y = cl_ratio, col = sub_project_code)) +
  geom_point() +
  facet_wrap(~cluster, scales = "free_y", ncol = 11)


# Kataegis: cl_ratio against IMD cutoff, with the 1758 threshold marked in red.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(cluster == "kataegis", `2and13sum` > 300) %>%
  #filter(IMD_cutoff>1758)%>%
  ggplot(aes(x = IMD_cutoff, y = cl_ratio, col = sub_project_code)) +
  geom_point() +
  geom_vline(xintercept = 1758, col = "red") +
  facet_wrap(~ cluster + sub_project_code, ncol = 11)




# BRCA kataegis rows with cl_ratio below 0.01, ordered by -IMD_cutoff.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(cluster == "kataegis", cl_ratio < 0.01) %>%
  arrange(-IMD_cutoff) %>%
  filter(sub_project_code == "BRCA") %>%
  print(n = 100)

# Omikli: cl_ratio against IMD cutoff, with the 1758 threshold marked in red.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  filter(cluster == "omikli", `2and13sum` > 300) %>%
  #filter(IMD_cutoff>1758)%>%
  ggplot(aes(x = IMD_cutoff, y = cl_ratio, col = sub_project_code)) +
  geom_point() +
  geom_vline(xintercept = 1758, col = "red") +
  facet_wrap(~ cluster + sub_project_code, ncol = 11)


# NOTE(review): the x axis here is tot_event, yet the vertical line is still
# drawn at 1758, the IMD cutoff used in the filter -- confirm it is wanted.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A", Signature == "SBS2") %>%
  #  filter(cluster=="omikli")%>%
  filter(`2and13sum` > 300, IMD_cutoff > 1758) %>%
  ggplot(aes(x = tot_event, y = sum_n, col = sub_project_code)) +
  geom_point() +
  geom_vline(xintercept = 1758, col = "red") +
  facet_wrap(~ cluster + sub_project_code, ncol = 11)


#####correction####

# Saturating exponential curve 1.01 - 1.02 * exp(-exp(-7.08) * x).
#
# Vectorised over x. The value is -0.01 at x = 0 and approaches 1.01 as x
# grows. The script calls it with x = IMD_cutoff and divides observed kataegis
# counts (sum_n) by the result.
# NOTE(review): the three constants look like fitted parameters; their origin
# is not visible in this file.
kat_y <- function(x) {
  1.01 - 1.02 * exp(-exp(-7.08) * x)
}

# Saturating exponential curve 0.94 - 0.92 * exp(-exp(-6.20) * x).
#
# Vectorised over x. The value is 0.02 at x = 0 and approaches 0.94 as x
# grows. The script calls it with x = IMD_cutoff and divides observed omikli
# counts (sum_n) by the result.
# NOTE(review): the three constants look like fitted parameters; their origin
# is not visible in this file.
omi_y <- function(x) {
  0.94 - 0.92 * exp(-exp(-6.20) * x)
}
# Build the long-format table of IMD-corrected clustered-event counts.
#
# The original script pasted this pipeline five times; it is factored out here
# so that the variants differ only in the scale factors and the trailing
# filters.
#
# event_df   table with the columns id, clonality, sub_project_code, Signature,
#            cluster, sum_n, IMD_cutoff, noncl_TCN and `2and13sum`
#            (A3A_tot_event_ratio_df in this script).
# kat_scale  factor applied to the corrected kataegis count (default 1).
# omi_scale  factor applied to the corrected omikli count (default 1).
#
# Returns one row per id and corrected cluster type ("cor_omikli" /
# "cor_kataegis" in cor_cluster) with the corrected count in cor_sum_n and the
# corrected total (noncl_TCN + both corrected counts) in cor_tot_event.
corrected_cluster_events <- function(event_df, kat_scale = 1, omi_scale = 1) {
  # Clonal rows plus every row of the "A3A" sub project, SBS2 only.
  sbs2_df <- event_df %>%
    filter(clonality == "clonal" | sub_project_code == "A3A") %>%
    filter(Signature == "SBS2")

  # Divide each observed count by the curve value at the sample's IMD cutoff,
  # then spread to one column per cluster type.
  cor_wide_df <- sbs2_df %>%
    mutate(cor_sum_n = ifelse(cluster == "kataegis",
                              sum_n / kat_y(IMD_cutoff) * kat_scale,
                              sum_n / omi_y(IMD_cutoff) * omi_scale)) %>%
    dplyr::select(id, cluster, cor_sum_n) %>%
    spread(cluster, cor_sum_n) %>%
    plyr::rename(c("kataegis" = "cor_kataegis", "omikli" = "cor_omikli"))

  left_join(sbs2_df, cor_wide_df) %>%
    mutate(cor_cl_event = cor_kataegis + cor_omikli) %>%
    mutate(cor_tot_event = noncl_TCN + cor_cl_event) %>%
    #arrange(-cor_tot_event)%>%
    dplyr::select(id, cor_kataegis, cor_omikli, sub_project_code, IMD_cutoff, `2and13sum`, cor_tot_event) %>%
    gather(cor_cluster, cor_sum_n, cor_omikli:cor_kataegis) %>%
    unique()
}

# Exploratory variant with scaled counts.
# NOTE(review): the omikli factor is 0.93 while omi_y() levels off at 0.94 --
# confirm which value is intended.
scaled_cor_event_df <- corrected_cluster_events(
  A3A_tot_event_ratio_df,
  kat_scale = 1.01,
  omi_scale = 0.93
)

scaled_cor_event_df %>%
  filter(cor_sum_n != 0) %>%
  filter(`2and13sum` > 200) %>%
  #filter(IMD_cutoff>1758)%>%
  ggplot(aes(x = cor_tot_event, y = cor_sum_n, col = sub_project_code)) +
  geom_point() +
  facet_wrap(~cor_cluster, scales = "free_y")
#scale_y_log10()

# Samples with more than 20000 corrected events, ordered by -cor_sum_n
# (omikli rows only).
scaled_cor_event_df %>%
  filter(cor_sum_n != 0) %>%
  filter(`2and13sum` > 200) %>%
  filter(cor_tot_event > 20000) %>%
  arrange(-cor_sum_n) %>%
  filter(cor_cluster == "cor_omikli")
#filter(IMD_cutoff>1140)

scaled_cor_event_df %>%
  #  filter(cor_sum_n!=0)%>%
  filter(`2and13sum` > 200) %>%
  filter(IMD_cutoff > 1140) %>%
  ggplot(aes(x = cor_tot_event, y = cor_sum_n, col = sub_project_code)) +
  geom_point() +
  facet_wrap(~cor_cluster, scales = "free_y")

# Unscaled variant: this is the table that is exported and used for the figure.
unscaled_cor_event_df <- corrected_cluster_events(A3A_tot_event_ratio_df)

cor_fin_df <- unscaled_cor_event_df %>%
  #  filter(cor_sum_n!=0)%>%
  filter(`2and13sum` > 500) %>%
  #filter(IMD_cutoff>1140)%>%
  mutate(tissue_class = ifelse(grepl("A3A", id), "A3A", "PCAWG")) %>%
  mutate(cor_cl_ratio = cor_sum_n / cor_tot_event)


unscaled_cor_event_df %>%
  write.table("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/clustered_mutation_event.tot.txt",
              sep = "\t",
              quote = FALSE,
              row.names = FALSE)


# Recode the cluster and tissue labels and turn both into factors with a fixed
# level order.
cor_fin_df <- cor_fin_df %>%
  mutate(
    cor_cluster = factor(
      ifelse(grepl("omikli", cor_cluster), "omikli", "kataegis"),
      levels = c("omikli", "kataegis")
    ),
    tissue_class = factor(
      ifelse(grepl("A3A", tissue_class), "normal organoid", "cancer"),
      levels = c("normal organoid", "cancer")
    )
  )

# TP53 status and dose are parsed from the sample id; sub_class is "cancer" for
# cancer rows and "<TP53>_<dose>" otherwise.
cor_fin_df <- cor_fin_df %>%
  mutate(
    TP53 = ifelse(grepl("TP53", id), "KO", "WT"),
    dose = case_when(
      grepl("3ug", id) ~ "3ug/ml",
      grepl("100", id) ~ "0.1ug/ml",
      TRUE ~ "CTRL"
    )
  ) %>%
  mutate(
    sub_class = ifelse(
      tissue_class == "cancer",
      "cancer",
      paste(TP53, dose, sep = "_")
    )
  ) %>%
  print(n = 100)
# Any sub_class value outside these five levels (e.g. a "CTRL" dose) becomes NA.
cor_fin_df$sub_class <- factor(
  cor_fin_df$sub_class,
  levels = c("WT_0.1ug/ml", "WT_3ug/ml", "KO_0.1ug/ml", "KO_3ug/ml", "cancer")
)

# Mean corrected ratio with a 95% t-based confidence interval per tissue class
# and cluster type.
# NOTE(review): n.ratio counts every row, including rows whose cor_cl_ratio is
# NA, while the mean and sd drop NAs.
cor_fin_ci_df <- cor_fin_df %>%
  group_by(tissue_class, cor_cluster) %>%
  summarise(
    mean.ratio = mean(cor_cl_ratio, na.rm = TRUE),
    sd.ratio = sd(cor_cl_ratio, na.rm = TRUE),
    n.ratio = n()
  ) %>%
  mutate(
    se.ratio = sd.ratio / sqrt(n.ratio),
    lower.ci.ratio = mean.ratio - qt(1 - (0.05 / 2), n.ratio - 1) * se.ratio,
    upper.ci.ratio = mean.ratio + qt(1 - (0.05 / 2), n.ratio - 1) * se.ratio
  )
cor_fin_ci_df

#tissue_class    cor_cluster mean.ratio sd.ratio n.ratio se.ratio lower.ci.ratio upper.ci.ratio
#<chr>           <fct>            <dbl>    <dbl>   <int>    <dbl>          <dbl>          <dbl>
#cancer          omikli         0.0243   0.0176      166 0.00136         0.0216         0.0270 
#cancer          kataegis       0.00646  0.00815     166 0.000632        0.00521        0.00771
#normal organoid omikli         0.0241   0.0121       14 0.00323         0.0171         0.0310 
#normal organoid kataegis       0.00400  0.00306      14 0.000818        0.00223        0.00577

#tissue_class    cor_cluster mean.ratio sd.ratio n.ratio se.ratio lower.ci.ratio upper.ci.ratio
#<fct>           <fct>            <dbl>    <dbl>   <int>    <dbl>          <dbl>          <dbl>
#normal organoid omikli         0.0274   0.00925      12 0.00267         0.0215         0.0333 
#normal organoid kataegis       0.00467  0.00277      12 0.000799        0.00291        0.00643
#cancer          omikli         0.0257   0.0176      146 0.00146         0.0228         0.0285 
#cancer          kataegis       0.00608  0.00674     146 0.000558        0.00497        0.00718
cor_fin_df$tissue_class <- factor(cor_fin_df$tissue_class, levels = c("normal organoid", "cancer"))

# Reference lines (lower CI, mean, upper CI) for the omikli panel.
# The original hard-coded these numbers, and they disagreed with the omikli
# lines hard-coded in p_merge for the same data. They are now read from
# cor_fin_ci_df so the lines always match the plotted points.
omi_organoid_ci <- cor_fin_ci_df %>%
  filter(cor_cluster == "omikli", tissue_class == "normal organoid")
omi_cancer_ci <- cor_fin_ci_df %>%
  filter(cor_cluster == "omikli", tissue_class == "cancer")

# Omikli panel: corrected cluster frequency per sub class, one facet per tissue
# class. Red lines are the normal-organoid CI and mean, grey lines the cancer
# CI and mean.
p_omi <- cor_fin_df %>%
  filter(cor_cluster == "omikli") %>%
  ggplot(aes(x = sub_class, y = cor_cl_ratio, col = sub_class)) +
  geom_boxplot(#position = position_dodge(width =0.9),
    outlier.shape = NA,
    linetype = "dashed") +
  stat_boxplot(aes(ymin = ..lower.., ymax = ..upper..), outlier.shape = NA,
               #position = position_dodge(width =0.9)
  ) +
  stat_boxplot(geom = "errorbar", aes(ymin = ..ymax..)) +
  stat_boxplot(geom = "errorbar", aes(ymax = ..ymin..)) +
  geom_point(position = position_jitterdodge(jitter.width = 0.5,
  )) +
  geom_hline(yintercept = c(omi_organoid_ci$lower.ci.ratio,
                            omi_organoid_ci$mean.ratio,
                            omi_organoid_ci$upper.ci.ratio),
             colour = "red") +
  geom_hline(yintercept = c(omi_cancer_ci$lower.ci.ratio,
                            omi_cancer_ci$mean.ratio,
                            omi_cancer_ci$upper.ci.ratio),
             colour = "grey") +


  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(size = 30),

        plot.title = element_text(size = 30),
        axis.text.y = element_text(size = 40),
        axis.title.y = element_text(size = 40),
        axis.ticks.length = unit(.4, "cm"),
        axis.ticks = element_line(size = 4)
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("omikli") +
  ylab(paste0("corrected freq. of", "\n", "cluster event")) +
#  scale_colour_manual(values=c("#DC8D8D","#818ADB"))+
  guides(colour = guide_legend(title = "Data")) +
  facet_wrap(~tissue_class, scales = "free_x") +
  scale_y_continuous(limits = c(0, 0.1))
#  scale_y_continuous(breaks=seq(0,0.06,by=0.02),
#                     labels=seq(0,0.06,by=0.02),
#                     limits=c(0,0.065)
#  )
#tissue_class    cor_cluster mean.ratio sd.ratio n.ratio se.ratio lower.ci.ratio upper.ci.ratio
#<chr>           <fct>            <dbl>    <dbl>   <int>    <dbl>          <dbl>          <dbl>
#cancer          omikli         0.0243   0.0176      166 0.00136         0.0216         0.0270 
#cancer          kataegis       0.00646  0.00815     166 0.000632        0.00521        0.00771
#normal organoid omikli         0.0241   0.0121       14 0.00323         0.0171         0.0310 
#normal organoid kataegis       0.00400  0.00306      14 0.000818        0.00223        0.00577
p_omi
# Export the normal-organoid rows of the corrected table.
write.table(
  #arrange(-cor_cl_ratio)%>%
  #print(n=20)%>%
  filter(cor_fin_df, tissue_class == "normal organoid"),
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/cl_ratio.APOBEC.500.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

# Export all remaining rows.
write.table(
  #arrange(-cor_cl_ratio)%>%
  #print(n=20)%>%
  filter(cor_fin_df, tissue_class != "normal organoid"),
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/cl_ratio.pcawg.APOBEC.500.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)


filter(cor_fin_df, tissue_class != "normal organoid")

# The normal-organoid rows are written a second time, to cl_ratio.txt.
write.table(
  #arrange(-cor_cl_ratio)%>%
  #print(n=20)%>%
  filter(cor_fin_df, tissue_class == "normal organoid"),
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/cl_ratio.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
# Reference lines (mean, lower CI, upper CI) for the kataegis panel.
# The original hard-coded these numbers, and they disagreed with the kataegis
# lines hard-coded in p_merge for the same data. They are now read from
# cor_fin_ci_df so the lines always match the plotted points.
kat_organoid_ci <- cor_fin_ci_df %>%
  filter(cor_cluster == "kataegis", tissue_class == "normal organoid")
kat_cancer_ci <- cor_fin_ci_df %>%
  filter(cor_cluster == "kataegis", tissue_class == "cancer")

# Kataegis panel: corrected cluster frequency per sub class, one facet per
# tissue class. Red lines are the normal-organoid mean and CI, grey lines the
# cancer mean and CI.
p_kat <- cor_fin_df %>%
  filter(cor_cluster == "kataegis") %>%
  ggplot(aes(x = sub_class, y = cor_cl_ratio, col = sub_class)) +
  geom_boxplot(#position = position_dodge(width =0.9),
    outlier.shape = NA,
    linetype = "dashed") +
  stat_boxplot(aes(ymin = ..lower.., ymax = ..upper..), outlier.shape = NA,
               #position = position_dodge(width =0.9)
  ) +
  stat_boxplot(geom = "errorbar", aes(ymin = ..ymax..)) +
  stat_boxplot(geom = "errorbar", aes(ymax = ..ymin..)) +
  geom_point(position = position_jitterdodge(jitter.width = 0.5,
  )) +

  geom_hline(yintercept = c(kat_organoid_ci$mean.ratio,
                            kat_organoid_ci$lower.ci.ratio,
                            kat_organoid_ci$upper.ci.ratio),
             colour = "red") +
  geom_hline(yintercept = c(kat_cancer_ci$mean.ratio,
                            kat_cancer_ci$lower.ci.ratio,
                            kat_cancer_ci$upper.ci.ratio),
             colour = "grey") +

  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(size = 30),

        plot.title = element_text(size = 30),
        axis.text.y = element_text(size = 40),
        axis.title.y = element_text(size = 40),
        axis.ticks.length = unit(.4, "cm"),
        axis.ticks = element_line(size = 4)
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("kataegis") +
  ylab(paste0("corrected freq. of", "\n", "cluster event")) +
  #scale_colour_manual(values=c("#DC8D8D","#818ADB"))+
  guides(colour = guide_legend(title = "Data")) +
  # The original added two y scales. ggplot2 keeps only the last one, so the
  # first (breaks 0-0.06, limits 0-0.065) never took effect and only produced a
  # "Scale for y is already present" message. It is kept here as a comment.
  #scale_y_continuous(breaks=seq(0,0.06,by=0.02),
  #                   labels=seq(0,0.06,by=0.02),
  #                   limits=c(0,0.065))+
  facet_wrap(~tissue_class, scales = "free_x") +
  scale_y_continuous(limits = c(0, 0.1))
p_kat
# Save the omikli and kataegis panels at the same page size.
ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/freq_omikli.v5.pdf",
  p_omi,
  width = 8,
  height = 6.8
)


ggsave(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/freq_kataegis.v5.pdf",
  p_kat,
  width = 8,
  height = 6.8
)

# Export the full corrected table behind both panels.
write.table(
  cor_fin_df,
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/cluster_freq.APOBEC.500.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
#tissue_class    cor_cluster mean.ratio sd.ratio n.ratio se.ratio lower.ci.ratio upper.ci.ratio
#<fct>           <fct>            <dbl>    <dbl>   <int>    <dbl>          <dbl>          <dbl>
#normal organoid omikli         0.0274   0.00925      12 0.00267         0.0215         0.0333 
#normal organoid kataegis       0.00467  0.00277      12 0.000799        0.00291        0.00643
#cancer          omikli         0.0257   0.0176      146 0.00146         0.0228         0.0285 
#cancer          kataegis       0.00608  0.00674     146 0.000558        0.00497        0.00718
# Reference lines (lower CI, mean, upper CI) per cluster type and tissue class,
# read from cor_fin_ci_df instead of being copied by hand from a printed table,
# so they cannot drift away from the plotted data.
merge_ci_lines <- function(cluster_name, tissue_name) {
  ci_row <- cor_fin_ci_df %>%
    filter(cor_cluster == cluster_name, tissue_class == tissue_name)
  c(ci_row$lower.ci.ratio, ci_row$mean.ratio, ci_row$upper.ci.ratio)
}

# Combined panel: both cluster types in one plot, coloured by cluster type, one
# facet per tissue class. Red lines are the normal-organoid CI and mean, grey
# lines the cancer CI and mean.
p_merge <- cor_fin_df %>%#%>%filter(cor_cluster=="omikli")%>%
  ggplot(aes(x = sub_class, y = cor_cl_ratio, col = cor_cluster)) +
  geom_boxplot(#position = position_dodge(width =0.9),
    outlier.shape = NA,
    linetype = "dashed") +
  stat_boxplot(aes(ymin = ..lower.., ymax = ..upper..), outlier.shape = NA,
               #position = position_dodge(width =0.9)
  ) +
  stat_boxplot(geom = "errorbar", aes(ymin = ..ymax..)) +
  stat_boxplot(geom = "errorbar", aes(ymax = ..ymin..)) +
  geom_point(position = position_jitterdodge(jitter.width = 0.3,
  )) +
  geom_hline(yintercept = merge_ci_lines("omikli", "normal organoid"), colour = "red") +
  geom_hline(yintercept = merge_ci_lines("omikli", "cancer"), colour = "grey") +
  geom_hline(yintercept = merge_ci_lines("kataegis", "normal organoid"), colour = "red") +
  geom_hline(yintercept = merge_ci_lines("kataegis", "cancer"), colour = "grey") +


  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(size = 30),

        plot.title = element_text(size = 30),
        axis.text.y = element_text(size = 40),
        axis.title.y = element_text(size = 40),
        axis.ticks.length = unit(.4, "cm"),
        axis.ticks = element_line(size = 4)
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  #ggtitle("omikli")+
  ylab(paste0("corrected freq. of", "\n", "cluster event")) +
  #  scale_colour_manual(values=c("#DC8D8D","#818ADB"))+
  guides(colour = guide_legend(title = "Data")) +
  facet_wrap(~tissue_class, scales = "free_x") +
  scale_y_continuous(limits = c(0, 0.1))
ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/freq_merge.v5.pdf", p_merge,
       width = 8,
       height = 6.8)

#scale_y_log10()
# ---- Corrected clustered-event counts for SBS2 rows -------------------------
# The original script repeated the same left_join/spread/gather pipeline eleven
# times, changing only the trailing filters and the final plot / model. The
# shared table is now built once and the variants below reuse it. Outputs and
# their order are unchanged (the join message is emitted once instead of
# eleven times).
A3A_tot_event_ratio_df

# Rows kept throughout this section: clonal rows or A3A samples, SBS2 only.
sbs2_cor_input_df <- A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  filter(Signature == "SBS2")

# One row per id with the corrected kataegis / omikli counts side by side.
# kat_y() and omi_y() are defined elsewhere in this file.
# NOTE(review): the factors 1.01 and 0.93 are unexplained in this section --
# document where they come from.
sbs2_cor_wide_df <- sbs2_cor_input_df %>%
  mutate(cor_sum_n = ifelse(cluster == "kataegis",
                            sum_n / kat_y(IMD_cutoff) * 1.01,
                            sum_n / omi_y(IMD_cutoff) * 0.93)) %>%
  dplyr::select(id, cluster, cor_sum_n) %>%
  spread(cluster, cor_sum_n) %>%
  plyr::rename(c("kataegis" = "cor_kataegis", "omikli" = "cor_omikli"))

# Long table: one row per id x corrected cluster type, with the corrected
# total (noncl_TCN + corrected clustered events) and the corrected ratio.
sbs2_cor_long_df <- left_join(sbs2_cor_input_df, sbs2_cor_wide_df) %>%
  mutate(cor_cl_event = cor_kataegis + cor_omikli) %>%
  mutate(cor_tot_event = noncl_TCN + cor_cl_event) %>%
  # arrange(-cor_tot_event) %>%
  dplyr::select(id, cor_kataegis, cor_omikli, sub_project_code, IMD_cutoff,
                `2and13sum`, cor_tot_event) %>%
  gather(cor_cluster, cor_sum_n, cor_omikli:cor_kataegis) %>%
  mutate(cor_cl_ratio = cor_sum_n / cor_tot_event) %>%
  unique()

# Filter variants used below.
sbs2_cor_sig_df <- sbs2_cor_long_df %>%
  filter(`2and13sum` > 200)
sbs2_cor_sig_imd_df <- sbs2_cor_sig_df %>%
  filter(IMD_cutoff > 1140)
sbs2_cor_sig_imd_nonzero_df <- sbs2_cor_sig_imd_df %>%
  filter(cor_sum_n != 0)

# Keep one corrected cluster type and label rows as "A3A" or "PCAWG"
# (A3A when sub_project_code contains "A3A").
sbs2_cor_pick_cluster <- function(cor_df, cor_cluster_arg) {
  cor_df %>%
    filter(cor_cluster == cor_cluster_arg) %>%
    mutate(class_type = ifelse(grepl("A3A", sub_project_code), "A3A", "PCAWG"))
}

# Scatter of corrected clustered count vs corrected total with a linear fit,
# one facet per cor_cluster x class_type.
sbs2_cor_scatter <- function(cor_df) {
  ggplot(cor_df, aes(x = cor_tot_event, y = cor_sum_n)) +
    geom_point() +
    facet_wrap(~cor_cluster + class_type) +
    geom_smooth(method = "lm", fullrange = TRUE) +
    theme_bw()
}

# Intercept and slope of cor_sum_n ~ cor_tot_event within one class_type.
sbs2_cor_lm_coef <- function(cor_df, class_type_arg) {
  cor_df %>%
    filter(class_type == class_type_arg) %>%
    lm(cor_sum_n ~ cor_tot_event, data = .) %>%
    coef()
}

# Corrected ratio per project, non-zero rows only, no further filtering.
sbs2_cor_long_df %>%
  filter(cor_sum_n != 0) %>%
  # filter(`2and13sum` > 200) %>%
  # filter(IMD_cutoff > 1758) %>%
  ggplot(aes(x = sub_project_code, y = cor_cl_ratio, col = sub_project_code)) +
  geom_boxplot() +
  geom_jitter() +
  facet_wrap(~cor_cluster, scales = "free_y")


# Same, restricted to `2and13sum` > 200 and IMD_cutoff > 1140.
sbs2_cor_sig_imd_nonzero_df %>%
  ggplot(aes(x = sub_project_code, y = cor_cl_ratio, col = sub_project_code)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter() +
  facet_wrap(~cor_cluster, scales = "free_y")


# Kataegis scatter, zero counts kept.
sbs2_cor_sig_imd_df %>%
  sbs2_cor_pick_cluster("cor_kataegis") %>%
  sbs2_cor_scatter()


# Linear fits; the values recorded in the comments below were pasted from an
# earlier run, in this order: omikli A3A, omikli PCAWG, kataegis A3A,
# kataegis PCAWG.
sbs2_cor_sig_imd_nonzero_df %>%
  sbs2_cor_pick_cluster("cor_omikli") %>%
  sbs2_cor_lm_coef("A3A")

sbs2_cor_sig_imd_nonzero_df %>%
  sbs2_cor_pick_cluster("cor_omikli") %>%
  sbs2_cor_lm_coef("PCAWG")

sbs2_cor_sig_imd_nonzero_df %>%
  sbs2_cor_pick_cluster("cor_kataegis") %>%
  sbs2_cor_lm_coef("A3A")

sbs2_cor_sig_imd_nonzero_df %>%
  sbs2_cor_pick_cluster("cor_kataegis") %>%
  sbs2_cor_lm_coef("PCAWG")

##omikli, A3A###
#(Intercept) cor_tot_event
#-6.34329738    0.03048535

##omikli,PCAWG###
#(Intercept) cor_tot_event
#-12.75725057    0.04160793

##kataegis,A3A###
#(Intercept) cor_tot_event
#0.421191828   0.004633528

##kataegis,PCAWG###
#(Intercept) cor_tot_event
#5.527820009   0.009625028


# Omikli scatter, non-zero rows, both cutoffs applied.
sbs2_cor_sig_imd_nonzero_df %>%
  sbs2_cor_pick_cluster("cor_omikli") %>%
  sbs2_cor_scatter()




# Kataegis scatter, non-zero rows, both cutoffs applied.
sbs2_cor_sig_imd_nonzero_df %>%
  sbs2_cor_pick_cluster("cor_kataegis") %>%
  sbs2_cor_scatter()


# Omikli scatter without the IMD_cutoff filter, zero counts kept.
sbs2_cor_sig_df %>%
  #  filter(cor_sum_n != 0) %>%
  #  filter(IMD_cutoff > 1140) %>%
  sbs2_cor_pick_cluster("cor_omikli") %>%
  sbs2_cor_scatter()

# Omikli scatter without the IMD_cutoff filter, non-zero rows only.
sbs2_cor_sig_df %>%
  filter(cor_sum_n != 0) %>%
  #  filter(IMD_cutoff > 1140) %>%
  sbs2_cor_pick_cluster("cor_omikli") %>%
  sbs2_cor_scatter()



# Inspect the SBS2 rows of one specific sample.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  filter(Signature == "SBS2") %>%
  filter(id == "21205681-a0c7-48d0-81d4-0b7ef31158cf")


# Median cl_ratio per project and cluster, all IMD cutoffs.
# The original statement started directly with group_by() and had no data
# source, so it failed with "object 'sub_project_code' not found".
# NOTE(review): the input is assumed to be the same SBS2 subset used by the
# neighbouring statements -- confirm.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  filter(Signature == "SBS2") %>%
  group_by(sub_project_code, cluster) %>%
  dplyr::summarise(med_cutoff = median(cl_ratio)) %>%
  spread(cluster, med_cutoff)

# Same table restricted to samples with IMD_cutoff > 2832.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  filter(Signature == "SBS2") %>%
  filter(IMD_cutoff > 2832) %>%
  group_by(sub_project_code, cluster) %>%
  dplyr::summarise(med_cutoff = median(cl_ratio)) %>%
  spread(cluster, med_cutoff)
#0%   25%   50%   75%  100%
#31   255   784  2047 10000

#0%   25%   50%   75%  100%
#31   255   762  2047 10000

#0%   25%   50%   75%  100%
#31   255   976  2047 10000
#summary(lm(sum_n ~ tot_event + imd_group + tot_event*imd_group, data=tmp_A3A_group_df%>%filter(clonality=="clonal"|sub_project_code=="A3A")%>%filter(Signature=="SBS2")%>%
#             +      filter(sub_project_code%in%target_type)%>%filter(cluster=="omikli")%>%filter()))

#Call:
#  lm(formula = sum_n ~ tot_event + imd_group + tot_event * imd_group,
#     data = tmp_A3A_group_df %>% filter(clonality == "clonal" |
#                                          sub_project_code == "A3A") %>% filter(Signature == "SBS2") %>%
#       filter(sub_project_code %in% target_type) %>% filter(cluster ==
#                                                             "omikli") %>% filter())

#Residuals:
#  Min      1Q  Median      3Q     Max
#-250.52  -17.95    0.41   10.41  372.44

#Coefficients:
#  Estimate Std. Error t value Pr(>|t|)
#(Intercept)          -2.543e+00  1.480e+01  -0.172 0.863787
#tot_event             3.535e-02  9.948e-03   3.554 0.000496 ***
#  imd_group2           -4.776e+00  1.963e+01  -0.243 0.808127
#imd_group3           -4.210e+01  2.010e+01  -2.094 0.037768 *
#  imd_group4            1.548e+01  2.097e+01   0.738 0.461556
#tot_event:imd_group2 -3.437e-03  1.089e-02  -0.316 0.752688
#tot_event:imd_group3 -8.570e-05  1.007e-02  -0.009 0.993218
#tot_event:imd_group4 -2.708e-02  9.989e-03  -2.711 0.007418 **
#  ---
#  Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

#Residual standard error: 68.99 on 164 degrees of freedom
#Multiple R-squared:  0.8196,   Adjusted R-squared:  0.8119
#F-statistic: 106.4 on 7 and 164 DF,  p-value: < 2.2e-16

# SBS2 rows (clonal rows or A3A samples) binned by IMD_cutoff.
# imd_group is a character label: "1" (> 2047), "2" (> 784), "3" (> 255),
# "4" (otherwise); rows with a missing IMD_cutoff get NA. The thresholds equal
# the 75% / 50% / 25% values of the first quantile listing pasted in the
# comments above.
# project_type ("A3A" vs "pcawg") is created here as well, because the
# facet_wrap(~project_type) plots that follow need it before the later
# statement that assigns it.
# NOTE(review): assumes A3A_tot_event_ratio_df carries no project_type column
# of its own with different values -- confirm.
tmp_A3A_group_df <- A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  filter(Signature == "SBS2") %>%
  mutate(imd_group = ifelse(IMD_cutoff > 2047, "1",
                            ifelse(IMD_cutoff > 784, "2",
                                   ifelse(IMD_cutoff > 255, "3", "4")))) %>%
  mutate(project_type = ifelse(sub_project_code == "A3A", "A3A", "pcawg"))

# ---- HNSC / BLCA / BRCA / A3A samples in IMD groups 1-3 ---------------------
# All statements in this section used the same filter chain; it is applied
# once here. imd_group holds character labels; %in% matches them against
# c(1, 2, 3) after coercion.
imd123_four_type_df <- tmp_A3A_group_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  filter(Signature == "SBS2") %>%
  filter(sub_project_code %in% c("HNSC", "BLCA", "BRCA", "A3A")) %>%
  filter(imd_group %in% c(1, 2, 3))

# Omikli: clustered count vs total events, faceted by project_type.
# project_type must already exist on tmp_A3A_group_df for this plot and the
# next one.
imd123_four_type_df %>%
  filter(cluster == "omikli") %>%
  ggplot(aes(x = tot_event, y = sum_n)) +
  geom_point(aes(col = sub_project_code)) +
  facet_wrap(~project_type) +
  geom_smooth(method = "lm", fullrange = TRUE) +
  theme_bw()


# Kataegis: same plot, point size mapped to IMD_cutoff.
imd123_four_type_df %>%
  filter(cluster == "kataegis") %>%
  ggplot(aes(x = tot_event, y = sum_n, size = IMD_cutoff)) +
  geom_point(aes(col = sub_project_code)) +
  facet_wrap(~project_type) +
  geom_smooth(method = "lm", fullrange = TRUE) +
  theme_bw()


# Omikli, one facet per project. The size legend is hidden with
# guides(size = "none"), which replaces the deprecated guides(size = FALSE).
imd123_four_type_df %>%
  filter(cluster == "omikli") %>%
  ggplot(aes(x = tot_event, y = sum_n, size = IMD_cutoff)) +
  geom_point(aes(col = sub_project_code)) +
  facet_wrap(~sub_project_code, ncol = 4) +
  geom_smooth(method = "lm", fullrange = TRUE) +
  theme_bw() +
  guides(size = "none")


# Kataegis, one facet per project.
imd123_four_type_df %>%
  filter(cluster == "kataegis") %>%
  ggplot(aes(x = tot_event, y = sum_n, size = IMD_cutoff)) +
  geom_point(aes(col = sub_project_code)) +
  facet_wrap(~sub_project_code, ncol = 4) +
  geom_smooth(method = "lm", fullrange = TRUE) +
  theme_bw() +
  guides(size = "none")


# Both cluster types, one facet per project x cluster.
imd123_four_type_df %>%
  ggplot(aes(x = tot_event, y = sum_n, size = IMD_cutoff)) +
  geom_point(aes(col = sub_project_code)) +
  facet_wrap(~sub_project_code + cluster, ncol = 4) +
  geom_smooth(method = "lm", fullrange = TRUE) +
  theme_bw()


# Distribution of cl_ratio per project, one facet per cluster type.
imd123_four_type_df %>%
  ggplot(aes(x = sub_project_code, y = cl_ratio, col = sub_project_code)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(position = position_dodge2(width = 0.5)) +
  facet_wrap(~cluster) +
  theme_bw()


# Pairwise Wilcoxon tests of cl_ratio between projects within each cluster
# type, BH-adjusted; an earlier result is pasted in the comment below.
imd123_four_type_df %>%
  group_by(cluster) %>%
  rstatix::wilcox_test(cl_ratio ~ sub_project_code,
                       p.adjust.method = "BH")


#cluster  .y.      group1 group2    n1    n2 statistic        p p.adj p.adj.signif
#* <chr>    <chr>    <chr>  <chr>  <int> <int>     <dbl>    <dbl> <dbl> <chr>
#  1 kataegis cl_ratio A3A    BLCA      16    16       66  0.02     0.115 ns
#2 kataegis cl_ratio A3A    BRCA      16    61      384  0.191    0.35  ns
#3 kataegis cl_ratio A3A    HNSC      16    17      138  0.957    0.957 ns
#4 kataegis cl_ratio BLCA   BRCA      16    61      545  0.477    0.572 ns
#5 kataegis cl_ratio BLCA   HNSC      16    17      194  0.038    0.115 ns
#6 kataegis cl_ratio BRCA   HNSC      61    17      617  0.233    0.35  ns
#7 omikli   cl_ratio A3A    BLCA      16    16       53  0.004    0.012 *
#8 omikli   cl_ratio A3A    BRCA      16    61      580. 0.253    0.38  ns
#9 omikli   cl_ratio A3A    HNSC      16    17      145  0.763    0.763 ns
#10 omikli   cl_ratio BLCA   BRCA      16    61      757  0.000745 0.004 **
#11 omikli   cl_ratio BLCA   HNSC      16    17      208  0.009    0.017 *
#12 omikli   cl_ratio BRCA   HNSC      61    17      466  0.529    0.635 ns
# Samples in IMD groups 1-3. This assignment used to sit *below* the eight
# model fits that read t_df, so the script failed when run top to bottom
# (or silently used a stale t_df left over in the session).
t_df <- tmp_A3A_group_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  filter(Signature == "SBS2") %>%
  filter(imd_group %in% c(1, 2, 3))

# Second entry of the summary(lm) coefficient matrix for sum_n ~ tot_event
# within one project and one cluster type (the intercept estimate comes
# first, then the tot_event estimate). `$coefficients` is written out in
# full; the original `$coefficient` relied on partial matching.
t_df_lm_slope <- function(project_code_arg, cluster_arg) {
  fit_df <- t_df %>%
    filter(sub_project_code == project_code_arg) %>%
    filter(cluster == cluster_arg)
  summary(lm(sum_n ~ tot_event, data = fit_df))$coefficients[2]
}

t_df_lm_slope("A3A", "omikli")
t_df_lm_slope("BLCA", "omikli")
t_df_lm_slope("BRCA", "omikli")
t_df_lm_slope("HNSC", "omikli")

t_df_lm_slope("A3A", "kataegis")
t_df_lm_slope("BLCA", "kataegis")
t_df_lm_slope("BRCA", "kataegis")
t_df_lm_slope("HNSC", "kataegis")
# Repeated subsampling of the three PCAWG cancer types (BLCA, BRCA, HNSC):
# each iteration draws `n_per_type` rows per type from t_df for one cluster
# type, fits sum_n ~ tot_event on the pooled draw and keeps the first two
# entries of the coefficient matrix (intercept and slope estimates).
#
# Compared with the original pair of copy-pasted loops:
#   * the model is fitted once per iteration instead of twice,
#   * the kataegis loop no longer reuses variables named "omi",
#   * seq_len() replaces 1:500.
# The order of random draws is unchanged (all omikli iterations first, then
# kataegis; BLCA, BRCA, HNSC within each iteration).
# NOTE(review): no seed is set, so the means pasted in the comment below are
# not exactly reproducible. sample_n() fails if a type has fewer than
# `n_per_type` rows.
subsample_lm_coefs <- function(cluster_arg, n_iter = 500, n_per_type = 10) {
  cluster_df <- t_df %>% filter(cluster == cluster_arg)
  lapply(seq_len(n_iter), function(iter_idx) {
    per_type_draws <- lapply(c("BLCA", "BRCA", "HNSC"), function(type_arg) {
      sample_n(cluster_df %>% filter(sub_project_code == type_arg), n_per_type)
    })
    pooled_df <- do.call(rbind, per_type_draws)
    coef_mat <- summary(lm(sum_n ~ tot_event, data = pooled_df))$coefficients
    data.frame("int" = c(coef_mat[1]),
               "slope" = c(coef_mat[2]))
  })
}

fit_omi_mean <- subsample_lm_coefs("omikli")
fit_kat_mean <- subsample_lm_coefs("kataegis")

# One row per iteration; print the mean slope and intercept per cluster type.
fit_omi_mean_df <- do.call(rbind, fit_omi_mean)
fit_kat_mean_df <- do.call(rbind, fit_kat_mean)
fit_omi_mean_df$slope %>% mean()
fit_omi_mean_df$int %>% mean()
fit_kat_mean_df$slope %>% mean()
fit_kat_mean_df$int %>% mean()

#omi
#[1] 0.03566574
#[1] -18.9157
#kat
#[1] 0.005556589
#[1] 2.500038
# Intercept and slope of sum_n ~ tot_event for the A3A samples in t_df, per
# cluster type. Each model is fitted once (the original fitted it twice, once
# per printed coefficient).
a3a_omi_coef_mat <- summary(
  lm(sum_n ~ tot_event,
     data = t_df %>% filter(cluster == "omikli", sub_project_code == "A3A"))
)$coefficients
a3a_omi_coef_mat[1]
a3a_omi_coef_mat[2]

a3a_kat_coef_mat <- summary(
  lm(sum_n ~ tot_event,
     data = t_df %>% filter(cluster == "kataegis", sub_project_code == "A3A"))
)$coefficients
a3a_kat_coef_mat[1]
a3a_kat_coef_mat[2]

# Console output from an earlier run. These lines were pasted without a
# leading '#', which made the whole file fail to parse.
##omi
#[1] -4.256075
#[1] 0.02965902
##kat
#[1] 0.03901723
#[1] 0.004702843

# Label every row as "A3A" or "pcawg".
tmp_A3A_group_df <- mutate(
  tmp_A3A_group_df,
  project_type = ifelse(sub_project_code == "A3A", "A3A", "pcawg")
)

# Number of omikli rows per project and IMD group, one column per imd_group.
# NOTE(review): target_type is assigned near the end of this script; it has to
# exist in the session before this statement runs.
group_count <- tmp_A3A_group_df %>%
  filter(
    clonality == "clonal" | sub_project_code == "A3A",
    Signature == "SBS2",
    cluster == "omikli",
    sub_project_code %in% target_type
  ) %>%
  group_by(sub_project_code, imd_group) %>%
  dplyr::summarise(n = n()) %>%
  spread(imd_group, n)

# Project / group combinations with no rows come out of spread() as NA.
group_count[is.na(group_count)] <- 0

# Total over IMD groups 1-3 (printed only, not stored).
mutate(group_count, sum_1to3 = `1` + `2` + `3`)
#sub_project_code     n
#<chr>            <int>
#1 A3A                 16
#2 BLCA                16
#3 BRCA                61
#4 HNSC                17
#5 LUAD                 5
#6 LUSC                 3
#7 UCEC                 9

#sub_project_code   `1`   `2`   `3`   `4` `<NA>` sum_1to3
#<chr>            <int> <int> <int> <int>  <int>    <int>
#1 A3A                 16     0     0     0      0       16
#2 BLCA                 0     6    10     3      0       16
#3 BRCA                20    31    10     8      0       61
#4 ESAD                 0     0     4    12      1        4
#5 HNSC                 2     9     6    11      0       17
#6 LUAD                 0     3     2     5      0        5
#7 LUSC                 1     0     2    12      0        3
#8 UCEC                 0     6     3     6      0        9

# Omikli rows of the target projects, ordered by IMD group (printed only).
arrange(
  filter(
    tmp_A3A_group_df,
    clonality == "clonal" | sub_project_code == "A3A",
    Signature == "SBS2",
    sub_project_code %in% target_type,
    cluster == "omikli"
  ),
  imd_group
)

# Linear model of the omikli count on total events, IMD group and their
# interaction; rows with a missing imd_group are dropped. The lm() call is
# kept token-for-token so that the "Call:" echoed by summary() is unchanged;
# an earlier result is pasted in the comment below.
imd_interaction_fit <- lm(
  sum_n ~ tot_event + imd_group + tot_event*imd_group,
  data = tmp_A3A_group_df %>%
    filter(clonality == "clonal" | sub_project_code == "A3A") %>%
    filter(Signature == "SBS2") %>%
    filter(sub_project_code %in% target_type) %>%
    filter(cluster == "omikli") %>%
    filter(!is.na(imd_group))
)
summary(imd_interaction_fit)

#Coefficients:
#  Estimate Std. Error t value Pr(>|t|)
#(Intercept)          -2.543e+00  1.420e+01  -0.179 0.858017
#tot_event             3.535e-02  9.541e-03   3.705 0.000281 ***
#  imd_group2           -6.556e+00  1.824e+01  -0.359 0.719686
#imd_group3           -4.448e+01  1.992e+01  -2.232 0.026818 *
#  imd_group4            7.767e+00  1.850e+01   0.420 0.675136
#tot_event:imd_group2 -3.342e-03  1.042e-02  -0.321 0.748760
#tot_event:imd_group3  5.687e-05  9.659e-03   0.006 0.995309
#tot_event:imd_group4 -2.680e-02  9.575e-03  -2.799 0.005690 **
#  ---
# Per-sample structural-variant table.
# presumably one row per id with DEL / INV / TRA / DUP counts -- verify
# against the file
sv_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/Fig3/pcawg_sv_sum.txt"
)

# Attach the SV table to the target-project rows (natural join on the shared
# columns) and plot clustered count vs total events for IMD groups other
# than "4", one facet per cluster x project.
# sv_total is computed but not used by the plot.
tmp_A3A_group_df %>%
  filter(
    clonality == "clonal" | sub_project_code == "A3A",
    Signature == "SBS2",
    sub_project_code %in% target_type
  ) %>%
  left_join(sv_df) %>%
  arrange(id) %>%
  mutate(sv_total = DEL + INV + TRA + DUP) %>%
  filter(imd_group != 4) %>%
  ggplot(aes(x = tot_event, y = sum_n, size = IMD_cutoff)) +
  geom_point() +
  facet_wrap(~cluster + sub_project_code, ncol = 8)
# ---- Exploratory views of the SBS2 rows (clonal rows or A3A samples) --------
# Every statement below starts from the same two conditions on
# A3A_tot_event_ratio_df and only prints or plots; nothing is stored.
# target_type must exist in the session (it is assigned further down).

# cl_ratio per project for IMD_cutoff > 500, one facet per cluster type.
filter(
  A3A_tot_event_ratio_df,
  clonality == "clonal" | sub_project_code == "A3A",
  Signature == "SBS2",
  # sub_project_code %in% c("A3A", "BLCA", "BRCA"),
  # cluster == "kataegis",
  IMD_cutoff > 500
) %>%
  ggplot(aes(x = sub_project_code, y = cl_ratio)) +
  geom_boxplot() +
  facet_wrap(~cluster)

# Omikli counts vs total events, coloured by IMD bin
# (< 150 "low", < 1000 "mid", otherwise "high").
filter(
  A3A_tot_event_ratio_df,
  clonality == "clonal" | sub_project_code == "A3A",
  Signature == "SBS2",
  sub_project_code %in% target_type,
  cluster == "omikli"
) %>%
  mutate(IMD_group = ifelse(IMD_cutoff < 150, "low",
                            ifelse(IMD_cutoff < 1000, "mid", "high"))) %>%
  ggplot(aes(x = tot_event, y = sum_n, size = IMD_cutoff, col = IMD_group)) +
  geom_point(alpha = 0.7) +
  theme_bw() +
  ggtitle("omikli_150_1000")

# Kataegis counts vs total events. Rows are first restricted to
# IMD_cutoff > 1000, so every remaining row falls into the "high" bin.
filter(
  A3A_tot_event_ratio_df,
  clonality == "clonal" | sub_project_code == "A3A",
  Signature == "SBS2",
  sub_project_code %in% target_type,
  cluster == "kataegis",
  IMD_cutoff > 1000
) %>%
  mutate(IMD_group = ifelse(IMD_cutoff < 150, "low",
                            ifelse(IMD_cutoff < 1000, "mid", "high"))) %>%
  ggplot(aes(x = tot_event, y = sum_n, size = IMD_cutoff, col = IMD_group)) +
  geom_point(alpha = 0.7) +
  theme_bw() +
  ggtitle("kataegis_150_1000")


# Omikli cl_ratio against IMD_cutoff (IMD_cutoff > 100).
filter(
  A3A_tot_event_ratio_df,
  clonality == "clonal" | sub_project_code == "A3A",
  Signature == "SBS2",
  sub_project_code %in% target_type,
  cluster == "omikli",
  IMD_cutoff > 100
) %>%
  ggplot(aes(x = IMD_cutoff, y = cl_ratio, size = IMD_cutoff,
             col = sub_project_code)) +
  geom_point() +
  theme_bw()


# Kataegis counts vs total events (IMD_cutoff > 500).
filter(
  A3A_tot_event_ratio_df,
  clonality == "clonal" | sub_project_code == "A3A",
  Signature == "SBS2",
  sub_project_code %in% target_type,
  cluster == "kataegis",
  IMD_cutoff > 500
) %>%
  ggplot(aes(x = tot_event, y = sum_n, size = IMD_cutoff,
             col = sub_project_code)) +
  geom_point() +
  theme_bw()


# Omikli rows with more than 20000 total events, largest sum_n first.
filter(
  A3A_tot_event_ratio_df,
  clonality == "clonal" | sub_project_code == "A3A",
  Signature == "SBS2",
  sub_project_code %in% target_type,
  cluster == "omikli",
  IMD_cutoff > 100,
  tot_event > 20000
) %>%
  arrange(desc(sum_n))


# Histogram of IMD_cutoff in bins of width 100.
filter(
  A3A_tot_event_ratio_df,
  clonality == "clonal" | sub_project_code == "A3A",
  Signature == "SBS2"
) %>%
  ggplot(aes(x = IMD_cutoff)) +
  geom_histogram(binwidth = 100)

# All SBS2 rows ordered by IMD_cutoff.
filter(
  A3A_tot_event_ratio_df,
  clonality == "clonal" | sub_project_code == "A3A",
  Signature == "SBS2"
) %>%
  arrange(IMD_cutoff)

# A3A rows only, up to 100 rows printed.
filter(
  A3A_tot_event_ratio_df,
  clonality == "clonal" | sub_project_code == "A3A",
  Signature == "SBS2",
  sub_project_code == "A3A"
) %>%
  print(n = 100)




# Project codes used by the `sub_project_code %in% target_type` filters
# throughout this script (several of them appear above this line).
target_type <- c(
  "A3A", "BLCA", "BRCA", "HNSC",
  "LUAD", "LUSC", "UCEC", "ESAD"
)


# cl_ratio against total SNV count (log10 x axis), coloured by
# sig_ratio = `2and13sum` / tot_snv, one facet per cluster x project.
# No Signature filter is applied here.
A3A_tot_event_ratio_df %>%
  filter(clonality == "clonal" | sub_project_code == "A3A") %>%
  mutate(sig_ratio = `2and13sum` / tot_snv) %>%
  ggplot(aes(x = tot_snv, y = cl_ratio, col = sig_ratio)) +
  geom_point() +
  facet_wrap(~cluster + sub_project_code, ncol = 11) +
  scale_x_log10()