# Packages ----
# Attach the packages before any of their functions are called: read_tsv()
# is provided by readr, which is attached through tidyverse, so the first
# read must come after these library() calls.
library(dplyr)
library(tidyverse)
library(ggplot2)

# Inputs ----
# NOTE(review): A3A_cl_sv_df is not referenced again in the visible part of
# this script -- confirm whether this table is still needed.
A3A_cl_sv_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/30_clustered_mutation/A3A_cl_sv_df.v3.txt")

# Per-sample metadata; later steps use its id, APOBEC, dose and `m/d` columns.
metadata <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/sig/metadata.txt")

# Annotated clustered-mutation calls for all samples.
cl_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/25_simulation/jolim_rerun/v2//APOBEC_clustered_mutations/annotated/vaf_considered/APOBEC_clustered_mutations.all_samples.annotated.tsv")


# Per-sample variant tables ----
# Every file in the new_filter directory whose name matches ".excl.*$".
files_to_read <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/new_filter",
  pattern = ".excl.*$",
  full.names = TRUE
)

# Read each table, tag it with a sample id (the file name with everything
# from ".mutect2" onwards removed) and keep only the columns used downstream.
vcf_tmp <- lapply(files_to_read, function(vcf_path) {
  vcf_tbl <- read_tsv(vcf_path)
  vcf_tbl <- mutate(vcf_tbl, id = gsub(".mutect2.*", "", basename(vcf_path)))
  select(vcf_tbl, id, `#CHROM`, POS, REF, ALT, sig_cont)
})

# Stack all samples into one long table.
merge_df <- do.call(rbind, vcf_tmp)
# Keep only rows whose sig_cont contains a two-base "NN>NN" pattern.
merge_df <- filter(merge_df, grepl("[ATGC][ATGC]>[ATGC][ATGC]", sig_cont))

# Attach sample metadata (all columns except `m/d`); the join keys are the
# columns the two tables have in common.
merge_df <- left_join(merge_df, select(metadata, -`m/d`))

# Attach the cluster annotation. The cl_df key columns are renamed while
# selecting so that they line up with the names used in merge_df.
merge_df <- left_join(
  merge_df,
  select(
    cl_df,
    `#CHROM` = chr,
    POS = start,
    id = samples,
    IMD,
    cluster_id,
    cluster_type_omikli_upto_3
  )
)

# Rows without a cluster annotation get explicit placeholders: IMD 0,
# cluster_id "." and cluster "non-clust".
merge_df <- merge_df %>%
  mutate(
    IMD = ifelse(is.na(IMD), 0, IMD),
    cluster_id = ifelse(is.na(cluster_id), ".", cluster_id),
    cluster = ifelse(
      is.na(cluster_type_omikli_upto_3),
      "non-clust",
      cluster_type_omikli_upto_3
    )
  ) %>%
  select(-cluster_type_omikli_upto_3)



# A3A samples at the 3ug and 100ng doses only.
A3A_cl_merge_df <- merge_df %>%
  filter(dose %in% c("3ug", "100ng"), APOBEC == "A3A") %>%
  mutate(APOBEC = "A3A")

# Per cluster (sample, chromosome, cluster_id, cluster type), AMS is the
# number of mutations whose sig_cont matches "TC>[GT][AT]". The per-cluster
# value is joined back onto the mutation rows and only clusters with
# AMS >= 2 are kept; "non-clust" rows receive no AMS and are dropped here.
A3A_cl_merge_f_df <- A3A_cl_merge_df %>%
  group_by(id, `#CHROM`, APOBEC, dose, TP53, cluster_id, cluster) %>%
  dplyr::summarise(AMS = sum(grepl("TC>[GT][AT]", sig_cont))) %>%
  filter(cluster != "non-clust") %>%
  left_join(A3A_cl_merge_df, .) %>%
  filter(AMS >= 2)

# no_snv: number of retained mutation rows in each cluster.
A3A_cl_merge_f_df <- A3A_cl_merge_f_df %>%
  group_by(id, `#CHROM`, cluster_id, cluster) %>%
  dplyr::summarise(no_snv = n()) %>%
  left_join(A3A_cl_merge_f_df, .)


## Total counts: per-sample signature exposure tables ##

# All files in sig_with_18 whose name matches "exposures.tsv".
sig_files <- list.files(
  path = "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/24_mutect2_strelka2_baseq/merge/clonal/new_filter/sig_with_18",
  pattern = "exposures.tsv",
  full.names = TRUE
)

# Read each exposure table and tag it with the sample id taken from the file
# name (everything from ".mutect2" onwards is removed).
sig_tmp <- lapply(sig_files, function(exposure_path) {
  exposure_tbl <- read_tsv(exposure_path)
  mutate(exposure_tbl, id = gsub(".mutect2.*", "", basename(exposure_path)))
})

# One long table with the exposures of every sample.
sig_merge_df <- do.call(rbind, sig_tmp)

## Clustered TC>N mutation count per sample ##

# Number of clustered mutations per sample whose sig_cont matches
# "TC>[AGT][ACGT]".
# NOTE(review): this is counted before the exclusion filters that are applied
# to A3A_cl_merge_f_df further down -- confirm that this order is intended.
cl_TCN_df <- A3A_cl_merge_f_df %>%
  filter(cluster != "non-clust", grepl("TC>[AGT][ACGT]", sig_cont)) %>%
  count(id, name = "cl_TCN")

# Start from every A3A, non-CTRL sample whose `m/d` is not "M", so samples
# without any clustered mutation are still listed.
cl_TCN_df <- metadata %>%
  filter(`m/d` != "M", APOBEC == "A3A", dose != "CTRL") %>%
  select(id) %>%
  left_join(cl_TCN_df)

# Samples with no match in the count table get 0.
cl_TCN_df[is.na(cl_TCN_df)] <- 0



# Clusters to exclude ----
# Both exclusion lists are matched through an `info` key of the form
# "<id>_<#CHROM>_<cluster_id>".
# NOTE(review): sv_excl_df is used through its `info` column as read from the
# file -- presumably it is already in that format; verify against the file.
sv_excl_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/31_exclude_cluster/exclude_cl_mut_df.v2.txt")

# The mis-phased list does not come with the key, so build it here.
miss_phs_excl_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/31_exclude_cluster/miss_phased_cluster.txt") %>%
  mutate(info = paste(id, `#CHROM`, cluster_id, sep = "_"))

# Build the same key on the mutation table and drop every row whose cluster
# appears in either exclusion list.
A3A_cl_merge_f_df <- A3A_cl_merge_f_df %>%
  mutate(info = paste(id, `#CHROM`, cluster_id, sep = "_")) %>%
  filter(!(info %in% c(sv_excl_df$info, miss_phs_excl_df$info)))

# Number of distinct clusters per sample and cluster type ----
# (An earlier variant restricted to "kataegis"/"omikli" was assigned to
# cl_sum_df and immediately overwritten by this statement without being read;
# that dead computation has been removed.)
#
# Steps: de-duplicate to one row per cluster, count clusters per chromosome
# and type, sum the chromosome counts per sample and type, then spread to one
# column per cluster type.
cl_sum_df <- A3A_cl_merge_f_df %>%
  select(id, dose, TP53, `#CHROM`, cluster_id, cluster) %>%
  unique() %>%
  group_by(id, dose, TP53, `#CHROM`, cluster) %>%
  dplyr::summarise(n = n()) %>%
  group_by(id, dose, TP53, cluster) %>%
  dplyr::summarise(sum_n = sum(n)) %>%
  spread(cluster, sum_n)

# A cluster type that is absent from a sample becomes NA after spreading;
# record it as 0 clusters.
cl_sum_df[is.na(cl_sum_df)] <- 0

# Back to long format. The kataegis:other range selects columns by position,
# so both columns must exist after spread() and every cluster-type column
# must lie between them -- TODO confirm this holds for the input data.
cl_sum_df <- cl_sum_df %>%
  gather(cluster, sum_n, kataegis:other)


# Zero rows for samples without any retained cluster ----
# For each cluster type, take the 3ug/100ng samples that do not appear in
# A3A_cl_merge_f_df and give them sum_n = 0. The blocks are stacked in the
# order kataegis, omikli, mbs, other. No zero rows are generated for a "dbs"
# type.
zero_df <- do.call(
  rbind,
  lapply(
    c("kataegis", "omikli", "mbs", "other"),
    function(cluster_type) {
      merge_df %>%
        filter(dose %in% c("3ug", "100ng")) %>%
        filter(!id %in% A3A_cl_merge_f_df$id) %>%
        select(id, dose, TP53) %>%
        unique() %>%
        mutate(cluster = cluster_type, sum_n = 0)
    }
  )
)

# Append the zero rows, spread to one column per cluster type and print up to
# 100 rows of the wide table for inspection.
cl_sum_df <- rbind(cl_sum_df, zero_df) %>%
  spread(cluster, sum_n) %>%
  print(n = 100)

# Any sample/type combination still missing counts as 0 clusters.
cl_sum_df[is.na(cl_sum_df)] <- 0

# Final long format: one row per sample and cluster type.
cl_sum_df <- gather(cl_sum_df, cluster, sum_n, kataegis:other)


# Event table ----
# `2and13sum`: per-sample sum of the rounded exposures of signatures v3_2 and
# v3_13. It is joined onto the cluster counts together with cl_TCN, and
# noncl_TCN is what remains of `2and13sum` after subtracting cl_TCN.
event_df <- sig_merge_df %>%
  mutate(Exposure = round(Exposure, 0)) %>%
  filter(Signature %in% c("v3_2", "v3_13")) %>%
  group_by(id) %>%
  dplyr::summarise(`2and13sum` = sum(Exposure)) %>%
  left_join(cl_sum_df, .) %>%
  left_join(cl_TCN_df) %>%
  mutate(noncl_TCN = `2and13sum` - cl_TCN)

# cl_event: total number of clusters per sample over all cluster types.
# tot_event adds noncl_TCN to it, and ratio is the share of each cluster type
# in tot_event.
event_df <- event_df %>%
  group_by(id) %>%
  dplyr::summarise(cl_event = sum(sum_n)) %>%
  left_join(event_df, .) %>%
  mutate(
    tot_event = noncl_TCN + cl_event,
    ratio = sum_n / tot_event
  )

# Label each row by enzyme: ids containing "A3A" are A3A, all others A3B.
event_df <- event_df %>%
  mutate(APOBEC = ifelse(grepl("A3A", id), "A3A", "A3B"))

# Write the A3A rows as a tab-separated table without quoting or row names.
# `row.names` is spelled out in full (the previous `row.name` only worked via
# partial argument matching) and TRUE/FALSE literals replace T/F, which are
# ordinary variables that can be reassigned.
write.table(
  event_df %>% filter(APOBEC == "A3A"),
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/A3A_event_df.edit.exclsv.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
