## PCAWG clonal mutations: clustered-mutation event summary

library(dplyr)
library(tidyverse)
library(ggplot2)
# Load the three tab-separated input tables used by this script.

# Sample-level table; later code expects the columns `Tumor_Sample_Barcode`,
# `Project_Code` and `histology_abbreviation` to be present.
metadata <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/target_sample.txt"
)

# Bare expression: shows the table when top-level auto-printing is active.
metadata

# Annotated clustered-mutation table covering all samples (per its file name).
# Later code reads `chr`, `start`, `samples`, `IMD`, `cluster_id` and
# `cluster_type_omikli_upto_3` from it.
cl_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/timing_clonality/v2/vcf/separate_clonal/APOBEC_clonal_clustered_mutations/annotated/vaf_considered/APOBEC_clonal_clustered_mutations.all_samples.annotated.tsv"
)

# NOTE(review): `compare_df` is not referenced anywhere in the visible part of
# this script -- confirm it is still needed.
compare_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/timing_clonality/compare.txt"
)


# Build `merge_df`: one row per mutation read from the per-sample `.cont`
# files, annotated with sample metadata and clustered-mutation information.

files_to_read <- list.files(
  "/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/timing_clonality/v2/vcf/separate_clonal/original_files",
  ".cont$",
  full.names = TRUE
)

# Fail early with a clear message: rbind-ing an empty list would give NULL and
# the pipeline below would then stop with an unrelated-looking error.
if (length(files_to_read) == 0) {
  stop("No '.cont' files found in the original_files directory", call. = FALSE)
}

# Read every file, derive the sample id from the file name (everything from
# the "timing" part onwards is stripped) and keep only the columns used below.
# NOTE(review): column types are guessed per file by read_tsv; if `#CHROM` is
# guessed differently between files, rbind will coerce -- confirm acceptable.
vcf_tmp <- lapply(files_to_read, function(path) {
  read_tsv(path) %>%
    mutate(id = gsub(".timing.*", "", basename(path))) %>%
    select(id, `#CHROM`, POS, REF, ALT, sig_cont, Clonality)
})

merge_df <- do.call(rbind, vcf_tmp)

# Keep rows whose `sig_cont` contains a two-base > two-base pattern.
merge_df <- merge_df %>%
  filter(grepl("[ATGC][ATGC]>[ATGC][ATGC]", sig_cont))

# Align the sample key name with `merge_df` and attach the metadata columns
# (all except `histology_abbreviation`). The join key is stated explicitly
# instead of relying on whichever column names happen to be shared.
metadata <- metadata %>%
  plyr::rename(c("Tumor_Sample_Barcode" = "id"))
merge_df <- left_join(
  merge_df,
  metadata %>% select(-`histology_abbreviation`),
  by = "id"
)

# Attach cluster annotation by chromosome, sample and position.
merge_df <- left_join(
  merge_df,
  cl_df %>%
    select(chr, start, samples, IMD, cluster_id, cluster_type_omikli_upto_3) %>%
    plyr::rename(c("chr" = "#CHROM", "start" = "POS", "samples" = "id")),
  by = c("#CHROM", "id", "POS")
)

# Mutations without a cluster annotation get placeholder values:
# IMD = 0, cluster_id = ".", cluster = "non-clust".
merge_df <- merge_df %>%
  mutate(
    IMD = ifelse(is.na(IMD), 0, IMD),
    cluster_id = ifelse(is.na(cluster_id), ".", cluster_id),
    cluster = ifelse(
      is.na(cluster_type_omikli_upto_3),
      "non-clust",
      cluster_type_omikli_upto_3
    )
  ) %>%
  select(-cluster_type_omikli_upto_3)

cl_merge_df <- merge_df

# Keep only mutations that belong to a cluster (anything other than
# "non-clust") in which at least two mutations match "TC>[GT][AT]" (AMS >= 2).
# AMS is counted per sample / chromosome / project / cluster id / cluster type.
# Rows of clusters without a summary row get AMS = NA and are dropped by the
# final filter. Join keys are spelled out (they are exactly the grouping
# columns) rather than inferred from shared column names.
cl_merge_f_df <- left_join(
  cl_merge_df,
  cl_merge_df %>%
    group_by(id, `#CHROM`, Project_Code, cluster_id, cluster) %>%
    dplyr::summarise(AMS = sum(grepl("TC>[GT][AT]", sig_cont))) %>%
    filter(cluster != "non-clust"),
  by = c("id", "#CHROM", "Project_Code", "cluster_id", "cluster")
) %>%
  filter(AMS >= 2)

# Add `no_snv`: the number of retained rows in each cluster.
cl_merge_f_df <- left_join(
  cl_merge_f_df,
  cl_merge_f_df %>%
    group_by(id, `#CHROM`, cluster_id, cluster) %>%
    dplyr::summarise(no_snv = n()),
  by = c("id", "#CHROM", "cluster_id", "cluster")
)


# Export the clustered-mutation rows of selected samples, one tab-separated
# `<id>.cl_info.txt` file per sample.
id_c <- unique(cl_merge_f_df$id)

# Currently restricted to a single sample; the commented-out header below
# iterates over every sample id instead.
for (t_id in c("14bf9bfb-14d6-4cac-a556-828f680e8a15")) {
  # for (t_id in id_c) {
  write.table(
    cl_merge_f_df %>% filter(id == t_id),
    file = paste0(
      "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/30_clustered_mutation/pcawg/",
      t_id,
      ".cl_info.txt"
    ),
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )
}

##tot count##

# Build `sig_merge_df`: the per-sample signature exposure tables stacked into
# one table, with the sample id derived from each file name.
sig_files <- list.files(
  "/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/timing_clonality/v2/vcf/separate_clonal/original_files",
  "exposures.tsv",
  full.names = TRUE
)

# Fail early with a clear message: rbind-ing an empty list would give NULL and
# the rename below would then stop with an unrelated-looking error.
if (length(sig_files) == 0) {
  stop("No 'exposures.tsv' files found in the original_files directory",
       call. = FALSE)
}

sig_tmp <- lapply(sig_files, function(path) {
  read_tsv(path) %>%
    mutate(id = gsub(".timing.*", "", basename(path)))
})

sig_merge_df <- do.call(rbind, sig_tmp)

# Give the signature column a syntactic name.
sig_merge_df <- sig_merge_df %>%
  plyr::rename(c("Signature #" = "Signature"))


##sig_ratio##
#############

# Per-sample share of signatures "v3_2" and "v3_13" relative to their summed
# (rounded) exposure, reshaped to long format for plotting.
# NOTE(review): the numerators (`v3_2`, `v3_13`) are the unrounded exposures
# while the denominator sums the rounded ones, so the two ratios need not add
# up to 1, and a zero denominator gives Inf/NaN -- confirm this is intended.
sig_ratio_df <- sig_merge_df %>%
  filter(Signature %in% c("v3_2", "v3_13")) %>%
  select(-Proportion) %>%
  spread(Signature, Exposure) %>%
  left_join(
    sig_merge_df %>%
      filter(Signature %in% c("v3_2", "v3_13")) %>%
      mutate(Exposure = round(Exposure, 0)) %>%
      group_by(id) %>%
      dplyr::summarise(`2and13sum` = sum(Exposure))
  ) %>%
  mutate(
    SBS2_ratio = v3_2 / `2and13sum`,
    SBS13_ratio = v3_13 / `2and13sum`
  ) %>%
  left_join(metadata) %>%
  select(-histology_abbreviation) %>%
  # Project code up to (not including) the first "-".
  mutate(sub_project_code = gsub("-.*", "", Project_Code)) %>%
  gather(Signature, ratio, SBS2_ratio:SBS13_ratio) %>%
  mutate(Signature = gsub("_ratio", "", Signature))

# Box plot (outliers hidden) with jittered points, per sub-project and
# signature.
ggplot(sig_ratio_df, aes(x = sub_project_code, y = ratio, col = Signature)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(position = position_jitterdodge())


##

# Per-sample count (`cl_TCN`) of retained clustered mutations whose `sig_cont`
# matches "TC>[AGT][ACGT]".
cl_TCN_df <- cl_merge_f_df %>%
  filter(
    cluster != "non-clust",
    grepl("TC>[AGT][ACGT]", sig_cont)
  ) %>%
  count(id, name = "cl_TCN")

# Expand to every sample listed in `metadata`; samples without any matching
# mutation get NA here, which is replaced by 0 just below.
cl_TCN_df <- metadata %>%
  select(id) %>%
  left_join(cl_TCN_df)

cl_TCN_df[is.na(cl_TCN_df)] <- 0

# Build `cl_sum_df`: the number of distinct clusters per sample and cluster
# type, after removing the clusters listed in the exclusion table, with
# explicit zero rows for samples that have no retained cluster.

##add 2303056, cl_info##
# NOTE(review): `cl_sv_df` is not referenced in the visible part of this
# script -- confirm it is still needed.
cl_sv_df <- read_tsv("/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/timing_clonality/v2/vcf/separate_clonal/pcawg_cl_br_info.txt")
##edit 230419

# Clusters to exclude, identified by "<id>_<#CHROM>_<cluster_id>" in `info`.
sv_cor_cl_df <- read_tsv("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/31_exclude_cluster/exclude_cl_mut_df.txt")

cl_merge_f_df <- cl_merge_f_df %>%
  mutate(info = paste(id, `#CHROM`, cluster_id, sep = "_")) %>%
  filter(!info %in% sv_cor_cl_df$info)

# Zero rows are derived AFTER the exclusion above. Building them beforehand
# (as was done previously) skipped samples whose clusters were all excluded:
# they appeared neither in the cluster counts nor in the zero rows, so they
# had no kataegis/omikli/mbs/other entries at all.
no_cluster_samples <- merge_df %>%
  filter(!id %in% cl_merge_f_df$id) %>%
  select(id, Project_Code) %>%
  unique()

# One zero row per sample and cluster type ("dbs" is added for every sample
# further below).
zero_df <- do.call(
  rbind,
  lapply(c("kataegis", "omikli", "mbs", "other"), function(cl_type) {
    no_cluster_samples %>% mutate(cluster = cl_type, sum_n = 0)
  })
)

# Count distinct clusters per sample / chromosome / cluster type, then sum
# over chromosomes. Spreading to wide form and back fills cluster types that
# a sample lacks with 0.
cl_sum_df <- cl_merge_f_df %>%
  select(id, Project_Code, `#CHROM`, cluster_id, cluster) %>%
  unique() %>%
  group_by(id, Project_Code, `#CHROM`, cluster) %>%
  dplyr::summarise(n = n()) %>%
  group_by(id, Project_Code, cluster) %>%
  dplyr::summarise(sum_n = sum(n)) %>%
  spread(cluster, sum_n)
cl_sum_df[is.na(cl_sum_df)] <- 0

# NOTE(review): this range selection requires both a `kataegis` and an `other`
# column to exist after spreading -- confirm all cluster types always occur.
cl_sum_df <- cl_sum_df %>% gather(cluster, sum_n, kataegis:other)

cl_sum_df <- rbind(cl_sum_df, zero_df)

# Every sample in `merge_df` gets a "dbs" row with a count of 0.
cl_sum_df <- rbind(
  cl_sum_df,
  merge_df %>%
    select(id, Project_Code) %>%
    unique() %>%
    mutate(cluster = "dbs", sum_n = 0)
)
# Combine cluster counts with signature exposure per sample.
#   2and13sum : sum of the rounded "v3_2" and "v3_13" exposures
#   cl_TCN    : clustered count from `cl_TCN_df` (0 when the sample is absent)
#   noncl_TCN : 2and13sum - cl_TCN
event_df <- cl_sum_df %>%
  left_join(
    sig_merge_df %>%
      filter(Signature %in% c("v3_2", "v3_13")) %>%
      mutate(Exposure = round(Exposure, 0)) %>%
      group_by(id) %>%
      dplyr::summarise(`2and13sum` = sum(Exposure))
  ) %>%
  left_join(cl_TCN_df) %>%
  mutate(
    cl_TCN = ifelse(is.na(cl_TCN), 0, cl_TCN),
    noncl_TCN = `2and13sum` - cl_TCN
  )

# cl_event  : total cluster count of the sample over all cluster types
# tot_event : noncl_TCN + cl_event
# ratio     : share of this cluster type among tot_event
event_df <- event_df %>%
  left_join(
    event_df %>%
      group_by(id) %>%
      dplyr::summarise(cl_event = sum(sum_n))
  ) %>%
  mutate(
    tot_event = noncl_TCN + cl_event,
    ratio = sum_n / tot_event
  )

# Write the event table computed above, with `sub_project_code` (project code
# up to the first "-") added, as a tab-separated file.
write.table(
  event_df %>% mutate(sub_project_code = gsub("-.*", "", Project_Code)),
  file = "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/pcawg_clonal_event_df.edit.exclsv.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

# NOTE(review): from here on `event_df` no longer holds the table computed in
# this script; it is replaced by the contents of `pcawg_event_df.tsv`, which
# is then written out as `pcawg_clonal_event_df.tsv` -- confirm this is
# intended.
event_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/pcawg_event_df.tsv"
)

event_df <- event_df %>%
  mutate(sub_project_code = gsub("-.*", "", Project_Code))

write.table(
  event_df,
  file = "/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/Fig3/pcawg_clonal_event_df.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

