library(dplyr)
library(tidyverse)

# Collect the per-sample depth summary files. The second argument to
# list.files() is a regular expression, so "sum.txt" matches anywhere in the
# file name and its dot matches any character.
depth_files <- list.files(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/13_RNA_editing_bam_process/depth/v2",
  "sum.txt",
  full.names = TRUE
)

# Drop every path that contains "star2".
depth_files <- depth_files[!grepl("star2", depth_files)]

# Fail early: with no files, do.call(rbind, list()) would return NULL and the
# script would only break further down with an unrelated-looking error.
if (length(depth_files) == 0) {
  stop("No depth summary files found after excluding 'star2' paths",
       call. = FALSE)
}

# Read each file as a single "depth" column and tag its rows with two labels
# derived from the file name:
#   id        - file name with everything from the ".F...R..." match removed
#   read_type - what is left after stripping the greedy "^.*[-_].." prefix and
#               the ".depth.sum.txt" suffix
depth_tmp <- lapply(depth_files, function(x) {
  file_name <- basename(x)
  read_tsv(x, col_names = c("depth"), show_col_types = FALSE) %>%
    mutate(
      id = gsub(".F.*R.*", "", file_name),
      read_type = gsub(".depth.sum.txt", "", gsub("^.*[-_]..", "", file_name))
    )
})

# Stack the per-file tables into one long table.
depth_df <- do.call(rbind, depth_tmp)

# Attach the per-id total depth (sum of `depth` over all rows sharing an id)
# to every row, then expose the id column under the name `new_id`.
# dplyr is namespaced explicitly so a grouped mutate is never dispatched to a
# same-named function from another attached package.
depth_sum_df <- depth_df %>%
  dplyr::group_by(id) %>%
  dplyr::mutate(tot_depth = sum(depth)) %>%
  dplyr::ungroup() %>%
  dplyr::rename(new_id = id)


# Sample metadata. `new_id` is the base name of the F1R2 path with everything
# from the first ".F..." match onward removed, so it can be matched against
# depth_sum_df$new_id.
metadata <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/13_RNA_editing_bam_process/dna/depth/metadata.v2.txt"
) %>%
  mutate(new_id = gsub(".F.*", "", basename(F1R2)))

# Natural join: keys are whichever column names the two tables share.
depth_merge_df <- depth_sum_df %>%
  left_join(metadata)

# Keep only the APOBEC / batch / time combinations of interest.
depth_merge_fil_df <- depth_merge_df %>%
  filter(
    APOBEC %in% c("A3A", "A3B"),
    batch %in% c("bat1", "bat3"),
    time %in% c("0h", "48h")
  )


# Scale each sample's total depth by a fixed denominator.
# NOTE(review): 3119214835 is presumably the reference genome length in bp,
# which would make depth_ratio a mean coverage - confirm.
depth_merge_fil_df <- depth_merge_fil_df %>%
  dplyr::mutate(depth_ratio = tot_depth / 3119214835)

# One row per (id, new_id, depth_ratio) combination. The final rename swaps
# the two identifier column names: the column called `id` here becomes
# `new_id`, and the column called `new_id` becomes `id`.
depth_fin_df <- depth_merge_fil_df %>%
  dplyr::select(id, new_id, depth_ratio) %>%
  dplyr::distinct() %>%
  plyr::rename(c("id" = "new_id", "new_id" = "id"))


# Load the RNA-editing call table and attach each sample's depth_ratio.
# The join is a natural join on the column names shared with depth_fin_df.
rna_editing_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/14_RNA_editing_integrate/star/varscan/final/reheader/APOBEC_rna_editing.v8.tsv"
) %>%
  left_join(depth_fin_df)

# Restrict to samples that have an entry in the depth table.
rna_editing_filter_df <- dplyr::filter(
  rna_editing_df,
  new_id %in% depth_fin_df$new_id
)


# Depth-correct every read-count column by dividing it by the sample's
# depth_ratio; each result is stored as cor_<original column name>.
# Then keep the reporting columns and drop rows whose gene_dir is ".".
rna_editing_filter_df <- rna_editing_filter_df %>%
  dplyr::mutate(
    dplyr::across(
      c(
        var_readc, depth,
        F1R2_var_readc, F2R1_var_readc,
        F1R2_tot_readc, F2R1_tot_readc
      ),
      ~ .x / depth_ratio,
      .names = "cor_{.col}"
    )
  ) %>%
  dplyr::select(
    id, new_id, depth_ratio,
    CHROM, POS, REF, ALT,
    gene_dir, align_dir,
    cor_var_readc, cor_depth,
    cor_F1R2_var_readc, cor_F2R1_var_readc,
    cor_F1R2_tot_readc, cor_F2R1_tot_readc,
    sig_cont, rescue,
    Func_refGene, Gene_refGene, ExonicFunc_refGene
  ) %>%
  dplyr::filter(gene_dir != ".")

# Samples that have at least one site passing the corrected thresholds.
rna_id_c <- rna_editing_filter_df %>%
  dplyr::filter(cor_depth >= 4, cor_var_readc >= 3) %>%
  dplyr::pull(new_id) %>%
  unique()


# Write one tab-separated, VCF-like table per sample containing the sites that
# pass the corrected thresholds (cor_depth >= 4 and cor_var_readc >= 3).
# Output columns: #CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT,
# sample, align_dir, gene_dir. ID is "<CHROM>:<POS>_<REF>/<ALT>"; the
# QUAL..sample columns are "." placeholders. No "##" meta-information lines
# are written, only the column header row.

# The threshold filter and column layout do not depend on the sample, so they
# are computed once here instead of being repeated inside the loop.
corrected_vcf_df <- rna_editing_filter_df %>%
  dplyr::filter(cor_depth >= 4, cor_var_readc >= 3) %>%
  dplyr::mutate(
    ID = paste0(CHROM, ":", POS, "_", REF, "/", ALT),
    QUAL = ".",
    FILTER = ".",
    INFO = ".",
    FORMAT = ".",
    sample = "."
  ) %>%
  dplyr::select(
    new_id,
    `#CHROM` = CHROM, POS, ID, REF, ALT,
    QUAL, FILTER, INFO, FORMAT, sample,
    align_dir, gene_dir
  )

# invisible(): the loop is run for its side effect only, so the list of NULLs
# returned by lapply() should not be auto-printed at top level.
invisible(lapply(rna_id_c, function(x) {
  sample_df <- corrected_vcf_df %>%
    dplyr::filter(new_id == x) %>%
    dplyr::select(-new_id)
  write.table(
    sample_df,
    paste0("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/19_RNA_editing_corrected/vcf/", x, "_corrected.vcf"),
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )
}))

# Write a second per-sample, tab-separated VCF-like table for the same
# threshold-passing sites, this time with "chr" prepended to the chromosome
# name and without the align_dir / gene_dir columns.
# Output columns: #CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, sample.
# ID is built from the original (un-prefixed) CHROM value, so it does not
# carry the "chr" prefix.

# Sample-independent work is done once up front rather than per iteration.
corrected_hg19_vcf_df <- rna_editing_filter_df %>%
  dplyr::filter(cor_depth >= 4, cor_var_readc >= 3) %>%
  dplyr::mutate(
    ID = paste0(CHROM, ":", POS, "_", REF, "/", ALT),
    `#CHROM` = paste0("chr", CHROM),
    QUAL = ".",
    FILTER = ".",
    INFO = ".",
    FORMAT = ".",
    sample = "."
  ) %>%
  dplyr::select(
    new_id,
    `#CHROM`, POS, ID, REF, ALT,
    QUAL, FILTER, INFO, FORMAT, sample
  )

# invisible(): run for the file-writing side effect only; suppresses the
# auto-printed list of NULLs that a bare top-level lapply() would produce.
invisible(lapply(rna_id_c, function(x) {
  sample_df <- corrected_hg19_vcf_df %>%
    dplyr::filter(new_id == x) %>%
    dplyr::select(-new_id)
  write.table(
    sample_df,
    paste0("/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WTS/19_RNA_editing_corrected/vcf/", x, "_corrected.hg19.vcf"),
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )
}))