library(dplyr)
library(tidyverse)
library(Rsamtools)
library(stringi)
library(parallel)
#library(dplyr)
#library(tidyverse)
# Command-line arguments:
#   args[1]  path to a headerless tab-separated file with four columns
#            (chromosome, POS1, POS2, count)
#   args[2]  flag that is compared against "F" further down when the
#            chromosome list is built
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  # Fail here with a readable message instead of later with
  # "missing value where TRUE/FALSE needed".
  stop(
    "Expected at least two arguments: <region_count_file> <flag>",
    call. = FALSE
  )
}
# Strongly prefer fixed over scientific notation when numbers are formatted.
options(scipen = 999)
# The chromosome column is forced to character. readr guesses column types
# from the first rows only, so an input that starts with many numeric
# chromosome names would otherwise get a numeric column in which "X"/"Y"
# become NA. The remaining columns are still guessed as before.
bed_df <- read_tsv(
  args[1],
  col_names = c("#CHROM", "POS1", "POS2", "count"),
  col_types = cols(`#CHROM` = col_character())
)


library(GenomicRanges)
# Reference FASTA that region sequences are fetched from. An optional third
# command-line argument overrides the built-in default path.
fasta_path <- '/home/users/ayh/Projects/reference/genome/human/GRCh37/A3B/human_g1k_v37.rtTA.A3B_mcherry_vec.fa'
if (length(args) >= 3 && nzchar(args[3])) {
  fasta_path <- args[3]
}
fasta_file <- FaFile(file = fasta_path)

# Chromosomes to analyse: 1-22 and X, plus Y unless args[2] is "F".
# An unconditional reassignment of chr_list to 1-22 + X used to follow this
# branch, which turned the branch into dead code and dropped Y for every
# input; it has been removed so that the flag takes effect.
if (args[2] == "F") {
  chr_list <- c(as.character(1:22), "X")
} else {
  chr_list <- c(as.character(1:22), "X", "Y")
}
print("chr_list")
print(chr_list)

# Sequence contexts to count: the single bases "C" and "G", followed by the
# 32 three-letter strings with C or G in the middle (all C-centred ones
# first, ordered by first letter and then by last letter).
bases <- c("A", "C", "G", "T")
cont_list <- c(
  "C", "G",
  paste0(
    rep(rep(bases, each = 4), 2),
    rep(c("C", "G"), each = 16),
    rep(bases, 8)
  )
)
# Show which chromosome names are present in the input.
print(unique(bed_df$`#CHROM`))
# For every chromosome in chr_list, fetch the sequence of each input region
# from the reference and count how often every context in cont_list occurs.
# Each list element is a long tibble with one row per (context, region):
#   value       occurrences of the context in the region sequence
#   cont        the context string
#   count       the region's `count` value from the input
#   edit_count  value * count
TCA_count <- mclapply(chr_list, function(x) {
  df <- bed_df %>% dplyr::filter(`#CHROM` == x)
  # A chromosome without regions yields an empty, correctly shaped result
  # rather than being handed to GRanges()/getSeq() with zero rows.
  if (nrow(df) == 0) {
    return(tibble(
      value = integer(0),
      cont = character(0),
      count = numeric(0),
      edit_count = numeric(0)
    ))
  }
  # NOTE(review): POS1/POS2 are passed on as 1-based inclusive coordinates;
  # confirm that the input is not 0-based BED.
  gene_gr <- GRanges(
    seqnames = df$`#CHROM`,
    IRanges(start = as.numeric(df$POS1), end = as.numeric(df$POS2)),
    strand = "+"
  )
  gene_seq <- as.character(getSeq(fasta_file, gene_gr))
  cont_tmp <- lapply(cont_list, function(y) {
    # The zero-width lookahead makes overlapping occurrences count too
    # (e.g. "ACA" occurs twice in "ACACA"); matching the plain pattern would
    # skip every occurrence that overlaps the previous match.
    tibble(value = str_count(gene_seq, paste0("(?=", y, ")")), cont = y)
  })
  cont_df <- do.call(rbind, cont_tmp)
  # Rows are stacked context by context, so the per-region counts are
  # repeated once per context to line up with them.
  cont_df %>%
    mutate(count = rep(df$count, length(cont_list))) %>%
    mutate(edit_count = value * count)
}, mc.cores = 5)

# mclapply() does not raise worker errors: a failed job comes back as a
# "try-error" object and a killed worker as NULL. Stop here instead of
# silently binding such elements into the result.
worker_failed <- vapply(
  TCA_count,
  function(res) is.null(res) || inherits(res, "try-error"),
  logical(1)
)
if (any(worker_failed)) {
  stop(
    "Context counting failed for chromosome(s): ",
    paste(chr_list[worker_failed], collapse = ", "),
    call. = FALSE
  )
}
#df
#cont_df%>%group_by(cont)%>%dplyr::summarise(n=n())
#TCA_count
#merge_df[c(order(merge_df$cont))]
# Stack the per-chromosome results and fix the context order to that of
# cont_list.
merge_df <- do.call(rbind, TCA_count)
merge_df$cont <- factor(merge_df$cont, levels = cont_list)
# NOTE(review): every edit_count is doubled here; the reason for the factor
# of 2 is not visible in this script.
merge_df$edit_count <- merge_df$edit_count * 2

# count_df holds one total per individual context, followed by one total per
# context group. Groups are formed from the three-letter contexts only:
#   TCN     T, C, any letter
#   NGA     any letter, G, A
#   nonTCN  A/C/G, C, any letter
#   nonNGA  everything else
count_df <- local({
  per_context <- merge_df %>%
    group_by(cont) %>%
    dplyr::summarise(tot_count = sum(edit_count))

  trinuc <- merge_df %>% filter(!cont %in% c("C", "G"))
  trinuc$cont_group <- ifelse(
    grepl("TC[ACGT]", trinuc$cont), "TCN",
    ifelse(
      grepl("[ACGT]GA", trinuc$cont), "NGA",
      ifelse(grepl("[ACG]C[ACGT]", trinuc$cont), "nonTCN", "nonNGA")
    )
  )
  per_group <- trinuc %>%
    group_by(cont_group) %>%
    dplyr::summarise(tot_count = sum(edit_count)) %>%
    plyr::rename(c("cont_group" = "cont"))

  rbind(per_context, per_group)
})
#count_df
#count_df_bak<-count_df
# Sample identifier: the input file name with everything from ".s.indel.bam"
# through ".txt" removed. The dots are escaped so that they match literal
# dots only rather than any character.
id <- gsub("\\.s\\.indel\\.bam.*\\.txt", "", basename(args[1]))
print(chr_list)
print(id)
print(basename(args[1]))

# The table is written to <id>/<id>.fin.covered_region.v7.cont.txt relative
# to the working directory. Create the directory first so write.table() does
# not fail when it is missing; an existing directory is left untouched.
dir.create(id, showWarnings = FALSE, recursive = TRUE)
count_df %>%
  write.table(
    file.path(id, paste0(id, ".fin.covered_region.v7.cont.txt")),
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )

