library(dplyr)
library(tidyverse)
library(Rsamtools)
library(stringi)
library(parallel)
# Load the bin table and keep only the rows whose `repdir` column is "R".
# `read_tsv()` already returns a tibble, so no extra conversion is needed
# (the previously used `as.tibble()` is deprecated in favour of `as_tibble()`).
# A `start` of 0 is replaced by 1; presumably the downstream sequence lookup
# needs positions >= 1 -- TODO confirm the coordinate convention of the input.
refgene_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/03_sequencing/single_clone_sequencing/WGS/28_genomic_feature/01_refernce/fin/rep_dir/rep_time_bin4.cut25.dir.tsv"
) %>%
  filter(repdir == "R") %>%
  mutate(start = ifelse(start == 0, 1, start))
library(GenomicRanges)
# Handle to the reference FASTA that sequences are extracted from.
fasta_file <- FaFile(
  file = "/home/users/ayh/Projects/reference/genome/human/GRCh37/A3B/human_g1k_v37.rtTA.A3B_mcherry_vec.fa"
)

# One "+"-strand range per row of `refgene_df`. The end coordinate is extended
# by one base.
# NOTE(review): the +1 looks intended to include the base flanking each bin so
# that three-base motifs at the bin edge are seen -- confirm.
gene_gr <- with(
  refgene_df,
  GRanges(
    seqnames = chrom,
    ranges = IRanges(start = as.numeric(start), end = as.numeric(end) + 1),
    strand = "+"
  )
)

# Merge the ranges with `reduce()` before extraction, then pull the sequence
# of each merged range from the FASTA.
merged_gr <- reduce(gene_gr)
gene_seq <- getSeq(fasta_file, merged_gr)
#as.data.frame(gene_seq)$x
# Count three-base motifs in the extracted sequences and write each total to
# its own one-line text file.

# Extracted sequences as plain character strings (one per range).
region_seqs <- as.data.frame(gene_seq)$x

# Total number of matches of `pattern` over all strings in `seqs`, counted
# per sequence in parallel on `cores` workers.
#
# Regex matches are non-overlapping, so a pattern that consumes all three
# bases undercounts motifs that share a base with the previous match
# (e.g. "TCTCA" contains TCT and TCA but "TC[ACGT]" matches only once).
# Callers therefore pass lookaround patterns that consume only the two fixed
# bases, which cannot overlap one another.
count_motif <- function(seqs, pattern, cores = 4) {
  per_seq <- mclapply(
    seqs,
    function(s) str_count(s, pattern),
    mc.cores = cores
  )
  sum(unlist(per_seq))
}

# Write a single value to `path` with no header, row names or quoting.
write_total <- function(total, path) {
  write.table(
    total,
    path,
    sep = "\t",
    quote = FALSE,
    col.names = FALSE,
    row.names = FALSE
  )
}

# "TC" followed by any of A/C/G/T (TCN); the third base is only looked at.
tcn_total <- count_motif(region_seqs, "TC(?=[ACGT])")
write_total(tcn_total, "rep_time_bin4.cut25.dir.R.TCN.txt")

# "GA" preceded by any of A/C/G/T (NGA); the first base is only looked at.
nga_total <- count_motif(region_seqs, "(?<=[ACGT])GA")
write_total(nga_total, "rep_time_bin4.cut25.dir.R.NGA.txt")
