##########################################################
################# This script takes a .bed file with the coordinates of genomic features and a FASTA file with the sequences of these features.
################# The output is a .bed file with the coordinates of the sequences, sorted by decreasing GC content (measured in positions 800-1200 of each sequence).
##########################################################

library(stringr)

# ---- User configuration ----------------------------------------------------
# Fill these in before running: input FASTA, input BED, and output BED paths.
path_fasta <- ""
path_bed <- ""
path_sorted_bed <- ""

# 1-based, inclusive character positions of the sub-sequence in which base
# composition is measured (passed straight to substr()).
# NOTE(review): presumably the central part of ~2 kb features -- confirm that
# these bounds suit the feature length in your data.
window_start <- 800L
window_end <- 1200L

# ---- Helpers ---------------------------------------------------------------

# Read sequences from `path` and return one string per record.
#
# * If the file contains no ">" header lines, every line is taken to be one
#   sequence (no lines are dropped or merged).
# * Otherwise each ">" line starts a new record and the lines that follow it
#   (up to the next header) are concatenated, so wrapped FASTA is supported.
#   Lines that appear before the first header are ignored.
read_sequences <- function(path) {
  lines <- readLines(path)
  is_header <- startsWith(lines, ">")
  if (!any(is_header)) {
    return(lines)
  }

  record_id <- cumsum(is_header)
  is_sequence <- !is_header & record_id > 0L
  # Explicit factor levels keep records that have a header but no sequence
  # lines; they come out as "" instead of silently disappearing.
  chunks <- split(
    lines[is_sequence],
    factor(record_id[is_sequence], levels = seq_len(max(record_id)))
  )
  unname(vapply(chunks, paste, character(1), collapse = ""))
}

# Fraction of characters in each element of `windows` equal to `base`.
# `windows` must already be lower-cased and `base` a lower-case letter.
# An empty window (sequence shorter than `window_start`) gives 0 / 0 = NaN.
base_fraction <- function(windows, base) {
  str_count(windows, base) / nchar(windows)
}

# ---- Load inputs -----------------------------------------------------------

sequences <- read_sequences(path_fasta)
bed <- read.table(path_bed)

if (ncol(bed) < 3L) {
  stop("BED file must have at least 3 columns (chrom, start, end).",
       call. = FALSE)
}

# Fail loudly on a count mismatch: combining a data frame with a vector whose
# length is a whole multiple of its row count can silently recycle the rows,
# which would pair coordinates with the wrong sequences.
if (length(sequences) != nrow(bed)) {
  stop("Number of sequences (", length(sequences),
       ") does not match number of BED rows (", nrow(bed), ").",
       call. = FALSE)
}

# ---- Base composition of the window ----------------------------------------

# Lower-case first so that upper-case and soft-masked sequences are counted
# too; substr() and str_count() are vectorised, so no loop is needed.
windows <- tolower(substr(sequences, window_start, window_end))

composition <- data.frame(
  a = base_fraction(windows, "a"),
  t = base_fraction(windows, "t"),
  g = base_fraction(windows, "g"),
  c = base_fraction(windows, "c")
)
gc_content <- composition$g + composition$c

# ---- Sort by decreasing GC content and write -------------------------------

# The key is referenced by name rather than by column position, so BED files
# with more than three columns sort correctly. NaN keys are placed last.
bed_sorted <- bed[order(-gc_content), , drop = FALSE]

write.table(
  bed_sorted[, 1:3],
  path_sorted_bed,
  quote = FALSE,
  col.names = FALSE,
  row.names = FALSE,
  sep = "\t"
)

