
#
# Copyright (C) 2018-2019 Mario Abdelmessih
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#


##################################################
##################################################
# Build the feature matrix used by the stability model.
#
# For every row of `TheSequences` the sequence length and the relative
# frequency of each selected k-mer (overlapping windows, step 1) are computed.
#
# Arguments:
#   TheSequences  Two-dimensional table (data frame or matrix) with one row
#                 per sequence; column 1 holds the identifier and column 2 the
#                 DNA sequence string. Only column 2 is read here.
#   SelectedFeat  Character vector of k-mers to use as features. Every entry
#                 must be 1 to 8 characters wide.
#
# Returns:
#   A character matrix with one row per sequence and
#   `length(SelectedFeat) + 1` columns. The first column is "3'UTR Length";
#   the remaining columns are the selected k-mers grouped by width (all
#   1-mers first, then 2-mers, ...), keeping their order in `SelectedFeat`
#   within each width. Values are stored as strings, so callers convert them
#   with `as.numeric()`. A k-mer that is not present in the frequency table
#   returned by Biostrings yields NA in its column.
#
# Errors:
#   Stops if any feature is not 1 to 8 characters wide, or if Biostrings is
#   not installed (only checked when there is at least one sequence).
FeatureGenerator <- function(TheSequences, SelectedFeat) {
  max_width <- 8L

  feat_width <- nchar(SelectedFeat)
  invalid <- is.na(feat_width) | feat_width < 1L | feat_width > max_width
  if (any(invalid)) {
    stop(
      "SelectedFeat entries must be 1 to 8 characters wide; offending: ",
      paste(SelectedFeat[invalid], collapse = ", "),
      call. = FALSE
    )
  }

  # Group the requested k-mers by width once; this fixes the column order and
  # lets the loop skip widths nobody asked for (the 8-mer table alone has
  # 4^8 entries).
  feat_by_width <- lapply(
    seq_len(max_width),
    function(w) SelectedFeat[feat_width == w]
  )
  used_widths <- which(lengths(feat_by_width) > 0L)

  n_seq <- nrow(TheSequences)
  TrainData <- matrix("", n_seq, length(SelectedFeat) + 1L)
  colnames(TrainData) <- c(
    "3'UTR Length",
    unlist(feat_by_width, use.names = FALSE)
  )

  # Nothing to scan: return the correctly shaped, correctly named empty matrix.
  if (n_seq == 0L) {
    return(TrainData)
  }

  # Biostrings is only needed once there is at least one sequence to scan.
  # library() fails loudly when the package is missing.
  library("Biostrings")

  for (k in seq_len(n_seq)) {
    window_seq <- TheSequences[k, 2]
    dna <- DNAString(window_seq)

    motif_freq <- lapply(used_widths, function(w) {
      freq <- oligonucleotideFrequency(dna, w, step = 1, as.prob = TRUE)
      unname(freq[feat_by_width[[w]]])
    })

    # Assigning a numeric vector into the character matrix converts each
    # value to its string representation.
    TrainData[k, ] <- c(
      nchar(window_seq),
      unlist(motif_freq, use.names = FALSE)
    )
  }

  TrainData
}
##################################################
##################################################
# Predict the stability of each sequence with the pre-trained random forest.
#
# Arguments:
#   TestSequence  Table with one row per sequence; column 1 is the sequence
#                 identifier and the table is passed as-is to
#                 FeatureGenerator() to build the features.
#
# Returns:
#   The column-bind of the identifiers (column 1 of `TestSequence`) and the
#   model predictions, one row per sequence.
#
# Side effects:
#   Attaches the randomForest package and reads two .RData files from the
#   current working directory.
PredictSeqeunceStability <- function(TestSequence) {

  # library() stops with an error when the package is missing.
  library("randomForest")

  # Load the selected features according to the feature selection filter
  # procedure (expected to define `SelectedFeat`, which is used below).
  load("SelectedFeatures_RESATotal_WT_.RData")
  # Load the trained random forest (RF) model that will be obtained by running
  # RFModel_Training.R over the processed RNA-seq data (expected to define
  # `TrainedRF`, which is used below).
  load("Trained_RFmodel_RESATotal_WT_.RData")

  DataFeat <- FeatureGenerator(TestSequence, SelectedFeat)

  # Convert the character feature matrix to numeric while keeping its shape
  # and column names. apply(DataFeat, 2, as.numeric) would collapse a
  # single-row matrix into a plain vector.
  TestData <- matrix(
    as.numeric(DataFeat),
    nrow = nrow(DataFeat),
    dimnames = dimnames(DataFeat)
  )

  RFPredicted <- predict(TrainedRF, TestData)
  TestResults <- cbind(TestSequence[, 1], RFPredicted)
  return(TestResults)

}
######################################################################################################################################################
######################################################################################################################################################
# Driver: read the sequences to score, predict their stability, then write
# the predictions to a CSV file and a bar plot to a PDF file. All paths are
# relative to the current working directory.
TestSequence <- read.csv(
  file = "TestingSequence.csv",
  header = TRUE,
  as.is = TRUE,
  check.names = FALSE
)

TestResults <- PredictSeqeunceStability(TestSequence)

# Append the predictions as the last column. The column is addressed by
# position `ncol(ModelPrediction)` rather than a fixed index so that input
# files with more than two columns are handled correctly.
ModelPrediction <- cbind(TestSequence, TestResults[, 2])
colnames(ModelPrediction)[ncol(ModelPrediction)] <- "Predicted Stability"
write.csv(ModelPrediction, file = "TestingSequence_PredictedStability.csv")

pdf("RFPredictedStability.pdf", width = 10, height = 25)
# Large bottom margin leaves room for the rotated (las = 2) bar labels.
par(mar = c(14, 4, 4, 4))
barplot(
  # The prediction column may be stored as character or factor, so go through
  # as.character() before converting to numbers.
  as.numeric(as.character(ModelPrediction[, ncol(ModelPrediction)])),
  names.arg = ModelPrediction[, 1],
  las = 2,
  ylab = "Predicted FC log2(6h/2h)",
  cex.names = 1.8
)
dev.off()
##################################################
##################################################
