#! /bin/R
library(tidyverse)


# load data and Define vector parameters ----------------------------------

# Read the raw table with base read.csv() and wrap the result in a tibble.
# NOTE(review): column names used further down (e.g. "X47UTR5.30CDS_DG",
# "Abs.TDD..Lympho_Activated.Trip_CHX.3h") look like the product of
# read.csv()'s default name repair (check.names = TRUE) -- confirm before
# switching to another reader.
db <- as_tibble(read.csv("data/Laurent_Data.csv"))
# All objects defined here are plain, unnamed character vectors of column
# names. The per-condition vectors are derived from a single listing so the
# TDD / non-TDD / selection vectors cannot drift apart.

# TDD index columns, one per experimental condition
# (<cell>_<state>.<transcription drug>_<translation drug>.<time>).
tdd_index <- paste0(
  "Abs.TDD..",
  c(
    "Lympho_Activated.Trip_CHX.3h",
    "Lympho_Activated.Trip_CHX.1h",
    "Lympho_Activated.Trip_Harr.3h",
    "Lympho_Activated.Trip_Harr.1h",
    "Lympho_Activated.DRB_CHX.3h",
    "Lympho_Activated.DRB_CHX.1h",
    "Lympho_Activated.DRB_Harr.3h",
    "Lympho_Activated.DRB_Harr.1h",
    "Lympho_Resting.Trip_CHX.3h",
    "Lympho_Resting.Trip_CHX.1h",
    "Lympho_Resting.Trip_Harr.3h",
    "Lympho_Resting.Trip_Harr.1h",
    "Macro_Activated.Trip_CHX.3h",
    "Macro_Activated.Trip_Harr.3h",
    "Macro_Resting.Trip_CHX.3h",
    "Macro_Resting.Trip_Harr.3h"
  )
)

# Non-TDD index columns: same conditions, same order, different prefix.
nontddindex <- sub("Abs.TDD..", "Abs.NonTDD..", tdd_index, fixed = TRUE)

# Experimental 3'UTR columns, one per cell type / state.
UTR3 <- paste0(
  "Exp_UTR3_",
  c("Lympho_Activated", "Macro_Activated", "Lympho_Resting", "Macro_Resting"),
  "_Mean"
)

# Ribosome density columns, one per cell type / state.
riboDens <- paste0(
  "RiboDens.",
  c("Lympho_Resting", "Lympho_Activated", "Macro_Resting", "Macro_Activated")
)

# Per-region G and C columns; they are removed again once the combined GC
# columns have been computed. Order: all "_G" columns, then all "_C" columns.
drop_column <- paste0(c("UTR5", "UTR3", "CDS"), rep(c("_G", "_C"), each = 3))

# Columns kept from the raw table, in output order.
selec_column <- c(
  "transcript_id",
  "Length.utr5",
  paste0(
    c("Lympho_Activated", "Lympho_Resting", "Macro_Activated", "Macro_Resting"),
    ".GOLD"
  ),
  "UTR5_G", "UTR5_C",
  "Length.cds",
  "CDS_G", "CDS_C",
  "Length.utr3",
  "UTR3_G", "UTR3_C",
  "n_m6ASeq_Peaks", "total_m6ASeq_score",
  UTR3,
  "CDS_DG", "UTR5_DG", "UTR3_DG",
  "X47UTR5.30CDS_DG", "X31CDS.End_DG",
  paste0("Kozack_score_", c("frequency", "efficiency", "dinucleotide")),
  "n_introns_in_UTR5", "n_CDS_fragments", "n_introns_in_UTR3",
  "n_exon", "n_intron",
  riboDens,
  # rbind() + as.vector() interleaves the two vectors:
  # TDD(cond 1), NonTDD(cond 1), TDD(cond 2), NonTDD(cond 2), ...
  as.vector(rbind(tdd_index, nontddindex)),
  "n4G_UTR5", "n4G_CDS", "n4G_UTR3",
  "Hwang.transcript_half.life",
  paste0(
    "Stretches_",
    c("AAA_AAG", "GAC_GAT", "GAA_GAG", "AAA_AAG_GAC_GAT_GAA_GAG"),
    "_score"
  ),
  paste0(
    "percent_",
    c(
      "ALA_A", "ARG_R", "ASN_N", "ASP_D", "CYS_C",
      "GLU_E", "GLN_Q", "GLY_G", "HIS_H", "ILE_I",
      "LEU_L", "LYS_K", "MET_M", "PHE_F", "PRO_P",
      "SER_S", "THR_T", "TRP_W", "TYR_Y", "VAL_V"
    )
  )
)

# Names that were listed but commented out of the selection (kept for
# reference; presumably present in the CSV -- not verified here):
#   gene_id, gene_name, trans_length, QC_passed, appris_level, CDS_length,
#   UTR5_length, UTR3_length, n_Exp_UTR3_ends, Exp_UTR3_Ends_validated,
#   UTR5_50, Deg3hTrip_LR, Deg3hTrip_LA, Deg3hTrip_MR, Deg3hTrip_MA,
#   Rel.TDD..<cond>, Rel.NonTDD..<cond>, Abs.NonDEG..<cond> (each condition),
#   n4G_total,
#   Hwang.Lympho_Resting.polyA_length_WT1 / _WT2 / _WT3,
#   Hwang.Lympho_Activated.polyA_length_WT1 / _WT2,
#   Stretches_AAA_AAG_pondarated_score, Stretches_GAC_GAT_pondarated_score,
#   Stretches_GAA_GAG_pondarated_score,
#   Stretches_AAA_AAG_GAC_GAT_GAA_GAG_pondarated_score,
#   GO_Term, GO_Name, Wrong_Kozack_Environment, Wrong_Intron_Donor_Site,
#   Wrong_Intron_Acceptor_Site, Trancript_duplication
# Also disabled: the five Hwang polyA WT columns as drop_column entries, and a
# polyA_length vector ("Hwang.Lympho_Resting.polyA_length",
# "Hwang.Lympho_Activated.polyA_length").


# functions ---------------------------------------------------------------

#' Reshape the wide transcript table to one row per transcript and condition
#'
#' Pivots four families of per-condition columns to long format, one after
#' the other: the index columns (`tdd_index`), the degradation-rate columns
#' (`deg`), the experimental 3'UTR columns (`UTR3`) and the ribosome-density
#' columns (`riboDens`). Each pivot multiplies the rows; after every pivot
#' only the rows whose condition agrees with the index condition are kept.
#' Condition attributes (cell type, state, time, drugs) are read from the
#' column names by substring matching.
#'
#' Finally, rows are kept only when the `*.GOLD` column matching the row's
#' cell type and state equals 1, and all helper columns are removed.
#'
#' @param table Wide table containing every column named in the other
#'   arguments plus `Lympho_Resting.GOLD`, `Lympho_Activated.GOLD`,
#'   `Macro_Resting.GOLD` and `Macro_Activated.GOLD`.
#' @param tdd_index Character vector naming the index columns to pivot; their
#'   values end up in a column called `tdd_index`.
#' @param deg Character vector naming the degradation-rate columns; values end
#'   up in `degradation_value`.
#' @param UTR3 Character vector naming the 3'UTR columns; values end up in
#'   `UTR3`.
#' @param riboDens Character vector naming the ribosome-density columns;
#'   values end up in `RiboDens`.
#' @return A tibble in long format with the remaining input columns followed
#'   by `tdd_index`, `tdd_cell`, `tdd_time`, `tdd_transcription_drug`,
#'   `tdd_translation_drug`, `tdd_state`, `degradation_value`, `UTR3` and
#'   `RiboDens`.
pivot_data <- function(table, tdd_index, deg, UTR3, riboDens) {
  # Translate condition names into labels. `labels` is a named character
  # vector (name = substring to look for, value = label to assign). Patterns
  # are applied in order, so a later match overrides an earlier one; names
  # matching no pattern stay NA.
  label_condition <- function(condition, labels) {
    out <- rep(NA_character_, length(condition))
    for (pattern in names(labels)) {
      out[grepl(pattern, condition, fixed = TRUE)] <- labels[[pattern]]
    }
    out
  }

  cell_labels <- c(Lympho = "lympho", Macro = "macro")
  state_labels <- c(Resting = "resting", Activated = "activated")
  time_labels <- c("3h" = "3h", "1h" = "1h")
  transcription_labels <- c(Trip = "Triptolide", DRB = "DRB")
  translation_labels <- c(CHX = "CHX", Harr = "HAR")

  # --- index columns ------------------------------------------------------
  # all_of() marks the argument as an external character vector, so the
  # selection can never be confused with a column of the same name.
  table <- pivot_longer(
    table,
    cols = all_of(tdd_index),
    names_to = "tdd_condition",
    values_to = "tdd_index"
  )
  table$tdd_cell <- label_condition(table$tdd_condition, cell_labels)
  table$tdd_time <- label_condition(table$tdd_condition, time_labels)
  table$tdd_transcription_drug <-
    label_condition(table$tdd_condition, transcription_labels)
  table$tdd_translation_drug <-
    label_condition(table$tdd_condition, translation_labels)
  table$tdd_state <- label_condition(table$tdd_condition, state_labels)

  # --- degradation rate ---------------------------------------------------
  table <- pivot_longer(
    table,
    cols = all_of(deg),
    names_to = "degradation_condition",
    values_to = "degradation_value"
  )
  table$deg_cell <- label_condition(table$degradation_condition, cell_labels)
  table$deg_state <- label_condition(table$degradation_condition, state_labels)
  table$deg_time <- label_condition(table$degradation_condition, time_labels)
  table$deg_transcription_drug <-
    label_condition(table$degradation_condition, transcription_labels)

  # The translation drug is not part of this match: only cell, state, time
  # and transcription drug are compared.
  table <- filter(
    table,
    deg_cell == tdd_cell,
    deg_state == tdd_state,
    deg_time == tdd_time,
    deg_transcription_drug == tdd_transcription_drug
  )

  # --- experimental 3'UTR -------------------------------------------------
  table <- pivot_longer(
    table,
    cols = all_of(UTR3),
    names_to = "UTR3_condition",
    values_to = "UTR3"
  )
  table$utr3_cell <- label_condition(table$UTR3_condition, cell_labels)
  table$utr3_state <- label_condition(table$UTR3_condition, state_labels)
  table <- filter(table, utr3_cell == tdd_cell, utr3_state == tdd_state)

  # --- ribosome density ---------------------------------------------------
  table <- pivot_longer(
    table,
    cols = all_of(riboDens),
    names_to = "riboDens_condition",
    values_to = "RiboDens"
  )
  table$riboDens_cell <- label_condition(table$riboDens_condition, cell_labels)
  table$riboDens_state <-
    label_condition(table$riboDens_condition, state_labels)
  table <- filter(
    table,
    riboDens_cell == tdd_cell,
    riboDens_state == tdd_state
  )

  # A further pivot on polyA-length columns (matched on state only) used to
  # follow here; it is disabled together with the polyA input columns.

  # --- GOLD filter --------------------------------------------------------
  # Keep a row only if the GOLD column of its own cell type / state is 1.
  table <- filter(
    table,
    (tdd_state == "resting" & tdd_cell == "lympho" &
      Lympho_Resting.GOLD == 1) |
      (tdd_state == "activated" & tdd_cell == "lympho" &
        Lympho_Activated.GOLD == 1) |
      (tdd_state == "resting" & tdd_cell == "macro" &
        Macro_Resting.GOLD == 1) |
      (tdd_state == "activated" & tdd_cell == "macro" &
        Macro_Activated.GOLD == 1)
  )

  # --- drop helper columns --------------------------------------------------
  helper_columns <- c(
    "tdd_condition",
    "degradation_condition",
    "UTR3_condition",
    "deg_cell",
    "deg_state",
    "deg_time",
    "deg_transcription_drug",
    "utr3_cell",
    "utr3_state",
    "riboDens_condition",
    "riboDens_cell",
    "riboDens_state",
    "Lympho_Resting.GOLD",
    "Lympho_Activated.GOLD",
    "Macro_Resting.GOLD",
    "Macro_Activated.GOLD"
  )
  select(table, -all_of(helper_columns))
}

# calculate missing features ----------------------------------------------

# Keep only the columns listed in selec_column. all_of() marks selec_column as
# an external character vector (a bare vector in select() is deprecated and
# ambiguous if a column of the same name exists) and errors on missing columns.
db <- select(db, all_of(selec_column))

# GC content per region: sum of the region's G and C columns.
# NOTE(review): the original comment called this "% GC" -- presumably the
# inputs are percentages; confirm units.
db$UTR5_GC <- db$UTR5_G + db$UTR5_C
db$UTR3_GC <- db$UTR3_G + db$UTR3_C
db$CDS_GC <- db$CDS_G + db$CDS_C

# Mean polyA tail length (disabled together with the polyA input columns):
# db$Hwang.Lympho_Resting.polyA_length <- rowMeans(
#   db[, c("Hwang.Lympho_Resting.polyA_length_WT1",
#          "Hwang.Lympho_Resting.polyA_length_WT2",
#          "Hwang.Lympho_Resting.polyA_length_WT3")],
#   na.rm = TRUE)
# db$Hwang.Lympho_Activated.polyA_length <- rowMeans(
#   db[, c("Hwang.Lympho_Activated.polyA_length_WT1",
#          "Hwang.Lympho_Activated.polyA_length_WT2")],
#   na.rm = TRUE)

# Normalize DeltaG by the length of the corresponding region.
# NOTE(review): a region length of 0 yields NaN or +/-Inf here; the later
# na.exclude() removes NaN rows but keeps +/-Inf -- confirm lengths are > 0.
db$CDS_DG <- db$CDS_DG / db$Length.cds
db$UTR5_DG <- db$UTR5_DG / db$Length.utr5
db$UTR3_DG <- db$UTR3_DG / db$Length.utr3

# X31CDS.End_DG is divided by the CDS length minus 30; X47UTR5.30CDS_DG is
# left as is.
db$X31CDS.End_DG <- db$X31CDS.End_DG / (db$Length.cds - 30)

# Total degradation rate = TDD + non-TDD index of the CHX experiment for the
# given cell type / state / transcription drug / time point.
# Name = new column, value = condition suffix of the two source columns.
# Keeping both in one mapping guarantees `deg` lists exactly the new columns.
deg_conditions <- c(
  "deg_rate.Lympho.Resting.1h.Trip" = "Lympho_Resting.Trip_CHX.1h",
  "deg_rate.Lympho.Resting.3h.Trip" = "Lympho_Resting.Trip_CHX.3h",
  "deg_rate.Lympho.Activated.1h.Trip" = "Lympho_Activated.Trip_CHX.1h",
  "deg_rate.Lympho.Activated.3h.Trip" = "Lympho_Activated.Trip_CHX.3h",
  "deg_rate.Lympho.Activated.1h.DRB" = "Lympho_Activated.DRB_CHX.1h",
  "deg_rate.Lympho.Activated.3h.DRB" = "Lympho_Activated.DRB_CHX.3h",
  "deg_rate.Macro.Resting.3h.Trip" = "Macro_Resting.Trip_CHX.3h",
  "deg_rate.Macro.Activated.3h.Trip" = "Macro_Activated.Trip_CHX.3h"
)
for (deg_column in names(deg_conditions)) {
  deg_condition <- deg_conditions[[deg_column]]
  db[[deg_column]] <-
    db[[paste0("Abs.TDD..", deg_condition)]] +
    db[[paste0("Abs.NonTDD..", deg_condition)]]
}

# Names of the degradation-rate columns, in creation order.
deg <- names(deg_conditions)

# Change NA to 0 when relevant --------------------------------------------

# Replace NA by 0 in the m6A-seq and n4G columns (all other columns keep
# their NAs).
for (zero_filled in c(
  "n_m6ASeq_Peaks",
  "total_m6ASeq_score",
  "n4G_CDS",
  "n4G_UTR5",
  "n4G_UTR3"
)) {
  db[[zero_filled]][is.na(db[[zero_filled]])] <- 0
}


# create final wide databases ----------------------------------------------

# The per-region G and C columns are no longer needed once the GC columns
# exist. all_of() marks each vector as an external character vector: passing
# it bare to select() is deprecated and ambiguous if a column of the same
# name exists.
db <- select(db, -all_of(drop_column))

# Two wide tables: one keeping only the TDD index columns, one keeping only
# the non-TDD index columns. Everything else is shared.
db_tddindex <- select(db, -all_of(nontddindex))
db_nontddindex <- select(db, -all_of(tdd_index))


# pivot databases ---------------------------------------------------------

# Reshape both wide tables to long format, then set every non-finite RiboDens
# value (NA, NaN, Inf, -Inf) to 0.
db_tddindex <- pivot_data(
  table = db_tddindex,
  tdd_index = tdd_index,
  deg = deg,
  UTR3 = UTR3,
  riboDens = riboDens
)
tdd_bad_ribo <- !is.finite(db_tddindex$RiboDens)
db_tddindex[tdd_bad_ribo, "RiboDens"] <- 0

# Same treatment for the non-TDD table; its index columns are passed through
# the `tdd_index` argument.
db_nontddindex <- pivot_data(
  table = db_nontddindex,
  tdd_index = nontddindex,
  deg = deg,
  UTR3 = UTR3,
  riboDens = riboDens
)
nontdd_bad_ribo <- !is.finite(db_nontddindex$RiboDens)
db_nontddindex[nontdd_bad_ribo, "RiboDens"] <- 0


# duplicate db for UTR3 and exclude NA ------------------------------------

# Two variants of each long table, differing only in the 3'UTR feature:
#   *_ensembl keeps Length.utr3 and drops the pivoted UTR3 column,
#   *_passeq  keeps the pivoted UTR3 column and drops Length.utr3.
# Columns are named as strings via all_of(): the symbol UTR3 is also a
# script-level character vector, so a bare `-UTR3` only works as long as the
# data column of that name takes precedence.
# na.exclude() then removes every row that still contains an NA.
db_tddindex_ensembl <- db_tddindex %>%
  select(-all_of("UTR3")) %>%
  na.exclude()
db_nontddindex_ensembl <- db_nontddindex %>%
  select(-all_of("UTR3")) %>%
  na.exclude()

db_tddindex_passeq <- db_tddindex %>%
  select(-all_of("Length.utr3")) %>%
  na.exclude()
db_nontddindex_passeq <- db_nontddindex %>%
  select(-all_of("Length.utr3")) %>%
  na.exclude()


# save data ---------------------------------------------------------------

# Bundle the TDD / non-TDD tables of each variant into a named list and write
# one .RData file per variant. Each file contains a single object
# (db_list_ensembl or db_list_passeq).
db_list_ensembl <- list(
  db_tddindex_ensembl = db_tddindex_ensembl,
  db_nontddindex_ensembl = db_nontddindex_ensembl
)
db_list_passeq <- list(
  db_tddindex_passeq = db_tddindex_passeq,
  db_nontddindex_passeq = db_nontddindex_passeq
)

save(
  db_list_ensembl,
  file = "results/db_for_RandomForest_longFormat_v2_ensembl.RData"
)
save(
  db_list_passeq,
  file = "results/db_for_RandomForest_longFormat_v2_passeq.RData"
)
