#! /bin/R

library(tidyverse)


# functions ---------------------------------------------------------------


# Extract the values of one index column over the combined gene set.
#
# Reads the main database and the lincRNA database (with the ribosome density
# columns merged in), reduces both to their shared columns, stacks them with a
# `type` label, keeps rows that are GOLD in resting or activated lymphocytes
# or that are lincRNA, and returns the requested column.
#
# index_name: name of the column to extract (single string).
# filtred:    if TRUE, keep only genes listed in the "filtred_genes" file of
#             the cell type (Lympho/Macro) and state (Activated/Resting) found
#             in `index_name`; stops if either cannot be determined.
#
# Returns the column values as a vector (via unlist()).
prepare_index <- function(index_name, filtred = TRUE) {
  db <- "~/RMI2/gitlab/tdd/data/databases/2020-03-26_07-39-50_Subset_Data_processed.csv"
  table <- as_tibble(read.csv(db))

  db_linc <- "data/databases/current_subset_lincRNA.csv"
  table_linc <- as_tibble(read.csv(db_linc))

  TE_linc <- as_tibble(x = read.csv("results/RiboDensity_lincRNA.csv")) %>%
    select(EnsemblID, RiboDens_LymphoR, RiboDens_LymphoA)
  colnames(TE_linc) <- c("gene_id", "RiboDens.Lympho_Resting", "RiboDens.Lympho_Activated")

  # merge() joins on the column names the two tables have in common
  table_linc <- merge(table_linc, TE_linc)

  # harmonise database: drop lincRNA-only columns, then keep in the main
  # table only the columns that remain in the lincRNA table
  to_remove <- c("X", "transcript_id", "gene_name", "lincRNA")

  # all_of() states explicitly that `to_remove` is an external character
  # vector and not a column of the data
  table_linc <- table_linc %>% select(-all_of(to_remove))
  table_linc$type <- "linc RNA"
  table_coding <- table[, colnames(table) %in% colnames(table_linc)]
  table_coding$type <- "protein-coding RNA"

  # merge database
  double_table <- rbind(table_coding, table_linc)

  # filter data
  index <- double_table %>%
    filter((Lympho_Resting.GOLD == 1) |
             (Lympho_Activated.GOLD == 1) |
             type == "linc RNA") %>%
    select(gene_id, all_of(index_name))

  if (filtred) {
    found <- function(pattern) length(grep(pattern, index_name)) == 1

    # Determine the cell type
    cell <- if (found("Lympho")) {
      "Lympho"
    } else if (found("Macro")) {
      "Macro"
    } else {
      NULL
    }
    # Determine the state
    state <- if (found("Activated")) {
      "Activated"
    } else if (found("Resting")) {
      "Resting"
    } else {
      NULL
    }
    if (is.null(cell) || is.null(state)) {
      stop("cannot determine cell type and state from index_name: ",
           paste(index_name, collapse = ", "),
           call. = FALSE)
    }

    # load only the expressed-gene list of the condition actually needed
    expressed_file <- paste0("~/RMI2/gitlab/tdd/results/filtred_genes_",
                             cell, "_", state, ".csv")
    expressed_genes <- unlist(read.csv(file = expressed_file))

    index <- index %>% filter(gene_id %in% expressed_genes)
  }

  unlist(index[, index_name])
}

# Add a `y_norm` element to `dens_data`: its `y` values divided by their sum,
# so that `y_norm` sums to 1. All other elements are returned untouched.
normalize_denstiy <- function(dens_data) {
  heights <- dens_data[["y"]]
  dens_data[["y_norm"]] <- heights / sum(heights)
  dens_data
}

# Empirical one-sided p-value of median(ref) against a resampled distribution.
#
# A kernel density of `mean_test` is computed and normalised so that its
# heights sum to 1; the p-value is the cumulated tail mass read at the first
# density grid point lying above median(ref).
#
# mean_test: numeric vector of resampled statistics.
# ref:       numeric vector; only its median is used.
# n_perm:    number of resamplings, used only to build the "<1/n_perm" bound.
# method:    "greaterAbs" always reads the upper tail; "diffAbs" reads the
#            upper tail when median(ref) >= median(mean_test) and the lower
#            tail otherwise.
#
# Returns a numeric tail mass, or the string "<1/n_perm" (e.g. "<1e-06") when
# median(ref) lies beyond the density grid on the tested side.
pval_sumcum_uniq <- function(mean_test, ref, n_perm, method = "greaterAbs"){

  # Validate up front: previously an unknown method fell through to
  # "object 'pval' not found", while this message was raised (wrongly) when
  # the two medians were equal under "diffAbs".
  if (length(method) != 1 || !method %in% c("greaterAbs", "diffAbs")) {
    stop("method must be \"greaterAbs\" or \"diffAbs\"")
  }

  mean_test <- mean_test[order(mean_test)]
  mean_dens <- normalize_denstiy(dens_data = density(mean_test))

  ref_median <- median(ref)
  grid <- mean_dens[['x']]
  cum_mass <- cumsum(mean_dens[['y_norm']])

  # Equal medians are handled by the upper-tail branch instead of stopping.
  upper_tail <- method == "greaterAbs" || ref_median >= median(mean_test)

  if (upper_tail) {
    tail_mass <- 1 - cum_mass
    # `<=` rather than `<`: when the last grid point equals median(ref) no
    # grid point lies above it and the lookup below would give NA
    beyond_grid <- max(grid) <= ref_median
  } else {
    tail_mass <- cum_mass
    beyond_grid <- min(grid) > ref_median
  }

  if (beyond_grid) {
    pval <- paste0("<", 1/n_perm)
  } else {
    pval <- tail_mass[grid > ref_median][1]
  }

  return(pval)
}

# Draw elements of `x` by sampling positions with sample.int(); `...` is
# forwarded to sample.int() (e.g. `size`, `replace`). Sampling positions
# rather than values keeps a length-one `x` from being treated as a range.
resample <- function(x, ...) {
  positions <- sample.int(length(x), ...)
  x[positions]
}


# Resampling p-value of median(ref) against the background index values.
#
# The background is prepare_index(index_name, filtred = FALSE) restricted to
# finite values. `n_perm` times, length(ref) values are drawn from it with
# resample() and their median is stored; the medians are then passed to
# pval_sumcum_uniq() with method "diffAbs".
#
# index_name: column name handed to prepare_index().
# ref:        numeric vector of the tested group; its length sets the size of
#             every draw.
# n_perm:     number of draws (at least 1).
#
# Returns whatever pval_sumcum_uniq() returns (a number or a "<..." string).
pval_linc <- function(index_name, ref, n_perm = 10000) {
  stopifnot(is.numeric(n_perm), length(n_perm) == 1, n_perm >= 1)

  message(paste(Sys.time(),": prepare_index"))
  index <- prepare_index(index_name, filtred = FALSE)
  index <- index[is.finite(index)]

  message(paste(Sys.time(),": create bootstrap means"))
  # NOTE(review): resample() is called with `size` only, so each draw uses
  # sample.int()'s default (no replacement) -- confirm this is intended.
  draw_size <- length(ref)
  # seq_len() instead of 1:n_perm (which is c(1, 0) for n_perm = 0), and
  # vapply() so the result is a numeric vector of exactly n_perm medians
  bootstrap <- vapply(
    seq_len(n_perm),
    function(i) median(resample(index, size = draw_size)),
    numeric(1)
  )

  message(paste(Sys.time(),": calculating pval"))
  pval <- pval_sumcum_uniq(mean_test = bootstrap, ref = ref, method = "diffAbs", n_perm = n_perm)

  return(pval)
}


# set data  ---------------------------------------------------------------


# Main database, read as a plain data.frame and used by the top-level
# sections of this script.
table <- read.csv("data/databases/2020-03-26_07-39-50_Subset_Data_processed.csv")

# Colour constants (hex codes or R colour names).
color_resting <- "#69b3a2"
color_activated <- "#404080"
color_1h <- "deepskyblue"
color_3h <- "darkmagenta"


# Named list collecting every p-value computed by this script.
pval <- list()

# lincRNA -----------------------------------------------------------------

db_linc <- "data/databases/current_subset_lincRNA.csv"
table_linc <- as_tibble(read.csv(db_linc))

# harmonise database
to_remove <- c("X",
               "transcript_id",
               "gene_name",
               "lincRNA")

# all_of() states explicitly that `to_remove` is an external character vector
table_linc <- table_linc %>% select(-all_of(to_remove))
table_linc$type <- "linc RNA"

# Name of the entry stored in `pval` -> column of `table_linc` that is tested.
# Entries are processed in this order.
linc_indexes <- c(
  linc_Resting_AbsTDD3hCHX = "Abs.TDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.3h",
  linc_Activated_AbsTDD3hCHX = "Abs.TDD..Lympho_Activated.Trip_CHX.Ref_Trip_0h.3h",
  linc_Resting_AbsTDD1hCHX = "Abs.TDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.1h",
  linc_Activated_AbsTDD1hCHX = "Abs.TDD..Lympho_Activated.Trip_CHX.Ref_Trip_0h.1h",
  linc_Resting_AbsNonTDD3hCHX = "Abs.NonTDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.3h",
  linc_Activated_AbsNonTDD3hCHX = "Abs.NonTDD..Lympho_Activated.Trip_CHX.Ref_Trip_0h.3h",
  linc_Resting_AbsNonTDD1hCHX = "Abs.NonTDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.1h",
  linc_Activated_AbsNonTDD1hCHX = "Abs.NonTDD..Lympho_Activated.Trip_CHX.Ref_Trip_0h.1h"
)

for (result_name in names(linc_indexes)) {
  index_name <- linc_indexes[[result_name]]

  # reference values: the finite lincRNA values of the tested column
  ref <- unlist(table_linc %>% select(all_of(index_name)))
  ref <- ref[is.finite(ref)]

  pval[[result_name]] <- pval_linc(index_name = index_name,
                                   ref = ref,
                                   n_perm = 1000000)
}

TE_linc <- as_tibble(x = read.csv("results/RiboDensity_lincRNA.csv")) %>%
  select(EnsemblID,
         RiboDens_LymphoR,
         RiboDens_LymphoA)

# For ribosome density the reference values come from `TE_linc`, whose column
# names differ from the index name handed to pval_linc().
ribo_indexes <- list(
  linc_Resting_RiboDens = c(index = "RiboDens.Lympho_Resting",
                            ref = "RiboDens_LymphoR"),
  linc_Activated_RiboDens = c(index = "RiboDens.Lympho_Activated",
                              ref = "RiboDens_LymphoA")
)

for (result_name in names(ribo_indexes)) {
  index_name <- ribo_indexes[[result_name]][["index"]]
  ref_column <- ribo_indexes[[result_name]][["ref"]]

  ref <- unlist(TE_linc %>% select(all_of(ref_column)))
  ref <- ref[is.finite(ref)]

  pval[[result_name]] <- pval_linc(index_name = index_name,
                                   ref = ref,
                                   n_perm = 1000000)
}

# UPF2 --------------------------------------------------------------------

  # Gene names from the first column of each "Upregulated_UPF2ko" file.
  Up_UPF2ko_BMM <- read.csv("data/Upregulated_UPF2ko_mouse_BMM.csv")
  Up_UPF2ko_BMM <- as.character(Up_UPF2ko_BMM[, 1])
  Up_UPF2ko_thymo <- read.csv("data/Upregulated_UPF2ko_mouse_thymocytes.csv")
  Up_UPF2ko_thymo <- as.character(Up_UPF2ko_thymo[, 1])

  # The row filter is the same for every index, so apply it once: genes
  # present in either list.
  upf2_genes <- union(Up_UPF2ko_thymo, Up_UPF2ko_BMM)
  table_upf2 <- table %>% filter(gene_name %in% upf2_genes)

  # Name of the entry stored in `pval` -> column of `table` that is tested.
  # Entries are processed in this order.
  # NOTE(review): unlike the lincRNA section there is no Activated NonTDD
  # entry here -- confirm whether that is intentional.
  upf2_indexes <- c(
    upf2_Resting_AbsTDD3hCHX = "Abs.TDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.3h",
    upf2_Activated_AbsTDD3hCHX = "Abs.TDD..Lympho_Activated.Trip_CHX.Ref_Trip_0h.3h",
    upf2_Resting_AbsTDD1hCHX = "Abs.TDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.1h",
    upf2_Activated_AbsTDD1hCHX = "Abs.TDD..Lympho_Activated.Trip_CHX.Ref_Trip_0h.1h",
    upf2_Resting_AbsNonTDD3hCHX = "Abs.NonTDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.3h",
    upf2_Resting_AbsNonTDD1hCHX = "Abs.NonTDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.1h"
  )

  for (result_name in names(upf2_indexes)) {
    index_name <- upf2_indexes[[result_name]]

    # all_of() states explicitly that `index_name` is an external string
    ref <- unlist(table_upf2 %>% select(all_of(index_name)))
    # NOTE(review): only NA is dropped here, whereas the lincRNA section
    # keeps finite values only -- confirm the difference is intended.
    ref <- ref[!is.na(ref)]

    pval[[result_name]] <- pval_linc(index_name = index_name,
                                     ref = ref,
                                     n_perm = 1000000)
  }
  


# Write the `pval` list to disk under the object name "pval".
save(list = "pval", file = "results/pval_bootstrap.RData")
