#################################
##### RiboDensity calculation ###
#################################


# protein coding genes ----------------------------------------------------

# Load the two count tables: RNA-seq (comma-separated) and ribosome
# profiling (tab-separated). Both files are read with a header row.
counts <- read.delim(
  "data/HTSeq_count_stats_all_libraries.csv",
  header = TRUE,
  sep = ","
)
countsRP <- read.delim(
  "data/RiboProf_counts_stats.df",
  header = TRUE,
  sep = "\t"
)

# Rename the first column to "EnsemblID" and prefix every other column with
# the assay it comes from ("RNAseqCounts_" / "RPCounts_").
# The names are taken from the table itself instead of a fixed 2:960 / 2:73
# range, so an input with a different number of libraries no longer fails
# with "undefined columns selected".
colnames(counts) <- c(
  "EnsemblID",
  paste("RNAseqCounts", colnames(counts)[-1], sep = "_")
)
colnames(countsRP) <- c(
  "EnsemblID",
  paste("RPCounts", colnames(countsRP)[-1], sep = "_")
)

## Step 1 : Pick columns -> gene ID plus CDS columns only; on the RNAseq side
## only the 0h conditions named in the pattern, to match the RP libraries
# RP: any column whose name contains "EnsemblID" or ends in "CDS".
RiboDensity_RP <- countsRP[
  , grepl("(EnsemblID)|(CDS)$", colnames(countsRP))
]
# RNAseq: the ID column plus the CDS columns of the "0h_untreated" and
# "0h_Triptolide_m..." / "0h_Triptolide_i..." libraries.
RiboDensity_RNAseq <- counts[
  , grepl(
    "(EnsemblID)|(0h_untreated.*CDS$)|(0h_Triptolide_(m|i).*CDS$)",
    colnames(counts)
  )
]

## Step 2 : Normalize counts to one million reads per library
# Column 1 holds the gene ID; every other column is one library.
# The first five rows are left out of the library total and are not rescaled
# (they are dropped before the merge in step 3).
# NOTE(review): presumably these five rows are summary counters rather than
# genes -- confirm against the input files.
# Each column is rescaled in one vectorized assignment instead of cell by
# cell, which is far faster on a data frame and does not run the row/column
# sequences backwards when the table has fewer than 6 rows or 2 columns.
rp_gene_rows <- seq_len(nrow(RiboDensity_RP))[-(1:5)]
if (length(rp_gene_rows) > 0) {
  for (lib in seq_len(ncol(RiboDensity_RP))[-1]) {
    lib_counts <- as.numeric(RiboDensity_RP[rp_gene_rows, lib])
    RiboDensity_RP[rp_gene_rows, lib] <-
      lib_counts * (1000000 / sum(lib_counts))
  }
}

rnaseq_gene_rows <- seq_len(nrow(RiboDensity_RNAseq))[-(1:5)]
if (length(rnaseq_gene_rows) > 0) {
  for (lib in seq_len(ncol(RiboDensity_RNAseq))[-1]) {
    lib_counts <- as.numeric(RiboDensity_RNAseq[rnaseq_gene_rows, lib])
    RiboDensity_RNAseq[rnaseq_gene_rows, lib] <-
      lib_counts * (1000000 / sum(lib_counts))
  }
}

## Step 3 : Ratio of RP / RNAseq

# Join the two normalized tables on the column(s) they share, leaving out the
# first five rows of each.
RiboDensity <- merge(RiboDensity_RP[-(1:5), ], RiboDensity_RNAseq[-(1:5), ])

# One ratio column per replicate: each RP library is divided, position by
# position, by the RNAseq library listed at the same index. Binding the
# ratios next to the character IDs yields a character matrix.
RiboDensity2 <- local({
  rp_libraries <- c(
    "RPCounts_Tmac.2_rT1_CDS",
    "RPCounts_Tmac.4_rT2_CDS",
    "RPCounts_Tmac.6_rT3_CDS",
    "RPCounts_Tmac.1_aT1_CDS",
    "RPCounts_Tmac.3_aT2_CDS",
    "RPCounts_Tmac.5_aT3_CDS",
    "RPCounts_Tmac.7_rM1_CDS",
    "RPCounts_Tmac.9_rM2_CDS",
    "RPCounts_Tmac.11_rM3_CDS",
    "RPCounts_Tmac.8_aM1_CDS",
    "RPCounts_Tmac.10_aM2_CDS",
    "RPCounts_Tmac.12_aM3_CDS"
  )
  rnaseq_libraries <- c(
    "RNAseqCounts_R2_1_Resting_0h_untreated_CDS",
    "RNAseqCounts_R3_1_Resting_0h_untreated_CDS",
    "RNAseqCounts_R4_1_Resting_0h_untreated_CDS",
    "RNAseqCounts_A2_1_Activated_0h_untreated_CDS",
    "RNAseqCounts_A3_1_Activated_0h_untreated_CDS",
    "RNAseqCounts_A4_1_Activated_0h_untreated_CDS",
    "RNAseqCounts_LPSno_macro_0h_Triptolide_i3_4_CDS",
    "RNAseqCounts_LPSno_macro_0h_Triptolide_i4_4_CDS",
    "RNAseqCounts_LPSno_macro_0h_Triptolide_i5_4_CDS",
    "RNAseqCounts_LPS_macro_0h_Triptolide_m3_4_CDS",
    "RNAseqCounts_LPS_macro_0h_Triptolide_m4_4_CDS",
    "RNAseqCounts_LPS_macro_0h_Triptolide_m5_4_CDS"
  )
  ratios <- as.matrix(RiboDensity[rp_libraries]) /
    as.matrix(RiboDensity[rnaseq_libraries])
  cbind(as.character(RiboDensity$EnsemblID), ratios)
})

colnames(RiboDensity2) <- c(
  "EnsemblID",
  "RiboDensity_Resting_lympho_1",
  "RiboDensity_Resting_lympho_2",
  "RiboDensity_Resting_lympho_3",
  "RiboDensity_Activated_lympho_1",
  "RiboDensity_Activated_lympho_2",
  "RiboDensity_Activated_lympho_3",
  "RiboDensity_LPSno_macro_1",
  "RiboDensity_LPSno_macro_2",
  "RiboDensity_LPSno_macro_3",
  "RiboDensity_LPS_macro_1",
  "RiboDensity_LPS_macro_2",
  "RiboDensity_LPS_macro_3"
)

# Average the three replicate ratios of each condition into one value per
# gene. RiboDensity2 is a character matrix (ID in column 1, twelve ratio
# columns after it), so the ratio columns are converted to a numeric matrix
# once and averaged with rowMeans(). This replaces a row-by-row loop that
# rebuilt the data frame on every iteration, passed each mean through
# character and back, and failed on a zero-row table because of 1:nrow().
replicate_ratios <- RiboDensity2[, -1, drop = FALSE]
storage.mode(replicate_ratios) <- "double"

RiboDensity3 <- data.frame(
  # Keep only the part of the ID before the first "." (drops any suffix).
  EnsemblID = gsub("(^[^.]*)(.*$)", "\\1", RiboDensity2[, "EnsemblID"]),
  # na.rm = TRUE: NA/NaN replicates are ignored; a gene with no usable
  # replicate gets NaN, and infinite ratios propagate to the mean.
  RiboDens_LymphoR = rowMeans(replicate_ratios[, 1:3, drop = FALSE],
                              na.rm = TRUE),
  RiboDens_LymphoA = rowMeans(replicate_ratios[, 4:6, drop = FALSE],
                              na.rm = TRUE),
  RiboDens_MacroR = rowMeans(replicate_ratios[, 7:9, drop = FALSE],
                             na.rm = TRUE),
  RiboDens_MacroA = rowMeans(replicate_ratios[, 10:12, drop = FALSE],
                             na.rm = TRUE),
  row.names = NULL,
  stringsAsFactors = FALSE
)

# IDs whose mean ribosome density is finite (not NA, NaN or +/-Inf) in each
# of the four conditions.
LR <- RiboDensity3[is.finite(RiboDensity3$RiboDens_LymphoR), "EnsemblID"]
LA <- RiboDensity3[is.finite(RiboDensity3$RiboDens_LymphoA), "EnsemblID"]
MR <- RiboDensity3[is.finite(RiboDensity3$RiboDens_MacroR), "EnsemblID"]
MA <- RiboDensity3[is.finite(RiboDensity3$RiboDens_MacroA), "EnsemblID"]

# Keep every row whose ID has a finite value in at least one condition.
has_finite_density <- RiboDensity3$EnsemblID %in% c(LR, LA, MR, MA)
RiboDensity3 <- RiboDensity3[has_finite_density, ]

write.csv(RiboDensity3, file = "results/RiboDensity.csv", row.names = FALSE)



# lincRNA -----------------------------------------------------------------
library(tidyverse)
# Load the tab-separated quantification table used for the lincRNA analysis.
# Both the RP ("Tmac...") and the RNAseq columns are selected from it below.
counts <- read.delim(
  "data/quantif_htseq_linRNA_Alllympho.csv",
  header = TRUE,
  sep = "\t"
)

## Step 1 : Pick columns -> gene ID plus the libraries of each assay; the
## conditions must match between RNAseq and RP
# RP side: ID column plus every column whose name starts with "Tmac".
RiboDensity_RP <- counts[
  , grepl("(ENSEMBL_GeneID)|(^Tmac.*)", colnames(counts))
]
# Keep rows whose ID contains "ENSMUSG", then cut the ID at the first ".".
RiboDensity_RP <- RiboDensity_RP[
  grepl("ENSMUSG.*", x = RiboDensity_RP$ENSEMBL_GeneID),
]
RiboDensity_RP$ENSEMBL_GeneID <- gsub(
  "(^[^.]*)(.*$)", "\\1",
  RiboDensity_RP$ENSEMBL_GeneID
)


# RNAseq side: ID column plus the "0h_untreated" and
# "0h_Triptolide_m..." / "0h_Triptolide_i..." libraries, same row filter.
RiboDensity_RNAseq <- counts[
  , grepl(
    "(ENSEMBL_GeneID)|(0h_untreated.*)|(0h_Triptolide_(m|i).*)",
    colnames(counts)
  )
]
RiboDensity_RNAseq <- RiboDensity_RNAseq[
  grepl("ENSMUSG.*", x = RiboDensity_RNAseq$ENSEMBL_GeneID),
]
RiboDensity_RNAseq$ENSEMBL_GeneID <- gsub(
  "(^[^.]*)(.*$)", "\\1",
  RiboDensity_RNAseq$ENSEMBL_GeneID
)


# Flatten the lincRNA file into a character vector of IDs.
# NOTE(review): read.csv() defaults to header = TRUE, so the first line of
# the file is consumed as a column name -- confirm the file has a header.
lincRNA <- as.character(
  unlist(
    read.csv("data/lincRNA.txt")
  )
)

# RiboDensity_RP <- RiboDensity_RP[RiboDensity_RP$ENSEMBL_GeneID %in% lincRNA,]
# RiboDensity_RNAseq <- RiboDensity_RNAseq[RiboDensity_RNAseq$ENSEMBL_GeneID %in% lincRNA,]

## Step 2 : Normalize counts to reads per million (RPM)

#' Scale one library's counts to reads per million (RPM).
#'
#' @param x Vector of counts for a single library. It is coerced with
#'   `as.numeric()` before any arithmetic.
#' @return A numeric vector the same length as `x`, equal to
#'   `x * 1e6 / sum(x)`. No special cases are handled: an `NA` in `x` makes
#'   the total, and therefore every element, `NA`; an all-zero `x` gives
#'   `NaN`.
norm_RPM <- function(x) {
  # Convert before summing: sum() on an integer vector returns NA (with a
  # warning) once the total exceeds .Machine$integer.max, and it errors on
  # character input, even though the multiplication below already coerces.
  reads <- as.numeric(x)
  reads * (1000000 / sum(reads))
}

# Scale every library column (everything except the gene ID in column 1) to
# reads per million. lapply() passes each column to norm_RPM() with its own
# type. apply() would first coerce the data frame to a single matrix, and
# `[, -1]` collapses to a plain vector, which apply() rejects, when only
# one library column is present.
RiboDensity_RP[-1] <- lapply(RiboDensity_RP[-1], norm_RPM)
RiboDensity_RNAseq[-1] <- lapply(RiboDensity_RNAseq[-1], norm_RPM)

## Step 3 : Ratio of RP / RNAseq

# Join the two normalized tables on the column(s) they share.
RiboDensity <- merge(RiboDensity_RP, RiboDensity_RNAseq)

# One ratio column per replicate: each RP library is divided, position by
# position, by the RNAseq library listed at the same index. Binding the
# ratios next to the character IDs yields a character matrix.
RiboDensity2 <- local({
  rp_libraries <- c(
    "Tmac.2_CATCGCC_Resting.Tcell_Rep1",
    "Tmac.4_GGAGCCC_Resting.Tcell_Rep2",
    "Tmac.6_ATCTGCC_Resting.Tcell_Rep3",
    "Tmac.1_GTGATCC_Activated.Tcell_Rep1",
    "Tmac.3_AGCTACC_Activated.Tcell_Rep2",
    "Tmac.5_ACTGTCC_Activated.Tcell_Rep3",
    "Tmac.7_CGGGACC_Resting.Macro_Rep1",
    "Tmac.9_TGACTCC_Resting.Macro_Rep2",
    "Tmac.11_ATTCACC_Resting.Macro_Rep3",
    "Tmac.8_TAGCCCC_LPS.activ.Macro_Rep1",
    "Tmac.10_ACAAGCC_LPS.activ.Macro_Rep2",
    "Tmac.12_TCTACCC_LPS.activ.Macro_Rep3"
  )
  rnaseq_libraries <- c(
    "R2_1_Resting_0h_untreated",
    "R3_1_Resting_0h_untreated",
    "R4_1_Resting_0h_untreated",
    "A2_1_Activated_0h_untreated",
    "A3_1_Activated_0h_untreated",
    "A4_1_Activated_0h_untreated",
    "noLPS_macro_0h_Triptolide_i3_4",
    "noLPS_macro_0h_Triptolide_i4_4",
    "noLPS_macro_0h_Triptolide_i5_4",
    "LPS_macro_0h_Triptolide_m3_4",
    "LPS_macro_0h_Triptolide_m4_4",
    "LPS_macro_0h_Triptolide_m5_4"
  )
  ratios <- as.matrix(RiboDensity[rp_libraries]) /
    as.matrix(RiboDensity[rnaseq_libraries])
  cbind(as.character(RiboDensity$ENSEMBL_GeneID), ratios)
})

colnames(RiboDensity2) <- c(
  "EnsemblID",
  "RiboDensity_Resting_lympho_1",
  "RiboDensity_Resting_lympho_2",
  "RiboDensity_Resting_lympho_3",
  "RiboDensity_Activated_lympho_1",
  "RiboDensity_Activated_lympho_2",
  "RiboDensity_Activated_lympho_3",
  "RiboDensity_LPSno_macro_1",
  "RiboDensity_LPSno_macro_2",
  "RiboDensity_LPSno_macro_3",
  "RiboDensity_LPS_macro_1",
  "RiboDensity_LPS_macro_2",
  "RiboDensity_LPS_macro_3"
)

# calculate mean of replicates
# RiboDensity2 is a character matrix (ID in column 1, twelve ratio columns
# after it), so the ratio columns are converted to a numeric matrix once and
# averaged with rowMeans(). This replaces a row-by-row loop that rebuilt the
# data frame on every iteration, passed each mean through character and
# back, and failed on a zero-row table because of 1:nrow().
replicate_ratios <- RiboDensity2[, -1, drop = FALSE]
storage.mode(replicate_ratios) <- "double"

RiboDensity3 <- data.frame(
  # Keep only the part of the ID before the first "." (drops any suffix).
  EnsemblID = gsub("(^[^.]*)(.*$)", "\\1", RiboDensity2[, "EnsemblID"]),
  # na.rm = TRUE: NA/NaN replicates are ignored; a gene with no usable
  # replicate gets NaN, and infinite ratios propagate to the mean.
  RiboDens_LymphoR = rowMeans(replicate_ratios[, 1:3, drop = FALSE],
                              na.rm = TRUE),
  RiboDens_LymphoA = rowMeans(replicate_ratios[, 4:6, drop = FALSE],
                              na.rm = TRUE),
  RiboDens_MacroR = rowMeans(replicate_ratios[, 7:9, drop = FALSE],
                             na.rm = TRUE),
  RiboDens_MacroA = rowMeans(replicate_ratios[, 10:12, drop = FALSE],
                             na.rm = TRUE),
  row.names = NULL,
  stringsAsFactors = FALSE
)

# IDs whose mean ribosome density is finite (not NA, NaN or +/-Inf) in each
# of the four conditions.
LR <- RiboDensity3[is.finite(RiboDensity3$RiboDens_LymphoR), "EnsemblID"]
LA <- RiboDensity3[is.finite(RiboDensity3$RiboDens_LymphoA), "EnsemblID"]
MR <- RiboDensity3[is.finite(RiboDensity3$RiboDens_MacroR), "EnsemblID"]
MA <- RiboDensity3[is.finite(RiboDensity3$RiboDens_MacroA), "EnsemblID"]

# Keep every row whose ID has a finite value in at least one condition.
has_finite_density <- RiboDensity3$EnsemblID %in% c(LR, LA, MR, MA)
RiboDensity3 <- RiboDensity3[has_finite_density, ]

# Then restrict the table to the IDs listed in `lincRNA`.
is_lincRNA <- RiboDensity3$EnsemblID %in% lincRNA
RiboDensity3 <- RiboDensity3[is_lincRNA, ]

write.csv(RiboDensity3, file = "results/RiboDensity_lincRNA.csv", row.names = FALSE)
