###### Selection of stable genes
######         2020.01.14 

# Load libraries and set up the environment
library(ggplot2)
library(reshape2)
library(DESeq2)
library(stringr)
library(dplyr)

# Read the count table for all libraries: comma-separated, with a header row,
# first column used as row names, character columns converted to factors.
all_data <- read.csv(
  file = "data/HTSeq_count_stats_all_libraries.csv",
  header = TRUE,
  row.names = 1,
  stringsAsFactors = TRUE
)

####### MACROPHAGES #######
# activated macrophages --------------------------------------------------------

# Select the exon-count columns of the macrophage triptolide libraries
# (LPS / LPSno, 0h / 3h) and drop the first five rows of the table.
# NOTE(review): rows 1-5 are presumably summary counters rather than genes --
# confirm against the input file.
macro_cols <- grep(
  "^LPS(no)?_macro_(0h_Triptolide|3h_Triptolide)_(m|i).*exon$",
  colnames(all_data),
  perl = TRUE
)
if (length(macro_cols) == 0) {
  stop("No macrophage triptolide exon columns found in 'all_data'.",
       call. = FALSE)
}
# drop = FALSE keeps a data frame (and the column name) even when a single
# column matches the pattern.
tdata <- data.frame(all_data[-(1:5), macro_cols, drop = FALSE])

# Coerce every column to integer-valued doubles; apply() returns a matrix, so
# the row names have to be restored afterwards.
data <- apply(tdata, 2, function(x) as.numeric(as.integer(x)))
rownames(data) <- rownames(tdata)

# Scale each library by its total count so that libraries are comparable
# (needed before computing a standard deviation across libraries).
# NOTE(review): despite the "RPM" name the scale is reads per 100,000, not per
# million. It is left unchanged because the thresholds applied to dataRPM
# further down operate on this scale.
RPMfactor <- 100000 / colSums(data)

# Multiply every column by its own factor (vectorised; no index loop, so an
# unexpected column count cannot trigger the 1:0 pitfall).
dataRPM <- sweep(data, 2, RPMfactor, `*`)

# Activated (LPS) macrophages: for every replicate, plot the 3h against the 0h
# triptolide library (log10 scale) and highlight the genes this script calls
# "stable". Each entry is c(<0h column>, <3h column>) of dataRPM.
act_macro_pairs <- list(
  c("LPS_macro_0h_Triptolide_m3_4_exon",
    "LPS_macro_3h_Triptolide_m3_6_exon"),
  c("LPS_macro_0h_Triptolide_m4_4_exon",
    "LPS_macro_3h_Triptolide_m4_6_exon"),
  c("LPS_macro_0h_Triptolide_m5_4_exon",
    "LPS_macro_3h_Triptolide_m5_6_exon")
)
# Required relative excess of the 3h value over the 0h value, per replicate.
act_macro_gain <- c(0.3, 0.3, 0.3)
act_macro_stable <- vector("list", length(act_macro_pairs))

for (k in seq_along(act_macro_pairs)) {
  rep_data <- dataRPM[, act_macro_pairs[[k]], drop = FALSE]
  rpm_0h <- rep_data[, 1]
  rpm_3h <- rep_data[, 2]

  plot(x = log10(rpm_0h), y = log10(rpm_3h),
       col = alpha("black", 0.1), pch = 20, cex = 1,
       xlab = paste0("0h Triptolide rep", k),
       ylab = paste0("3h Triptolide rep", k),
       main = "Normalization verification in Activated macrophage")
  abline(a = 0, b = 1, col = "dark blue")

  # Keep a gene when its 0h value is above 0.2, its 3h value exceeds the 0h
  # value by more than the required fraction, and 10% of its 3h value is
  # still below the 0h value (i.e. 3h is less than roughly 10x 0h).
  is_stable <- rpm_0h > 0.2 &
    rpm_0h + act_macro_gain[k] * rpm_0h < rpm_3h &
    rpm_3h - 0.9 * rpm_3h < rpm_0h
  # drop = FALSE keeps a matrix with row names even when exactly one gene
  # passes; without it the result collapses to a vector and the column
  # indexing / rownames() below break.
  rep_stable <- rep_data[is_stable, , drop = FALSE]

  points(x = log10(rep_stable[, 1]), y = log10(rep_stable[, 2]),
         col = alpha("red", 1), pch = 20, cex = 1)
  legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)

  act_macro_stable[[k]] <- rownames(rep_stable)
}

# Genes flagged in all three replicates.
macroAStable <- Reduce(intersect, act_macro_stable)

# Resting macrophages --------------------------------------------------------
# For every replicate, plot the 3h against the 0h triptolide library (log10
# scale) and highlight the genes this script calls "stable".
# Each entry is c(<0h column>, <3h column>) of dataRPM.
rest_macro_pairs <- list(
  c("LPSno_macro_0h_Triptolide_i3_4_exon",
    "LPSno_macro_3h_Triptolide_i3_6_exon"),
  c("LPSno_macro_0h_Triptolide_i4_4_exon",
    "LPSno_macro_3h_Triptolide_i4_6_exon"),
  c("LPSno_macro_0h_Triptolide_i5_4_exon",
    "LPSno_macro_3h_Triptolide_i5_6_exon")
)
# Required relative excess of the 3h value over the 0h value, per replicate.
rest_macro_gain <- c(0.3, 0.3, 0.3)
rest_macro_stable <- vector("list", length(rest_macro_pairs))

for (k in seq_along(rest_macro_pairs)) {
  rep_data <- dataRPM[, rest_macro_pairs[[k]], drop = FALSE]
  rpm_0h <- rep_data[, 1]
  rpm_3h <- rep_data[, 2]

  # The title is "Resting" for every replicate (replicate 3 used to be
  # mislabelled as "Activated macrophage").
  plot(x = log10(rpm_0h), y = log10(rpm_3h),
       col = alpha("black", 0.1), pch = 20, cex = 1,
       xlab = paste0("0h Triptolide rep", k),
       ylab = paste0("3h Triptolide rep", k),
       main = "Normalization verification in Resting macrophage")
  abline(a = 0, b = 1, col = "dark blue")

  # Keep a gene when its 0h value is above 0.2, its 3h value exceeds the 0h
  # value by more than the required fraction, and 10% of its 3h value is
  # still below the 0h value (i.e. 3h is less than roughly 10x 0h).
  is_stable <- rpm_0h > 0.2 &
    rpm_0h + rest_macro_gain[k] * rpm_0h < rpm_3h &
    rpm_3h - 0.9 * rpm_3h < rpm_0h
  # drop = FALSE keeps a matrix with row names even when exactly one gene
  # passes; without it the result collapses to a vector and the column
  # indexing / rownames() below break.
  rep_stable <- rep_data[is_stable, , drop = FALSE]

  points(x = log10(rep_stable[, 1]), y = log10(rep_stable[, 2]),
         col = alpha("red", 1), pch = 20, cex = 1)
  legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)

  rest_macro_stable[[k]] <- rownames(rep_stable)
}

# Genes flagged in all three resting replicates, then in both macrophage
# conditions (resting and activated).
macroRStable <- Reduce(intersect, rest_macro_stable)
macroStable <- intersect(macroRStable, macroAStable)

##### LYMPHOCYTES #####
# Activated lymphocytes --------------------------------------

# Select the exon-count columns of the lymphocyte triptolide libraries (names
# starting with R or A, 0h / 3h) and drop the first five rows of the table.
# NOTE(review): rows 1-5 are presumably summary counters rather than genes --
# confirm against the input file.
lympho_cols <- grep(
  "^(R|A).*(0h_Triptolide|3h_Triptolide)_exon$",
  colnames(all_data),
  perl = TRUE
)
if (length(lympho_cols) == 0) {
  stop("No lymphocyte triptolide exon columns found in 'all_data'.",
       call. = FALSE)
}
# drop = FALSE keeps a data frame (and the column name) even when a single
# column matches the pattern.
tdata <- data.frame(all_data[-(1:5), lympho_cols, drop = FALSE])

# Coerce every column to integer-valued doubles; apply() returns a matrix, so
# the row names have to be restored afterwards.
data <- apply(tdata, 2, function(x) as.numeric(as.integer(x)))
rownames(data) <- rownames(tdata)

# Scale each library by its total count so that libraries are comparable
# (needed before computing a standard deviation across libraries).
# NOTE(review): despite the "RPM" name the scale is reads per 100,000, not per
# million. It is left unchanged because the thresholds applied to dataRPM
# further down operate on this scale.
RPMfactor <- 100000 / colSums(data)

# Multiply every column by its own factor (vectorised; no index loop, so an
# unexpected column count cannot trigger the 1:0 pitfall).
dataRPM <- sweep(data, 2, RPMfactor, `*`)


# Activated lymphocytes: for every replicate, plot the 3h against the 0h
# triptolide library (log10 scale) and highlight the genes this script calls
# "stable". Each entry is c(<0h column>, <3h column>) of dataRPM.
act_lympho_pairs <- list(
  c("A2_7_Activated_0h_Triptolide_exon", "A2_9_Activated_3h_Triptolide_exon"),
  c("A3_7_Activated_0h_Triptolide_exon", "A3_9_Activated_3h_Triptolide_exon"),
  c("A4_7_Activated_0h_Triptolide_exon", "A4_9_Activated_3h_Triptolide_exon")
)
# Required relative excess of the 3h value over the 0h value; the cut-off
# differs between replicates.
act_lympho_gain <- c(0.4, 0.7, 0.6)
act_lympho_stable <- vector("list", length(act_lympho_pairs))

for (k in seq_along(act_lympho_pairs)) {
  rep_data <- dataRPM[, act_lympho_pairs[[k]], drop = FALSE]
  rpm_0h <- rep_data[, 1]
  rpm_3h <- rep_data[, 2]

  plot(x = log10(rpm_0h), y = log10(rpm_3h),
       col = alpha("black", 0.1), pch = 20, cex = 1,
       xlab = paste0("0h Triptolide rep", k),
       ylab = paste0("3h Triptolide rep", k),
       main = "Normalization verification in Activated Lympho")
  abline(a = 0, b = 1, col = "dark blue")

  # Keep a gene when its 0h value is above 0.2, its 3h value exceeds the 0h
  # value by more than the replicate-specific fraction, and 10% of its 3h
  # value is still below the 0h value (i.e. 3h is less than roughly 10x 0h).
  is_stable <- rpm_0h > 0.2 &
    rpm_0h + act_lympho_gain[k] * rpm_0h < rpm_3h &
    rpm_3h - 0.9 * rpm_3h < rpm_0h
  # drop = FALSE keeps a matrix with row names even when exactly one gene
  # passes; without it the result collapses to a vector and the column
  # indexing / rownames() below break.
  rep_stable <- rep_data[is_stable, , drop = FALSE]

  points(x = log10(rep_stable[, 1]), y = log10(rep_stable[, 2]),
         col = alpha("red", 1), pch = 20, cex = 1)
  legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)

  act_lympho_stable[[k]] <- rownames(rep_stable)
}

# Genes flagged in all three replicates.
LymphoAStable <- Reduce(intersect, act_lympho_stable)

# Resting lymphocytes -------------------------------------------------------
# For every replicate, plot the 3h against the 0h triptolide library (log10
# scale) and highlight the genes this script calls "stable".
# Each entry is c(<0h column>, <3h column>) of dataRPM.
rest_lympho_pairs <- list(
  c("R2_4_Resting_0h_Triptolide_exon", "R2_6_Resting_3h_Triptolide_exon"),
  c("R3_4_Resting_0h_Triptolide_exon", "R3_6_Resting_3h_Triptolide_exon"),
  c("R4_4_Resting_0h_Triptolide_exon", "R4_6_Resting_3h_Triptolide_exon")
)
# Required relative excess of the 3h value over the 0h value; the cut-off
# differs between replicates.
rest_lympho_gain <- c(0.5, 0.6, 0.6)
rest_lympho_stable <- vector("list", length(rest_lympho_pairs))

for (k in seq_along(rest_lympho_pairs)) {
  rep_data <- dataRPM[, rest_lympho_pairs[[k]], drop = FALSE]
  rpm_0h <- rep_data[, 1]
  rpm_3h <- rep_data[, 2]

  # The title is "Resting" for every replicate (replicate 3 used to be
  # mislabelled as "Activated Lympho").
  plot(x = log10(rpm_0h), y = log10(rpm_3h),
       col = alpha("black", 0.1), pch = 20, cex = 1,
       xlab = paste0("0h Triptolide rep", k),
       ylab = paste0("3h Triptolide rep", k),
       main = "Normalization verification in Resting Lympho")
  abline(a = 0, b = 1, col = "dark blue")

  # Keep a gene when its 0h value is above 0.2, its 3h value exceeds the 0h
  # value by more than the replicate-specific fraction, and 10% of its 3h
  # value is still below the 0h value (i.e. 3h is less than roughly 10x 0h).
  is_stable <- rpm_0h > 0.2 &
    rpm_0h + rest_lympho_gain[k] * rpm_0h < rpm_3h &
    rpm_3h - 0.9 * rpm_3h < rpm_0h
  # drop = FALSE keeps a matrix with row names even when exactly one gene
  # passes; without it the result collapses to a vector and the column
  # indexing / rownames() below break.
  rep_stable <- rep_data[is_stable, , drop = FALSE]

  points(x = log10(rep_stable[, 1]), y = log10(rep_stable[, 2]),
         col = alpha("red", 1), pch = 20, cex = 1)
  legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)

  rest_lympho_stable[[k]] <- rownames(rep_stable)
}

# Re-draw the replicate 1 scatter without the highlighted genes.
# NOTE(review): looks like a leftover from interactive use -- confirm that
# this extra figure is still wanted.
rep1_data <- dataRPM[, rest_lympho_pairs[[1]], drop = FALSE]
plot(x = log10(rep1_data[, 1]), y = log10(rep1_data[, 2]),
     col = alpha("black", 0.1), pch = 20, cex = 1,
     xlab = "0h Triptolide rep1", ylab = "3h Triptolide rep1",
     main = "Normalization verification in Resting Lympho")
abline(a = 0, b = 1, col = "dark blue")

# Genes flagged in all three resting replicates, then in both lymphocyte
# conditions (resting and activated).
LymphoRStable <- Reduce(intersect, rest_lympho_stable)
LymphoStable <- intersect(LymphoRStable, LymphoAStable)


# Save Data ------------------------------------------------------------

# Bundle the two gene sets in an unnamed list: element 1 holds the lymphocyte
# genes, element 2 the macrophage genes. The list is deliberately left unnamed
# so that positional consumers keep working.
stablesGenes <- list(LymphoStable, macroStable)

# save() cannot create missing directories, so make sure the output folder
# exists before writing (no-op when it is already there).
stable_genes_file <- "results/stableGenesInEachCondition.RData"
dir.create(dirname(stable_genes_file), showWarnings = FALSE, recursive = TRUE)
save(stablesGenes, file = stable_genes_file)
