########################################
###### Normalisation des données  ######
######         2020.01.23         ######
########################################

# 0. Set env --------------------------------------------------------------

library(ggplot2)
theme_set(theme_bw())
library(reshape2)
library(DESeq2)
library(stringr)
library(dplyr)

# Plot colours: one per activation state (resting / activated cells).
color_resting <- "#69b3a2"
color_activated <- "#404080"

# Count table: one row per feature (row names taken from the first column),
# one column per library.
all_data <- read.csv(
  file = "data/HTSeq_count_stats_all_libraries.csv",
  header = TRUE,
  row.names = 1,
  stringsAsFactors = TRUE
)

# Restores the objects saved in this .RData file; the sections below use
# `stablesGenes` from it.
load("results/stableGenesInEachCondition.RData")

# Every plot drawn from here on is written to this PDF.
# NOTE(review): the matching dev.off() is not visible in this part of the
# script - confirm the device is closed at the end.
pdf("results/01_check_Normalisation.pdf")
# 1. Macro --------------------------------------------------------
# Resting macrophages: normalise the exon counts with size factors estimated
# on the stable genes only (stablesGenes[[2]]), then check on 0h vs 3h
# Triptolide scatter plots that the stable genes sit on the y = x diagonal.

# Gene rows only ("^ENSMUSG": anchored, the previous "ENSMUSG*" pattern meant
# "ENSMUS followed by zero or more G" anywhere in the name).
macroRestingCounts <- all_data[
  grep("^ENSMUSG", rownames(all_data)),
  grep("LPSno_.*macro.*(_exon$)", colnames(all_data))
]

# Sample sheet: the group is the library name up to and including the
# treatment token.
colData <- as.data.frame(colnames(macroRestingCounts))
conds <- str_extract(colData[, 1], ".*(e|d|X|R)_")
colData <- cbind(colData, conds)
colnames(colData) <- c("librairies", "group")

macroRestingStableCounts <- macroRestingCounts[
  rownames(macroRestingCounts) %in% stablesGenes[[2]],
]

dds <- DESeqDataSetFromMatrix(
  countData = as.matrix(macroRestingCounts),
  colData = colData,
  design = ~ group
)
# Size factors come from the stable genes only, not from the whole matrix.
sizeFactors(dds) <- estimateSizeFactorsForMatrix(macroRestingStableCounts)
dds <- DESeq(dds)

macroRestingCountsNormalized <- counts(dds, normalized = TRUE)
# boxplot(log10(macroRestingCountsNormalized), outline = FALSE, las = 2, main = "Normalisation via genes stables")

data <- macroRestingCountsNormalized[
  , grep("(0|3)h_Triptolide_i", colnames(macroRestingCountsNormalized))
]

# Libraries are addressed by name (replicate k = k-th element of each vector)
# so the result never depends on the column order of the input file.
libs_0h <- c("LPSno_macro_0h_Triptolide_i3_4_exon",
             "LPSno_macro_0h_Triptolide_i4_4_exon",
             "LPSno_macro_0h_Triptolide_i5_4_exon")
libs_3h <- c("LPSno_macro_3h_Triptolide_i3_6_exon",
             "LPSno_macro_3h_Triptolide_i4_6_exon",
             "LPSno_macro_3h_Triptolide_i5_6_exon")
stopifnot(all(c(libs_0h, libs_3h) %in% colnames(data)))

is_stable <- rownames(data) %in% stablesGenes[[2]]

# One scatter plot per replicate, stable genes overlaid in red.
for (k in seq_along(libs_0h)) {
  pair <- c(libs_0h[k], libs_3h[k])
  plot(
    x = log10(data[, pair[1]]), y = log10(data[, pair[2]]),
    col = alpha(color_resting, 0.1), pch = 20, cex = 1,
    xlab = paste0("0h Triptolide ( log10(normalized counts), rep", k, " )"),
    ylab = paste0("3h Triptolide ( log10(normalized counts), rep", k, " )"),
    main = paste0("Normalization verification in Resting macrophage - Rep", k)
  )
  abline(a = 0, b = 1, col = "dark blue")
  # drop = FALSE keeps a matrix even if a single stable gene is present
  avg_data <- data[is_stable, pair, drop = FALSE]
  points(x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
         col = alpha("red", 1), pch = 20, cex = 1)
  legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)
}

# Replicate means
avg_data <- cbind(rowMeans(data[, libs_0h]), rowMeans(data[, libs_3h]))
plot(
  x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
  col = alpha(color_resting, 0.1), pch = 20, cex = 1,
  xlab = "0h Triptolide ( log10(mean normalized counts), n = 3 )",
  ylab = "3h Triptolide ( log10(mean normalized counts), n = 3 )",
  main = "Normalization verification in Resting macrophage - Mean"
)
abline(a = 0, b = 1, col = "dark blue")
avg_data <- avg_data[rownames(avg_data) %in% stablesGenes[[2]], , drop = FALSE]
points(x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
       col = alpha("red", 1), pch = 20, cex = 1)
legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)


# 2. Macro Activated ------------------------------------------------------
# Activated (LPS) macrophages: normalise the exon counts with size factors
# estimated on the stable genes only (stablesGenes[[2]]), then check on
# 0h vs 3h Triptolide scatter plots that the stable genes sit on y = x.

# Gene rows only ("^ENSMUSG": anchored, the previous "ENSMUSG*" pattern meant
# "ENSMUS followed by zero or more G" anywhere in the name).
macroActivatedCounts <- all_data[
  grep("^ENSMUSG", rownames(all_data)),
  grep("LPS_.*macro.*(_exon$)", colnames(all_data))
]

# Sample sheet: the group is the library name up to and including the
# treatment token.
colData <- as.data.frame(colnames(macroActivatedCounts))
conds <- str_extract(colData[, 1], ".*(e|d|X|R)_")
colData <- cbind(colData, conds)
colnames(colData) <- c("librairies", "group")

macroActivatedStableCounts <- macroActivatedCounts[
  rownames(macroActivatedCounts) %in% stablesGenes[[2]],
]

dds <- DESeqDataSetFromMatrix(
  countData = as.matrix(macroActivatedCounts),
  colData = colData,
  design = ~ group
)
# Size factors come from the stable genes only, not from the whole matrix.
sizeFactors(dds) <- estimateSizeFactorsForMatrix(macroActivatedStableCounts)
dds <- DESeq(dds)

macroActivatedCountsNormalized <- counts(dds, normalized = TRUE)
# boxplot(log10(macroActivatedCountsNormalized), outline = FALSE, las = 2, main = "Normalisation via genes stables")

data <- macroActivatedCountsNormalized[
  , grep("(0|3)h_Triptolide_m", colnames(macroActivatedCountsNormalized))
]

# Libraries are addressed by name (replicate k = k-th element of each vector)
# so the result never depends on the column order of the input file.
libs_0h <- c("LPS_macro_0h_Triptolide_m3_4_exon",
             "LPS_macro_0h_Triptolide_m4_4_exon",
             "LPS_macro_0h_Triptolide_m5_4_exon")
libs_3h <- c("LPS_macro_3h_Triptolide_m3_6_exon",
             "LPS_macro_3h_Triptolide_m4_6_exon",
             "LPS_macro_3h_Triptolide_m5_6_exon")
stopifnot(all(c(libs_0h, libs_3h) %in% colnames(data)))

is_stable <- rownames(data) %in% stablesGenes[[2]]

# One scatter plot per replicate, stable genes overlaid in red.
for (k in seq_along(libs_0h)) {
  pair <- c(libs_0h[k], libs_3h[k])
  plot(
    x = log10(data[, pair[1]]), y = log10(data[, pair[2]]),
    col = alpha(color_activated, 0.1), pch = 20, cex = 1,
    xlab = paste0("0h Triptolide ( log10(normalized counts), rep", k, " )"),
    ylab = paste0("3h Triptolide ( log10(normalized counts), rep", k, " )"),
    main = paste0("Normalization verification in Activated macrophage - Rep", k)
  )
  abline(a = 0, b = 1, col = "dark blue")
  # drop = FALSE keeps a matrix even if a single stable gene is present
  avg_data <- data[is_stable, pair, drop = FALSE]
  points(x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
         col = alpha("red", 1), pch = 20, cex = 1)
  legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)
}

# Replicate means
avg_data <- cbind(rowMeans(data[, libs_0h]), rowMeans(data[, libs_3h]))
plot(
  x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
  col = alpha(color_activated, 0.1), pch = 20, cex = 1,
  xlab = "0h Triptolide ( log10(mean normalized counts), n = 3 )",
  ylab = "3h Triptolide ( log10(mean normalized counts), n = 3 )",
  main = "Normalization verification in Activated macrophage - Mean "
)
abline(a = 0, b = 1, col = "dark blue")
avg_data <- avg_data[rownames(avg_data) %in% stablesGenes[[2]], , drop = FALSE]
points(x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
       col = alpha("red", 1), pch = 20, cex = 1)
legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)


# 3. Lympho Resting -------------------------------------------------------
# Resting lymphocytes: drop weakly covered genes, normalise with size factors
# estimated on the stable genes only (stablesGenes[[1]]), then check on
# 0h vs 3h Triptolide scatter plots that the stable genes sit on y = x.

# Gene rows only ("^ENSMUSG": anchored, the previous "ENSMUSG*" pattern meant
# "ENSMUS followed by zero or more G" anywhere in the name).
lymphoRestingCounts <- all_data[
  grep("^ENSMUSG", rownames(all_data)),
  grep("^R[2-4]_.*(_exon$)", colnames(all_data))
]

# Sample sheet: the group is the part of the library name from "Resting_" on.
colData <- as.data.frame(colnames(lymphoRestingCounts))
conds <- str_extract(colData[, 1], "Resting_.*")
colData <- cbind(colData, conds)
colnames(colData) <- c("librairies", "group")

# Keep genes with more than 20 reads summed over all libraries.
# NOTE(review): the activated-lymphocyte section uses 50 - confirm the
# different thresholds are intentional.
sumd <- rowSums(lymphoRestingCounts)
filtd <- lymphoRestingCounts[which(sumd > 20), , drop = FALSE]

lymphoRestingStableCounts <- filtd[rownames(filtd) %in% stablesGenes[[1]], ]

dds <- DESeqDataSetFromMatrix(
  countData = as.matrix(filtd),
  colData = colData,
  design = ~ group
)
# Size factors come from the stable genes only, not from the whole matrix.
sizeFactors(dds) <- estimateSizeFactorsForMatrix(lymphoRestingStableCounts)
dds <- DESeq(dds)

lymphoRestingCountsNormalized <- counts(dds, normalized = TRUE)

data <- lymphoRestingCountsNormalized[
  , grep("(0|3)h_Triptolide_exon", colnames(lymphoRestingCountsNormalized))
]

# Replicate k = k-th element of each vector.
libs_0h <- c("R2_4_Resting_0h_Triptolide_exon",
             "R3_4_Resting_0h_Triptolide_exon",
             "R4_4_Resting_0h_Triptolide_exon")
libs_3h <- c("R2_6_Resting_3h_Triptolide_exon",
             "R3_6_Resting_3h_Triptolide_exon",
             "R4_6_Resting_3h_Triptolide_exon")
stopifnot(all(c(libs_0h, libs_3h) %in% colnames(data)))

is_stable <- rownames(data) %in% stablesGenes[[1]]

# One scatter plot per replicate, stable genes overlaid in red.
for (k in seq_along(libs_0h)) {
  pair <- c(libs_0h[k], libs_3h[k])
  plot(
    x = log10(data[, pair[1]]), y = log10(data[, pair[2]]),
    col = alpha(color_resting, 0.1), pch = 20, cex = 1,
    xlab = paste0("0h Triptolide ( log10(normalized counts), rep", k, " )"),
    ylab = paste0("3h Triptolide ( log10(normalized counts), rep", k, " )"),
    main = paste0("Normalization verification in Resting Lympho - Rep", k)
  )
  abline(a = 0, b = 1, col = "dark blue")
  # drop = FALSE keeps a matrix even if a single stable gene is present
  avg_data <- data[is_stable, pair, drop = FALSE]
  points(x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
         col = alpha("red", 1), pch = 20, cex = 1)
  legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)
}

# Replicate means
avg_data <- cbind(rowMeans(data[, libs_0h]), rowMeans(data[, libs_3h]))
plot(
  x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
  col = alpha(color_resting, 0.1), pch = 20, cex = 1,
  xlab = "0h Triptolide ( log10(mean normalized counts), n = 3 )",
  ylab = "3h Triptolide ( log10(mean normalized counts), n = 3 )",
  main = "Normalization verification in Resting Lympho - Mean "
)
abline(a = 0, b = 1, col = "dark blue")
avg_data <- avg_data[rownames(avg_data) %in% stablesGenes[[1]], , drop = FALSE]
points(x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
       col = alpha("red", 1), pch = 20, cex = 1)
legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)

# 4. Lympho Activated -----------------------------------------------------
# Activated lymphocytes: drop weakly covered genes, normalise with size
# factors estimated on the stable genes only (stablesGenes[[1]]), then check
# on 0h vs 3h Triptolide scatter plots that the stable genes sit on y = x.

# Gene rows only ("^ENSMUSG": anchored, the previous "ENSMUSG*" pattern meant
# "ENSMUS followed by zero or more G" anywhere in the name).
lymphoActivatedCounts <- all_data[
  grep("^ENSMUSG", rownames(all_data)),
  grep("^A[2-4]_.*(_exon$)", colnames(all_data))
]

# Sample sheet: the group is the part of the library name from "Activated_" on.
colData <- as.data.frame(colnames(lymphoActivatedCounts))
conds <- str_extract(colData[, 1], "Activated_.*")
colData <- cbind(colData, conds)
colnames(colData) <- c("librairies", "group")

# Keep genes with more than 50 reads summed over all libraries.
# NOTE(review): the resting-lymphocyte section uses 20 - confirm the
# different thresholds are intentional.
sumd <- rowSums(lymphoActivatedCounts)
filtd <- lymphoActivatedCounts[which(sumd > 50), , drop = FALSE]

lymphoActivatedStableCounts <- filtd[rownames(filtd) %in% stablesGenes[[1]], ]

dds <- DESeqDataSetFromMatrix(
  countData = as.matrix(filtd),
  colData = colData,
  design = ~ group
)
# Size factors come from the stable genes only, not from the whole matrix.
sizeFactors(dds) <- estimateSizeFactorsForMatrix(lymphoActivatedStableCounts)
dds <- DESeq(dds)

lymphoActivatedCountsNormalized <- counts(dds, normalized = TRUE)
# boxplot(log10(lymphoActivatedCountsNormalized), outline = FALSE, las = 2, main = "Normalisation via genes stables")

data <- lymphoActivatedCountsNormalized[
  , grep("(0|3)h_Triptolide_exon", colnames(lymphoActivatedCountsNormalized))
]

# Replicate k = k-th element of each vector.
libs_0h <- c("A2_7_Activated_0h_Triptolide_exon",
             "A3_7_Activated_0h_Triptolide_exon",
             "A4_7_Activated_0h_Triptolide_exon")
libs_3h <- c("A2_9_Activated_3h_Triptolide_exon",
             "A3_9_Activated_3h_Triptolide_exon",
             "A4_9_Activated_3h_Triptolide_exon")
stopifnot(all(c(libs_0h, libs_3h) %in% colnames(data)))

is_stable <- rownames(data) %in% stablesGenes[[1]]

# One scatter plot per replicate, stable genes overlaid in red.
for (k in seq_along(libs_0h)) {
  pair <- c(libs_0h[k], libs_3h[k])
  plot(
    x = log10(data[, pair[1]]), y = log10(data[, pair[2]]),
    col = alpha(color_activated, 0.1), pch = 20, cex = 1,
    xlab = paste0("0h Triptolide ( log10(normalized counts), rep", k, " )"),
    ylab = paste0("3h Triptolide ( log10(normalized counts), rep", k, " )"),
    main = paste0("Normalization verification in Activated Lympho - Rep", k)
  )
  abline(a = 0, b = 1, col = "dark blue")
  # drop = FALSE keeps a matrix even if a single stable gene is present
  avg_data <- data[is_stable, pair, drop = FALSE]
  points(x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
         col = alpha("red", 1), pch = 20, cex = 1)
  legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)
}

# Replicate means
avg_data <- cbind(rowMeans(data[, libs_0h]), rowMeans(data[, libs_3h]))
plot(
  x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
  col = alpha(color_activated, 0.1), pch = 20, cex = 1,
  xlab = "0h Triptolide ( log10(mean normalized counts), n = 3 )",
  ylab = "3h Triptolide ( log10(mean normalized counts), n = 3 )",
  main = "Normalization verification in Activated Lympho - Mean "
)
abline(a = 0, b = 1, col = "dark blue")
avg_data <- avg_data[rownames(avg_data) %in% stablesGenes[[1]], , drop = FALSE]
points(x = log10(avg_data[, 1]), y = log10(avg_data[, 2]),
       col = alpha("red", 1), pch = 20, cex = 1)
legend("bottomright", legend = "stables genes", col = c("red"), pch = 20)


# 5. Formatting data in database ---------------------------------------------
# Stack the four normalised count matrices into one long table `dball`
# (one row per gene x library) and annotate every row with the cell type,
# activation state, treatment, time point and replicate decoded from the
# library name. reshape2 (melt) is attached at the top of the script.

data <- list(macroActivatedCountsNormalized, macroRestingCountsNormalized,
             lymphoActivatedCountsNormalized, lymphoRestingCountsNormalized)

# Annotation rules: for each output column, label = regex on the library name.
# Rules are applied in order, so a later rule overrides an earlier one when
# both match. Only the treatment rules need PCRE (negative look-ahead).
annotation_rules <- list(
  "cell" = list(perl = FALSE, rules = c(
    "macro"  = "macro",
    "Lympho" = "(R[1-9]_)|(A[1-9]_)"
  )),
  "Activated/Resting" = list(perl = FALSE, rules = c(
    "Activated" = "(LPS_)|(A[1-9]_)",
    "Resting"   = "(LPSno_)|(R[1-9]_)"
  )),
  "treatment" = list(perl = TRUE, rules = c(
    "untreated" = "_untreated_",
    "Trip"      = "_Triptolide_(?!CHX|HARR|Harr)",
    "TripCHX"   = "_Triptolide_CHX_",
    "TripHarr"  = "_Triptolide_(Harr|HARR)_",
    "CHX"       = "h_CHX_",
    "DRB"       = "_DRB_(?!CHX|HARR|Harr)",
    "DRBCHX"    = "_DRB_CHX_",
    "Harr"      = "_Harringtonine_",
    "DRBHarr"   = "_DRB_(Harr|HARR)_"
  )),
  "time" = list(perl = FALSE, rules = c(
    "0h" = "0h",
    "1h" = "1h",
    "3h" = "3h"
  )),
  "replicate" = list(perl = FALSE, rules = c(
    "rep1" = "(A|R)2_|(_(m|i)3_)",
    "rep2" = "(A|R)3_|(_(m|i)4_)",
    "rep3" = "(A|R)4_|(_(m|i)5_)"
  ))
)

# Returns, for each library name, the label of the last matching rule
# (NA when no rule matches).
tag_libraries <- function(libs, rules, perl = FALSE) {
  tags <- rep(NA_character_, length(libs))
  for (label in names(rules)) {
    tags[grep(rules[[label]], libs, perl = perl)] <- label
  }
  tags
}

# Melts one normalised count matrix to long format and appends the five
# annotation columns. Warns (instead of silently passing) when a library
# cannot be annotated: expressions evaluated inside a loop are not printed,
# so the former in-loop "checks" never reported anything.
format_counts <- function(counts) {
  db <- as.data.frame(counts)
  db <- cbind(rownames(db), db)
  db <- melt(db, id.vars = 1)
  colnames(db) <- c("ensemblID", "librairies", "normReadsCounts")

  # Annotate each library once, then expand to the melted rows.
  lib_names <- colnames(counts)
  lib_index <- match(as.character(db$librairies), lib_names)
  for (field in names(annotation_rules)) {
    spec <- annotation_rules[[field]]
    tags <- tag_libraries(lib_names, spec$rules, perl = spec$perl)
    if (anyNA(tags)) {
      warning(
        sprintf("No '%s' annotation matched for: %s", field,
                paste(lib_names[is.na(tags)], collapse = ", ")),
        call. = FALSE
      )
    }
    db[[field]] <- tags[lib_index]
  }
  db
}

# Columns: ensemblID, librairies, normReadsCounts, cell, Activated/Resting,
# treatment, time, replicate.
dball <- do.call(rbind, lapply(data, format_counts))

save(dball, file = "results/dbNormCountsAll_exon.RData")
write.csv(dball, "results/dbNormCountsAll_exon.csv")


# 6. Verif plots : sum and median of transcriptome --------------------------------------------------------

## a. Macro A
# Per (treatment, time, replicate): median and sum of the normalised counts
# over the genes expressed in activated macrophages.
MacroActivatedCounts <- dball[
  dball$cell == "macro" & dball$`Activated/Resting` == "Activated",
]

# Expressed genes: > 25 normalised reads in at least one untreated 0h library.
is_expressed <- MacroActivatedCounts$time == "0h" &
  MacroActivatedCounts$normReadsCounts > 25 &
  MacroActivatedCounts$treatment == "untreated"
filtredgenes <- as.data.frame(
  unique(MacroActivatedCounts[is_expressed, "ensemblID"])
)
colnames(filtredgenes) <- "ensemblID"
MacroActivatedCounts <- merge(MacroActivatedCounts, filtredgenes,
                              by = "ensemblID")

# Strip the "h" suffix from the time labels ("0h" -> "0", ...). The column
# stays character, so ggplot draws time as a discrete axis.
MacroActivatedCounts$time <- sub("h", "", MacroActivatedCounts$time)

MacroActivatedCounts$normReadsCounts <-
  as.numeric(as.character(MacroActivatedCounts$normReadsCounts))

b <- as.data.frame(MacroActivatedCounts) %>%
  group_by(treatment, time, replicate) %>%
  summarise(median = median(normReadsCounts), sd = sd(normReadsCounts))

# Median per group. print() is explicit so the page is also written when the
# script is run through source(), where top-level values are not auto-printed.
print(
  ggplot(b, aes(x = time, y = median, color = treatment, pch = replicate,
                group = treatment)) +
    #geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1) +
    geom_line() +
    geom_point(cex = 2) +
    ggtitle("Macro Activated - median of all transcriptome")
)

b <- as.data.frame(MacroActivatedCounts) %>%
  group_by(treatment, time, replicate) %>%
  summarise(sum = sum(normReadsCounts), sd = sd(normReadsCounts))

# Sum per group (title typo "Actiavted" fixed).
print(
  ggplot(b, aes(x = time, y = sum, color = treatment, pch = replicate,
                group = treatment)) +
    #geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1) +
    geom_line() +
    geom_point(cex = 2) +
    ggtitle("Macro Activated - sum of all transcriptome")
)

# b. Macro R
# Per (treatment, time, replicate): median and sum of the normalised counts
# over the genes expressed in resting macrophages.
MacroRestingCounts <- dball[
  dball$cell == "macro" & dball$`Activated/Resting` == "Resting",
]

# Expressed genes: > 25 normalised reads in at least one untreated 0h library.
is_expressed <- MacroRestingCounts$time == "0h" &
  MacroRestingCounts$normReadsCounts > 25 &
  MacroRestingCounts$treatment == "untreated"
filtredgenes <- as.data.frame(
  unique(MacroRestingCounts[is_expressed, "ensemblID"])
)
colnames(filtredgenes) <- "ensemblID"
MacroRestingCounts <- merge(MacroRestingCounts, filtredgenes, by = "ensemblID")

# Strip the "h" suffix from the time labels ("0h" -> "0", ...). The column
# stays character, so ggplot draws time as a discrete axis.
MacroRestingCounts$time <- sub("h", "", MacroRestingCounts$time)

MacroRestingCounts$normReadsCounts <-
  as.numeric(as.character(MacroRestingCounts$normReadsCounts))

b <- as.data.frame(MacroRestingCounts) %>%
  group_by(treatment, time, replicate) %>%
  summarise(median = median(normReadsCounts), sd = sd(normReadsCounts))

# Median per group. print() is explicit so the page is also written when the
# script is run through source(), where top-level values are not auto-printed.
print(
  ggplot(b, aes(x = time, y = median, color = treatment, group = treatment,
                pch = replicate)) +
    #geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1) +
    geom_line() +
    geom_point(cex = 2) +
    ggtitle("Macro Resting - median of all transcriptome")
)

b <- as.data.frame(MacroRestingCounts) %>%
  group_by(treatment, time, replicate) %>%
  summarise(sum = sum(normReadsCounts))

# Sum per group.
print(
  ggplot(b, aes(x = time, y = sum, color = treatment, group = treatment,
                pch = replicate)) +
    #geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1) +
    geom_line() +
    geom_point(cex = 2) +
    ggtitle("Macro Resting - sum of all transcriptome")
)

# c. Lympho R
# Per (treatment, time, replicate): median and sum of the normalised counts
# over the genes expressed in resting lymphocytes.
# NOTE(review): this reuses the name `lymphoRestingCounts`, replacing the raw
# count table built in section 3 with rows of `dball`.
lymphoRestingCounts <- dball[
  dball$cell == "Lympho" & dball$`Activated/Resting` == "Resting",
]

# Expressed genes: > 25 normalised reads in at least one untreated 0h library.
is_expressed <- lymphoRestingCounts$time == "0h" &
  lymphoRestingCounts$normReadsCounts > 25 &
  lymphoRestingCounts$treatment == "untreated"
filtredgenes <- as.data.frame(
  unique(lymphoRestingCounts[is_expressed, "ensemblID"])
)
colnames(filtredgenes) <- "ensemblID"
lymphoRestingCounts <- merge(lymphoRestingCounts, filtredgenes,
                             by = "ensemblID")

# Strip the "h" suffix from the time labels ("0h" -> "0", ...). The column
# stays character, so ggplot draws time as a discrete axis.
lymphoRestingCounts$time <- sub("h", "", lymphoRestingCounts$time)

lymphoRestingCounts$normReadsCounts <-
  as.numeric(as.character(lymphoRestingCounts$normReadsCounts))

b <- as.data.frame(lymphoRestingCounts) %>%
  group_by(treatment, time, replicate) %>%
  summarise(median = median(normReadsCounts))

# Median per group. print() is explicit so the page is also written when the
# script is run through source(), where top-level values are not auto-printed.
print(
  ggplot(b, aes(x = time, y = median, color = treatment, group = treatment,
                pch = replicate)) +
    #geom_errorbar(aes(ymin=median-sd, ymax=median+sd), width=.1) +
    geom_line() +
    geom_point() +
    ggtitle("Lympho Resting - median of all transcriptome")
)

b <- as.data.frame(lymphoRestingCounts) %>%
  group_by(treatment, time, replicate) %>%
  summarise(sum = sum(normReadsCounts))

# Sum per group.
print(
  ggplot(b, aes(x = time, y = sum, color = treatment, group = treatment,
                pch = replicate)) +
    #geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1) +
    geom_line() +
    geom_point() +
    ggtitle("Lympho Resting - sum of all transcriptome")
)


# d. Lympho A
# Per (treatment, time, replicate): median and sum of the normalised counts
# over the genes expressed in activated lymphocytes.
# NOTE(review): this reuses the name `lymphoActivatedCounts`, replacing the
# raw count table built in section 4 with rows of `dball`.
lymphoActivatedCounts <- dball[
  dball$cell == "Lympho" & dball$`Activated/Resting` == "Activated",
]

# Expressed genes: > 25 normalised reads in at least one untreated 0h library.
is_expressed <- lymphoActivatedCounts$time == "0h" &
  lymphoActivatedCounts$normReadsCounts > 25 &
  lymphoActivatedCounts$treatment == "untreated"
filtredgenes <- as.data.frame(
  unique(lymphoActivatedCounts[is_expressed, "ensemblID"])
)
colnames(filtredgenes) <- "ensemblID"
lymphoActivatedCounts <- merge(lymphoActivatedCounts, filtredgenes,
                               by = "ensemblID")

# Strip the "h" suffix from the time labels ("0h" -> "0", ...). The column
# stays character, so ggplot draws time as a discrete axis.
lymphoActivatedCounts$time <- sub("h", "", lymphoActivatedCounts$time)

lymphoActivatedCounts$normReadsCounts <-
  as.numeric(as.character(lymphoActivatedCounts$normReadsCounts))

b <- as.data.frame(lymphoActivatedCounts) %>%
  group_by(treatment, time, replicate) %>%
  summarise(median = median(normReadsCounts), sd = sd(normReadsCounts))

# Median per group. print() is explicit so the page is also written when the
# script is run through source(), where top-level values are not auto-printed.
print(
  ggplot(b, aes(x = time, y = median, color = treatment, group = treatment,
                pch = replicate)) +
    #geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1) +
    geom_line() +
    geom_point() +
    ggtitle("Lympho Activated - median of all transcriptome")
)

b <- as.data.frame(lymphoActivatedCounts) %>%
  group_by(treatment, time, replicate) %>%
  summarise(sum = sum(normReadsCounts))

# Sum per group (title previously read "Lympho Resting": copy-paste slip).
print(
  ggplot(b, aes(x = time, y = sum, color = treatment, group = treatment,
                pch = replicate)) +
    #geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1) +
    geom_line() +
    geom_point() +
    ggtitle("Lympho Activated - sum of all transcriptome")
)


# 7. Check that T0Trip of each replicate not deviate the mean -------------

# Lympho R
# Per gene: mean of the 0h Triptolide normalised counts over the libraries,
# compared with each replicate's own value.
is_t0_trip <- dball$cell == "Lympho" &
  dball$`Activated/Resting` == "Resting" &
  dball$time == "0h" &
  dball$treatment == "Trip"

data <- as.data.frame(dball[is_t0_trip, ]) %>%
  group_by(ensemblID) %>%
  summarise(normCounts_mean = mean(normReadsCounts))

count_cols <- c("ensemblID", "normReadsCounts")
rep1 <- dball[is_t0_trip & dball$replicate == "rep1", count_cols]
rep2 <- dball[is_t0_trip & dball$replicate == "rep2", count_cols]
rep3 <- dball[is_t0_trip & dball$replicate == "rep3", count_cols]

data <- merge(data, rep1, by = "ensemblID")
data <- merge(data, rep2, by = "ensemblID")
data <- merge(data, rep3, by = "ensemblID")
colnames(data) <- c("ensemblID", "mean", "rep1", "rep2", "rep3")

# The colour aesthetic carries the replicate name itself. Mapping the
# constants "red"/"green"/"blue" and relabelling them rep1..rep3 attached the
# labels in alphabetical order of the constants (blue, green, red), which
# swapped the rep1 and rep3 legend entries.
# Axis titles now match the mapping: x is the mean, y the replicate.
# print() is explicit so the page is also written under source().
print(
  ggplot(data) +
    geom_point(aes(x = mean, y = rep1, color = "rep1"), alpha = 0.15) +
    geom_point(aes(x = mean, y = rep2, color = "rep2"), alpha = 0.15) +
    geom_point(aes(x = mean, y = rep3, color = "rep3"), alpha = 0.15) +
    geom_abline(slope = 1, intercept = 0) +
    scale_x_log10() + scale_y_log10() +
    scale_color_discrete(name = "replicate") +
    xlab("norm Counts mean") + ylab("norm Counts of replicate") +
    ggtitle("Lympho R") + theme(plot.title = element_text(hjust = 0.5))
)

# Lympho A
# Per gene: mean of the 0h Triptolide normalised counts over the libraries,
# compared with each replicate's own value.
is_t0_trip <- dball$cell == "Lympho" &
  dball$`Activated/Resting` == "Activated" &
  dball$time == "0h" &
  dball$treatment == "Trip"

data <- as.data.frame(dball[is_t0_trip, ]) %>%
  group_by(ensemblID) %>%
  summarise(normCounts_mean = mean(normReadsCounts))

count_cols <- c("ensemblID", "normReadsCounts")
rep1 <- dball[is_t0_trip & dball$replicate == "rep1", count_cols]
rep2 <- dball[is_t0_trip & dball$replicate == "rep2", count_cols]
rep3 <- dball[is_t0_trip & dball$replicate == "rep3", count_cols]

data <- merge(data, rep1, by = "ensemblID")
data <- merge(data, rep2, by = "ensemblID")
data <- merge(data, rep3, by = "ensemblID")
colnames(data) <- c("ensemblID", "mean", "rep1", "rep2", "rep3")

# The colour aesthetic carries the replicate name itself. Mapping the
# constants "red"/"green"/"blue" and relabelling them rep1..rep3 attached the
# labels in alphabetical order of the constants (blue, green, red), which
# swapped the rep1 and rep3 legend entries.
# Axis titles now match the mapping: x is the mean, y the replicate.
# print() is explicit so the page is also written under source().
print(
  ggplot(data) +
    geom_point(aes(x = mean, y = rep1, color = "rep1"), alpha = 0.15) +
    geom_point(aes(x = mean, y = rep2, color = "rep2"), alpha = 0.15) +
    geom_point(aes(x = mean, y = rep3, color = "rep3"), alpha = 0.15) +
    geom_abline(slope = 1, intercept = 0) +
    scale_x_log10() + scale_y_log10() +
    scale_color_discrete(name = "replicate") +
    xlab("norm Counts mean") + ylab("norm Counts of each replicate") +
    ggtitle("Lympho A") + theme(plot.title = element_text(hjust = 0.5))
)

# Macro R
# Per gene: mean of the 0h Triptolide normalised counts over the libraries,
# compared with each replicate's own value.
is_t0_trip <- dball$cell == "macro" &
  dball$`Activated/Resting` == "Resting" &
  dball$time == "0h" &
  dball$treatment == "Trip"

data <- as.data.frame(dball[is_t0_trip, ]) %>%
  group_by(ensemblID) %>%
  summarise(normCounts_mean = mean(normReadsCounts))

count_cols <- c("ensemblID", "normReadsCounts")
rep1 <- dball[is_t0_trip & dball$replicate == "rep1", count_cols]
rep2 <- dball[is_t0_trip & dball$replicate == "rep2", count_cols]
rep3 <- dball[is_t0_trip & dball$replicate == "rep3", count_cols]

data <- merge(data, rep1, by = "ensemblID")
data <- merge(data, rep2, by = "ensemblID")
data <- merge(data, rep3, by = "ensemblID")
colnames(data) <- c("ensemblID", "mean", "rep1", "rep2", "rep3")

# The colour aesthetic carries the replicate name itself. Mapping the
# constants "red"/"green"/"blue" and relabelling them rep1..rep3 attached the
# labels in alphabetical order of the constants (blue, green, red), which
# swapped the rep1 and rep3 legend entries.
# Axis titles now match the mapping: x is the mean, y the replicate.
# print() is explicit so the page is also written under source().
print(
  ggplot(data) +
    geom_point(aes(x = mean, y = rep1, color = "rep1"), alpha = 0.15) +
    geom_point(aes(x = mean, y = rep2, color = "rep2"), alpha = 0.15) +
    geom_point(aes(x = mean, y = rep3, color = "rep3"), alpha = 0.15) +
    geom_abline(slope = 1, intercept = 0) +
    scale_x_log10() + scale_y_log10() +
    scale_color_discrete(name = "replicate") +
    xlab("norm Counts mean") + ylab("norm Counts of replicate") +
    ggtitle("macro R") + theme(plot.title = element_text(hjust = 0.5))
)

# Macro A
# Activated macrophages at 0h Triptolide: compare the normalised counts of each
# replicate with the per-gene mean over the selected libraries.
is_macro_activated_t0 <- dball$cell == "macro" &
  dball$`Activated/Resting` == "Activated" &
  dball$time == "0h" &
  dball$treatment == "Trip"

# Per-gene mean of the normalised counts.
data <- as.data.frame(dball[is_macro_activated_t0, ])
data <- group_by(data, ensemblID)
data <- summarise(data,
                  normCounts_mean = mean(normReadsCounts))

# Normalised counts of each replicate, keyed by gene.
rep1 <- dball[is_macro_activated_t0 & dball$replicate == "rep1",
              c("ensemblID", "normReadsCounts")]
rep2 <- dball[is_macro_activated_t0 & dball$replicate == "rep2",
              c("ensemblID", "normReadsCounts")]
rep3 <- dball[is_macro_activated_t0 & dball$replicate == "rep3",
              c("ensemblID", "normReadsCounts")]

# merge() defaults to an inner join: only genes found in the mean table and in
# all three replicates are kept.
data <- merge(data, rep1, by = "ensemblID")
data <- merge(data, rep2, by = "ensemblID")
data <- merge(data, rep3, by = "ensemblID")
colnames(data) <- c("ensemblID", "mean", "rep1", "rep2", "rep3")

# The colour aesthetic carries the replicate name itself. Mapping it to
# "red"/"green"/"blue" and relabelling with c("rep1", "rep2", "rep3") attached
# the labels to the sorted levels (blue, green, red), which swapped rep1 and
# rep3 in the legend.
# x holds the mean and y the replicate, so the axis titles follow that order.
ggplot(data) +
  geom_point(aes(x = mean, y = rep1, color = "rep1"), alpha = 0.15) +
  geom_point(aes(x = mean, y = rep2, color = "rep2"), alpha = 0.15) +
  geom_point(aes(x = mean, y = rep3, color = "rep3"), alpha = 0.15) +
  geom_abline(slope = 1, intercept = 0) +
  scale_x_log10() + scale_y_log10() +
  scale_color_discrete(name = "replicate") +
  xlab("norm Counts mean") + ylab("norm Counts of each replicate") +
  ggtitle("macro A") + theme(plot.title = element_text(hjust = 0.5))


# 8. Check that T3Trip and T3TripCHX replicate in LymphoR --------------------

# Resting lymphocytes at 3h Triptolide: compare the normalised counts of each
# replicate with the per-gene mean over the selected libraries.
is_lympho_resting_trip_3h <- dball$cell == "Lympho" &
  dball$`Activated/Resting` == "Resting" &
  dball$time == "3h" &
  dball$treatment == "Trip"

# Per-gene mean of the normalised counts.
data <- as.data.frame(dball[is_lympho_resting_trip_3h, ])
data <- group_by(data, ensemblID)
data <- summarise(data,
                  normCounts_mean = mean(normReadsCounts))

# Normalised counts of each replicate, keyed by gene.
rep1 <- dball[is_lympho_resting_trip_3h & dball$replicate == "rep1",
              c("ensemblID", "normReadsCounts")]
rep2 <- dball[is_lympho_resting_trip_3h & dball$replicate == "rep2",
              c("ensemblID", "normReadsCounts")]
rep3 <- dball[is_lympho_resting_trip_3h & dball$replicate == "rep3",
              c("ensemblID", "normReadsCounts")]

# merge() defaults to an inner join: only genes found in the mean table and in
# all three replicates are kept.
data <- merge(data, rep1, by = "ensemblID")
data <- merge(data, rep2, by = "ensemblID")
data <- merge(data, rep3, by = "ensemblID")
colnames(data) <- c("ensemblID", "mean", "rep1", "rep2", "rep3")

# The colour aesthetic carries the replicate name itself. Mapping it to
# "red"/"green"/"blue" and relabelling with c("rep1", "rep2", "rep3") attached
# the labels to the sorted levels (blue, green, red), which swapped rep1 and
# rep3 in the legend.
# x holds the mean and y the replicate, so the axis titles follow that order.
ggplot(data) +
  geom_point(aes(x = mean, y = rep1, color = "rep1"), alpha = 0.15) +
  geom_point(aes(x = mean, y = rep2, color = "rep2"), alpha = 0.15) +
  geom_point(aes(x = mean, y = rep3, color = "rep3"), alpha = 0.15) +
  geom_abline(slope = 1, intercept = 0) +
  scale_x_log10() + scale_y_log10() +
  scale_color_discrete(name = "replicate") +
  xlab("norm Counts mean") + ylab("norm Counts of replicate") +
  ggtitle("Lympho R - T3 Trip") + theme(plot.title = element_text(hjust = 0.5))

# 3h TripCHX

# Resting lymphocytes at 3h Triptolide + CHX: compare the normalised counts of
# each replicate with the per-gene mean over the selected libraries.
is_lympho_resting_tripchx_3h <- dball$cell == "Lympho" &
  dball$`Activated/Resting` == "Resting" &
  dball$time == "3h" &
  dball$treatment == "TripCHX"

# Per-gene mean of the normalised counts.
data <- as.data.frame(dball[is_lympho_resting_tripchx_3h, ])
data <- group_by(data, ensemblID)
data <- summarise(data,
                  normCounts_mean = mean(normReadsCounts))

# Normalised counts of each replicate, keyed by gene.
rep1 <- dball[is_lympho_resting_tripchx_3h & dball$replicate == "rep1",
              c("ensemblID", "normReadsCounts")]
rep2 <- dball[is_lympho_resting_tripchx_3h & dball$replicate == "rep2",
              c("ensemblID", "normReadsCounts")]
rep3 <- dball[is_lympho_resting_tripchx_3h & dball$replicate == "rep3",
              c("ensemblID", "normReadsCounts")]

# merge() defaults to an inner join: only genes found in the mean table and in
# all three replicates are kept.
data <- merge(data, rep1, by = "ensemblID")
data <- merge(data, rep2, by = "ensemblID")
data <- merge(data, rep3, by = "ensemblID")
colnames(data) <- c("ensemblID", "mean", "rep1", "rep2", "rep3")

# The colour aesthetic carries the replicate name itself. Mapping it to
# "red"/"green"/"blue" and relabelling with c("rep1", "rep2", "rep3") attached
# the labels to the sorted levels (blue, green, red), which swapped rep1 and
# rep3 in the legend.
# x holds the mean and y the replicate, so the axis titles follow that order.
ggplot(data) +
  geom_point(aes(x = mean, y = rep1, color = "rep1"), alpha = 0.15) +
  geom_point(aes(x = mean, y = rep2, color = "rep2"), alpha = 0.15) +
  geom_point(aes(x = mean, y = rep3, color = "rep3"), alpha = 0.15) +
  geom_abline(slope = 1, intercept = 0) +
  scale_x_log10() + scale_y_log10() +
  scale_color_discrete(name = "replicate") +
  xlab("norm Counts mean") + ylab("norm Counts of replicate") +
  ggtitle("Lympho R - T3 TripCHX") + theme(plot.title = element_text(hjust = 0.5))


### 3h Trip VS 3h TripCHX by replicate in one plot
# Resting lymphocytes at 3h, treated with Triptolide alone or Triptolide + CHX.
is_lympho_resting_3h <- dball$cell == "Lympho" &
  dball$`Activated/Resting` == "Resting" &
  dball$time == "3h" &
  (dball$treatment == "Trip" | dball$treatment == "TripCHX")
data <- dball[is_lympho_resting_3h, ]

# One table per treatment.
trip_counts <- data[data$treatment == "Trip", ]
trip_chx_counts <- data[data$treatment == "TripCHX", ]

# Pair both treatments gene by gene within each replicate (inner join).
data <- merge(
  trip_counts[, c("ensemblID", "replicate", "normReadsCounts")],
  trip_chx_counts[, c("ensemblID", "replicate", "normReadsCounts")],
  by = c("ensemblID", "replicate")
)
names(data) <- c("ensemblID", "replicate", "3hTrip", "3hTripCHX")

# Scatter of Trip (y) against Trip + CHX (x) on log10 axes, coloured by
# replicate, with the identity line as reference.
ggplot(data, aes(x = `3hTripCHX`, y = `3hTrip`, colour = replicate)) +
  geom_point(alpha = 0.15) +
  geom_abline(slope = 1, intercept = 0) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "3h Triptolide + CHX - Norm Counts",
    y = "3h Triptolide - Norm Counts",
    title = "Lympho R - T3 TripCHX vs T3 Trip"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

### 0h Trip VS 3h Trip by replicate in one plot
# Resting lymphocytes treated with Triptolide, at 0h and 3h.
is_lympho_resting_trip <- dball$cell == "Lympho" &
  dball$`Activated/Resting` == "Resting" &
  (dball$time == "3h" | dball$time == "0h") &
  dball$treatment == "Trip"
data <- dball[is_lympho_resting_trip, ]

# One table per time point.
counts_0h <- data[data$time == "0h", ]
counts_3h <- data[data$time == "3h", ]

# Pair both time points gene by gene within each replicate (inner join);
# column "a" holds the 0h counts and column "b" the 3h counts.
data <- merge(
  counts_0h[, c("ensemblID", "replicate", "normReadsCounts")],
  counts_3h[, c("ensemblID", "replicate", "normReadsCounts")],
  by = c("ensemblID", "replicate")
)
names(data) <- c("ensemblID", "replicate", "a", "b")

# Scatter of 3h (y) against 0h (x) on log10 axes, coloured by replicate, with
# the identity line as reference.
ggplot(data, aes(x = a, y = b, colour = replicate)) +
  geom_point(alpha = 0.15) +
  geom_abline(slope = 1, intercept = 0) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "0h Triptolide - Norm Counts",
    y = "3h Triptolide - Norm Counts",
    title = "Lympho R - T0 Trip vs T3 Trip"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# Close the PDF device so the report file is written out.
dev.off()

