
source("src/CSC_functions.R")

# Load the CDS sequences and build the codon / amino-acid usage tables used by
# every section below. All helpers come from src/CSC_functions.R.
# NOTE(review): the input path is hard-coded to a user home directory; confirm
# it before running on another machine.
fastaFile <- readDNAStringSet("~/RMI2/gitlab/tdd/data/cds_seq_mm10.txt")

# Codon usage: counts first, frequencies derived from the counts, and a
# data-frame version of each.
codon_counts <- count_codon(fastaFile)
codon_freq <- freq_codon(codon_counts)
df_codon_freq <- list_to_df(codon_freq)
df_codon_count <- list_to_df(codon_counts)

# Amino-acid usage: freq_AA() is evaluated once and its result reused for the
# data-frame version instead of being recomputed on the full sequence set.
# NOTE(review): assumes freq_AA() is deterministic and side-effect free.
aa_freq <- freq_AA(fastaFile)
df_aa_freq <- list_to_df(aa_freq, codon = FALSE)

# Absolute TDD index ------------------------------------------------------
# Codon-level analysis for the absolute TDD index. For each condition (resting,
# activated): compute the reference CSC with get_rCSC(), run pval_perm() with
# 10000 permutations, adjust the p-values, and save the table under results/.

index_resting <- "Abs.TDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.3h"
index_activated <- "Abs.TDD..Lympho_Activated.Trip_CHX.Ref_Trip_0h.3h"

# The index argument is passed as `index_name`, matching the amino-acid calls
# to get_rCSC() further down in this script, instead of the abbreviated `index`.
# NOTE(review): assumes the formal argument of get_rCSC() is `index_name`;
# confirm in src/CSC_functions.R.
rCSC_resting <- get_rCSC(df_codon_freq = df_codon_freq,
                         index_name = index_resting)
rCSC_activated <- get_rCSC(df_codon_freq = df_codon_freq,
                           index_name = index_activated)

# --- Resting ---
pval_resting <- pval_perm(cds = fastaFile,
                          index_name = index_resting,
                          CSC_ref = rCSC_resting,
                          n_perm = 10000)

# Correct the p-values for multiple testing.
# Entries equal to the string "<1e-04" (presumably the floor label produced by
# pval_perm()) are replaced by 1/10000, i.e. one over the number of
# permutations, so the column can be converted to numeric.
pval_resting[pval_resting == "<1e-04"] <- 1 / 10000
pval_resting <- as.data.frame(pval_resting, row.names = names(pval_resting))
colnames(pval_resting) <- "pval"
pval_resting$pval <- as.numeric(as.character(pval_resting$pval))
# "BY" is the Benjamini-Yekutieli adjustment of stats::p.adjust().
pval_resting$FDR <- p.adjust(pval_resting$pval, method = "BY")

# Codons no longer significant after correction:
# rownames(pval_resting)[pval_resting$pval<0.05 & pval_resting$FDR>0.05]
# "AAA" "ACA" "ACT" "GAA" "GTC" "TCC"

# The table is stored in the .RData file under the object name `pval_resting`.
save(pval_resting, file = paste0("results/pvalCSC_", index_resting, ".RData"))

# --- Activated ---
pval_activated <- pval_perm(cds = fastaFile,
                            index_name = index_activated,
                            CSC_ref = rCSC_activated,
                            n_perm = 10000)

# Correct the p-values for multiple testing (same steps as for resting).
pval_activated[pval_activated == "<1e-04"] <- 1 / 10000
pval_activated <- as.data.frame(pval_activated,
                                row.names = names(pval_activated))
colnames(pval_activated) <- "pval"
pval_activated$pval <- as.numeric(as.character(pval_activated$pval))
pval_activated$FDR <- p.adjust(pval_activated$pval, method = "BY")

# Codons no longer significant after correction:
# rownames(pval_activated)[pval_activated$pval<0.05 & pval_activated$FDR>0.05]
# "GTT" "TCA" "TCC" "TCG" "TTC" "TTG" "TTT"

# The table is stored in the .RData file under the object name `pval_activated`.
save(pval_activated,
     file = paste0("results/pvalCSC_", index_activated, ".RData"))
  
  # Absolute Non TDD index ---------------------------------------------------
  # Codon-level analysis for the absolute non-TDD index. For each condition
  # (resting, activated): compute the reference CSC with get_rCSC(), run
  # pval_perm() with 10000 permutations, adjust the p-values, and save the
  # table under results/.

  index_resting <- "Abs.NonTDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.3h"
  index_activated <- "Abs.NonTDD..Lympho_Activated.Trip_CHX.Ref_Trip_0h.3h"

  # The index argument is passed as `index_name`, matching the amino-acid calls
  # to get_rCSC() further down in this script, instead of the abbreviated
  # `index`.
  # NOTE(review): assumes the formal argument of get_rCSC() is `index_name`;
  # confirm in src/CSC_functions.R.
  rCSC_resting <- get_rCSC(df_codon_freq = df_codon_freq,
                           index_name = index_resting)
  rCSC_activated <- get_rCSC(df_codon_freq = df_codon_freq,
                             index_name = index_activated)

  # --- Resting ---
  pval_resting <- pval_perm(cds = fastaFile,
                            index_name = index_resting,
                            CSC_ref = rCSC_resting,
                            n_perm = 10000)

  # Correct the p-values for multiple testing.
  # Entries equal to the string "<1e-04" (presumably the floor label produced
  # by pval_perm()) are replaced by 1/10000, i.e. one over the number of
  # permutations, so the column can be converted to numeric.
  pval_resting[pval_resting == "<1e-04"] <- 1 / 10000
  pval_resting <- as.data.frame(pval_resting, row.names = names(pval_resting))
  colnames(pval_resting) <- "pval"
  pval_resting$pval <- as.numeric(as.character(pval_resting$pval))
  # "BY" is the Benjamini-Yekutieli adjustment of stats::p.adjust().
  pval_resting$FDR <- p.adjust(pval_resting$pval, method = "BY")

  # Codons no longer significant after correction:
  # rownames(pval_resting)[pval_resting$pval<0.05 & pval_resting$FDR>0.05]
  # "CTT" "GGA" "TTT"

  # The table is stored in the .RData file under the object name
  # `pval_resting`.
  save(pval_resting,
       file = paste0("results/pvalCSC_", index_resting, ".RData"))

  # --- Activated ---
  pval_activated <- pval_perm(cds = fastaFile,
                              index_name = index_activated,
                              CSC_ref = rCSC_activated,
                              n_perm = 10000)

  # Correct the p-values for multiple testing (same steps as for resting).
  pval_activated[pval_activated == "<1e-04"] <- 1 / 10000
  pval_activated <- as.data.frame(pval_activated,
                                  row.names = names(pval_activated))
  colnames(pval_activated) <- "pval"
  pval_activated$pval <- as.numeric(as.character(pval_activated$pval))
  pval_activated$FDR <- p.adjust(pval_activated$pval, method = "BY")

  # Codons no longer significant after correction:
  # rownames(pval_activated)[pval_activated$pval<0.05 & pval_activated$FDR>0.05]
  #  "AAA" "CAG" "GAA" "TCC" "TTT"

  # The table is stored in the .RData file under the object name
  # `pval_activated`.
  save(pval_activated,
       file = paste0("results/pvalCSC_", index_activated, ".RData"))


# Degradation rate ----------------------------------------------------------
# Codon-level analysis for the degradation fold index. For each condition
# (resting, activated): compute the reference CSC with get_rCSC(), run
# pval_perm() with 10000 permutations, adjust the p-values, and save the table
# under results/.

index_resting <- "DegFold.Lympho_Resting.Trip.Ref_Trip_0h.3h"
index_activated <- "DegFold.Lympho_Activated.Trip.Ref_Trip_0h.3h"

# The index argument is passed as `index_name`, matching the amino-acid calls
# to get_rCSC() further down in this script, instead of the abbreviated `index`.
# NOTE(review): assumes the formal argument of get_rCSC() is `index_name`;
# confirm in src/CSC_functions.R.
rCSC_resting <- get_rCSC(df_codon_freq = df_codon_freq,
                         index_name = index_resting)
rCSC_activated <- get_rCSC(df_codon_freq = df_codon_freq,
                           index_name = index_activated)

# --- Resting ---
pval_resting <- pval_perm(cds = fastaFile,
                          index_name = index_resting,
                          CSC_ref = rCSC_resting,
                          n_perm = 10000)

# Correct the p-values for multiple testing.
# Entries equal to the string "<1e-04" (presumably the floor label produced by
# pval_perm()) are replaced by 1/10000, i.e. one over the number of
# permutations, so the column can be converted to numeric.
pval_resting[pval_resting == "<1e-04"] <- 1 / 10000
pval_resting <- as.data.frame(pval_resting, row.names = names(pval_resting))
colnames(pval_resting) <- "pval"
pval_resting$pval <- as.numeric(as.character(pval_resting$pval))
# "BY" is the Benjamini-Yekutieli adjustment of stats::p.adjust().
pval_resting$FDR <- p.adjust(pval_resting$pval, method = "BY")

# Codons no longer significant after correction:
# rownames(pval_resting)[pval_resting$pval<0.05 & pval_resting$FDR>0.05]
# "AAT" "AGA" "AGC" "ATT" "CAA" "GAG" "GCC" "GTA" "TAT" "TCC" "TTA" "TTT"

# The table is stored in the .RData file under the object name `pval_resting`.
save(pval_resting, file = paste0("results/pvalCSC_", index_resting, ".RData"))

# --- Activated ---
pval_activated <- pval_perm(cds = fastaFile,
                            index_name = index_activated,
                            CSC_ref = rCSC_activated,
                            n_perm = 10000)

# Correct the p-values for multiple testing (same steps as for resting).
pval_activated[pval_activated == "<1e-04"] <- 1 / 10000
pval_activated <- as.data.frame(pval_activated,
                                row.names = names(pval_activated))
colnames(pval_activated) <- "pval"
pval_activated$pval <- as.numeric(as.character(pval_activated$pval))
pval_activated$FDR <- p.adjust(pval_activated$pval, method = "BY")

# Codons no longer significant after correction:
# rownames(pval_activated)[pval_activated$pval<0.05 & pval_activated$FDR>0.05]
# "AAC" "AAG" "CTT" "GAC" "GGA" "GTC" "TCG"

# The table is stored in the .RData file under the object name `pval_activated`.
save(pval_activated,
     file = paste0("results/pvalCSC_", index_activated, ".RData"))

# Absolute TDD index with Amino Acid --------------------------------------
# Amino-acid-level analysis for the absolute TDD index. Same pipeline as the
# codon-level section, but get_rCSC() receives the amino-acid frequency table
# and pval_perm() is called with is_codon = FALSE.

index_resting <- "Abs.TDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.3h"
index_activated <- "Abs.TDD..Lympho_Activated.Trip_CHX.Ref_Trip_0h.3h"

# The parameter is named df_codon_freq, but here it carries df_aa_freq.
rCSC_resting <- get_rCSC(df_codon_freq = df_aa_freq,
                         index_name = index_resting)
rCSC_activated <- get_rCSC(df_codon_freq = df_aa_freq,
                           index_name = index_activated)

# --- Resting ---
pval_resting <- pval_perm(cds = fastaFile,
                          index_name = index_resting,
                          CSC_ref = rCSC_resting,
                          n_perm = 10000,
                          is_codon = FALSE)

# Correct the p-values for multiple testing.
# Entries equal to the string "<1e-04" (presumably the floor label produced by
# pval_perm()) are replaced by 1/10000, i.e. one over the number of
# permutations, so the column can be converted to numeric.
pval_resting[pval_resting == "<1e-04"] <- 1 / 10000
pval_resting <- as.data.frame(pval_resting, row.names = names(pval_resting))
colnames(pval_resting) <- "pval"
pval_resting$pval <- as.numeric(as.character(pval_resting$pval))
# "BY" is the Benjamini-Yekutieli adjustment of stats::p.adjust().
pval_resting$FDR <- p.adjust(pval_resting$pval, method = "BY")

# The "amnioAcid" spelling in the file name is kept as-is so that existing
# files and any code loading them by name keep working.
save(pval_resting,
     file = paste0("results/pvalCSC_amnioAcid_", index_resting, ".RData"))

# --- Activated ---
pval_activated <- pval_perm(cds = fastaFile,
                            index_name = index_activated,
                            CSC_ref = rCSC_activated,
                            n_perm = 10000,
                            is_codon = FALSE)

# Correct the p-values for multiple testing (same steps as for resting).
pval_activated[pval_activated == "<1e-04"] <- 1 / 10000
pval_activated <- as.data.frame(pval_activated,
                                row.names = names(pval_activated))
colnames(pval_activated) <- "pval"
pval_activated$pval <- as.numeric(as.character(pval_activated$pval))
pval_activated$FDR <- p.adjust(pval_activated$pval, method = "BY")

# Saved with the same amino-acid prefix as the resting table. Without the
# prefix the path is identical to the codon-level file written for the same
# index name earlier in this script, which would then be overwritten.
save(pval_activated,
     file = paste0("results/pvalCSC_amnioAcid_", index_activated, ".RData"))

# Absolute Non TDD index with Amino Acid --------------------------------------
# Amino-acid-level analysis for the absolute non-TDD index. Same pipeline as
# the codon-level section, but get_rCSC() receives the amino-acid frequency
# table and pval_perm() is called with is_codon = FALSE.

index_resting <- "Abs.NonTDD..Lympho_Resting.Trip_CHX.Ref_Trip_0h.3h"
index_activated <- "Abs.NonTDD..Lympho_Activated.Trip_CHX.Ref_Trip_0h.3h"

# The parameter is named df_codon_freq, but here it carries df_aa_freq.
rCSC_resting <- get_rCSC(df_codon_freq = df_aa_freq,
                         index_name = index_resting)
rCSC_activated <- get_rCSC(df_codon_freq = df_aa_freq,
                           index_name = index_activated)

# --- Resting ---
pval_resting <- pval_perm(cds = fastaFile,
                          index_name = index_resting,
                          CSC_ref = rCSC_resting,
                          n_perm = 10000,
                          is_codon = FALSE)

# Correct the p-values for multiple testing.
# Entries equal to the string "<1e-04" (presumably the floor label produced by
# pval_perm()) are replaced by 1/10000, i.e. one over the number of
# permutations, so the column can be converted to numeric.
pval_resting[pval_resting == "<1e-04"] <- 1 / 10000
pval_resting <- as.data.frame(pval_resting, row.names = names(pval_resting))
colnames(pval_resting) <- "pval"
pval_resting$pval <- as.numeric(as.character(pval_resting$pval))
# "BY" is the Benjamini-Yekutieli adjustment of stats::p.adjust().
pval_resting$FDR <- p.adjust(pval_resting$pval, method = "BY")

# The "amnioAcid" spelling in the file name is kept as-is so that existing
# files and any code loading them by name keep working.
save(pval_resting,
     file = paste0("results/pvalCSC_amnioAcid_", index_resting, ".RData"))

# --- Activated ---
pval_activated <- pval_perm(cds = fastaFile,
                            index_name = index_activated,
                            CSC_ref = rCSC_activated,
                            n_perm = 10000,
                            is_codon = FALSE)

# Correct the p-values for multiple testing (same steps as for resting).
pval_activated[pval_activated == "<1e-04"] <- 1 / 10000
pval_activated <- as.data.frame(pval_activated,
                                row.names = names(pval_activated))
colnames(pval_activated) <- "pval"
pval_activated$pval <- as.numeric(as.character(pval_activated$pval))
pval_activated$FDR <- p.adjust(pval_activated$pval, method = "BY")

# Saved with the same amino-acid prefix as the resting table. Without the
# prefix the path is identical to the codon-level file written for the same
# index name earlier in this script, which would then be overwritten.
save(pval_activated,
     file = paste0("results/pvalCSC_amnioAcid_", index_activated, ".RData"))

# Degradation rate index with Amino Acid --------------------------------------
# Amino-acid-level analysis for the degradation fold index. Same pipeline as
# the codon-level section, but get_rCSC() receives the amino-acid frequency
# table and pval_perm() is called with is_codon = FALSE.

index_resting <- "DegFold.Lympho_Resting.Trip.Ref_Trip_0h.3h"
index_activated <- "DegFold.Lympho_Activated.Trip.Ref_Trip_0h.3h"

# The parameter is named df_codon_freq, but here it carries df_aa_freq.
rCSC_resting <- get_rCSC(df_codon_freq = df_aa_freq,
                         index_name = index_resting)
rCSC_activated <- get_rCSC(df_codon_freq = df_aa_freq,
                           index_name = index_activated)

# --- Resting ---
pval_resting <- pval_perm(cds = fastaFile,
                          index_name = index_resting,
                          CSC_ref = rCSC_resting,
                          n_perm = 10000,
                          is_codon = FALSE)

# Correct the p-values for multiple testing.
# Entries equal to the string "<1e-04" (presumably the floor label produced by
# pval_perm()) are replaced by 1/10000, i.e. one over the number of
# permutations, so the column can be converted to numeric.
pval_resting[pval_resting == "<1e-04"] <- 1 / 10000
pval_resting <- as.data.frame(pval_resting, row.names = names(pval_resting))
colnames(pval_resting) <- "pval"
pval_resting$pval <- as.numeric(as.character(pval_resting$pval))
# "BY" is the Benjamini-Yekutieli adjustment of stats::p.adjust().
pval_resting$FDR <- p.adjust(pval_resting$pval, method = "BY")

# The "amnioAcid" spelling in the file name is kept as-is so that existing
# files and any code loading them by name keep working.
save(pval_resting,
     file = paste0("results/pvalCSC_amnioAcid_", index_resting, ".RData"))

# --- Activated ---
pval_activated <- pval_perm(cds = fastaFile,
                            index_name = index_activated,
                            CSC_ref = rCSC_activated,
                            n_perm = 10000,
                            is_codon = FALSE)

# Correct the p-values for multiple testing (same steps as for resting).
pval_activated[pval_activated == "<1e-04"] <- 1 / 10000
pval_activated <- as.data.frame(pval_activated,
                                row.names = names(pval_activated))
colnames(pval_activated) <- "pval"
pval_activated$pval <- as.numeric(as.character(pval_activated$pval))
pval_activated$FDR <- p.adjust(pval_activated$pval, method = "BY")

# Saved with the same amino-acid prefix as the resting table. Without the
# prefix the path is identical to the codon-level file written for the same
# index name earlier in this script, which would then be overwritten.
save(pval_activated,
     file = paste0("results/pvalCSC_amnioAcid_", index_activated, ".RData"))
