library(ggplot2)
# Make the black-and-white theme the default for every ggplot drawn afterwards.
ggplot2::theme_set(ggplot2::theme_bw())


# Load and prepare table --------------------------------------------------

# Codon table read from a tab-delimited file. The first column holds the
# identifier (renamed to EnsemblID below); every other column is converted to
# numeric.
table_codon <- read.delim("data/1.Codons_by_gene_in_genome.df")

# Convert column by column. lapply() keeps the data frame intact, whereas
# apply() first coerces it to a matrix (numeric values may be re-formatted as
# strings on the way) and errors when only one non-ID column is present,
# because `table_codon[, -1]` then collapses to a plain vector.
table_codon[-1] <- lapply(
  table_codon[-1],
  function(x) as.numeric(as.character(x))
)

# Missing or unparseable entries are treated as zero.
table_codon[is.na(table_codon)] <- 0

# lengthCDS is the row-wise total of all non-ID columns.
table_codon$lengthCDS <- rowSums(table_codon[-1])

colnames(table_codon)[1] <- "EnsemblID"
# Keep only the part of each identifier that precedes the first dot.
table_codon$EnsemblID <- gsub("(^[^.]*)(.*$)", "\\1", table_codon$EnsemblID)

# Restore the saved objects into the workspace. The selection below relies on
# one of them being called `table` (it masks base::table while it exists).
load("data/04bis_database_v3.RData")

# Keep the identifier column together with three of the TDD index columns.
Index_TDD_table <- subset(
  table,
  select = c("EnsemblID",
             "IndexTDD_LymphoR", "IndexTDD_LymphoA", "IndexTDD_MacroR")
)

### Merge the index table with the MacroA index, computed separately ---------
# Decimal commas are used in the file and a single space marks a missing value.
# NOTE(review): sep = "t" splits fields on the letter "t"; if the file is
# tab-delimited this should be "\t" -- confirm against the file before changing.
data_TDDA_mean <- read.csv(
  file = "data/data_TDDA_mean.csv",
  sep = "t",
  dec = ",",
  na.strings = " "
)

# Full outer join on EnsemblID: rows found in only one of the two tables are
# kept, with NA in the columns contributed by the other table.
Index_TDD_table <- merge(
  Index_TDD_table,
  data_TDDA_mean,
  by = "EnsemblID",
  all = TRUE
)

## Merge in the codon occurrence table ----------------------------------------
# Outer-join the codon table with the TDD indices on EnsemblID, then discard
# every row that still contains an NA in any column.
occurance_codon_table_control <- na.exclude(
  merge(x = table_codon, y = Index_TDD_table, by = "EnsemblID", all = TRUE)
)
library(reshape2)

# Reshape to long format: the six id columns are repeated on every row, and
# each remaining column (expected to be the codon columns -- confirm that the
# merged tables carry no extra columns) becomes one level of `codon` with its
# value stored in `counts`. The output columns are named directly in melt()
# instead of being renamed by position afterwards, so the names stay correct
# if the set of id columns ever changes.
db <- melt(
  data = occurance_codon_table_control,
  id.vars = c(
    "EnsemblID", "lengthCDS",
    "IndexTDD_LymphoR", "IndexTDD_LymphoA",
    "IndexTDD_MacroR", "IndexTDD_MacroA"
  ),
  variable.name = "codon",
  value.name = "counts"
)
 
library(MASS)
# Negative-binomial GLM of `counts` fitted on the long table `db`, without an
# intercept (`-1`). Terms: IndexTDD_LymphoR, codon, their interaction, and
# lengthCDS as an ordinary covariate (it is not entered as an offset).
model <- glm.nb(counts ~ -1 + IndexTDD_LymphoR * codon + lengthCDS, data = db)
