#Script to analyze translation efficiency 

library("DESeq2")
library(RColorBrewer)


# Threshold forwarded to norm_te() as its `plim` argument.
plim <- 0.05


#######################################################################
#  BEGIN FUNCTIONS 
#######################################################################


# Translation efficiency (TE) from DESeq2-normalized, log-scale counts.
#
# Builds a DESeqDataSet with design ~type + lineage + type:lineage, estimates
# size factors and dispersions, and returns either the normalized counts or
# the per-lineage TE (mean transformed RPF minus mean transformed TOT).
#
# Arguments:
#   data         count table passed to DESeqDataSetFromMatrix() as countData.
#                For extract = "TE", columns 1-16 are read as four consecutive
#                groups of (RPF, RPF, TOT, TOT).
#   coldata_all  sample table passed as colData; must provide the `type` and
#                `lineage` columns referenced by the design formula.
#   plim         unused; kept so existing callers keep working.
#   norm         transform applied before computing TE: "rlog" or "log2".
#   extract      "TE" for the TE matrix, "counts" for normalized counts.
#
# Returns:
#   extract = "TE":     matrix with one row per feature and the columns
#                       "Cer", "SpA", "SpB", "SpC".
#   extract = "counts": matrix of size-factor-normalized counts.
#
# Stops with an explicit message when `norm` or `extract` is not one of the
# supported values, or when TE is requested on fewer than 16 columns.
norm_te <- function( data, coldata_all, plim, norm, extract){
	# Validate the options up front so a typo fails immediately instead of
	# after the DESeq2 fit (or, for `extract`, silently returning NULL).
	if (!is.character(norm) || length(norm) != 1L ||
		!(norm %in% c("rlog", "log2"))) {
		stop("norm must be \"rlog\" or \"log2\", got: ",
			paste(deparse(norm), collapse = ""), call. = FALSE)
	}
	if (!is.character(extract) || length(extract) != 1L ||
		!(extract %in% c("TE", "counts"))) {
		stop("extract must be \"TE\" or \"counts\", got: ",
			paste(deparse(extract), collapse = ""), call. = FALSE)
	}
	if (extract == "TE") {
		n_samples <- ncol(data)
		if (is.null(n_samples) || n_samples < 16L) {
			stop("data must have at least 16 columns to compute TE ",
				"(4 groups of RPF, RPF, TOT, TOT)", call. = FALSE)
		}
	}

	# Build the DESeqDataSet
	dds <- DESeqDataSetFromMatrix(countData = data, colData = coldata_all,
		design = ~type+lineage+type:lineage)

	#dds <- estimateSizeFactors(dds,type=c("iterate"))
	dds <- estimateSizeFactors(dds)
	dds <- estimateDispersions(dds, fitType = "local")

	# Normalized counts do not depend on the chosen transform, so return
	# them before paying for rlog()/normTransform().
	if (extract == "counts") {
		return(counts(dds, normalized = TRUE))
	}

	# Log-scale transform selected by `norm`
	transformed <- switch(norm,
		rlog = assay(rlog(dds)),
		log2 = assay(normTransform(dds)))

	# First column of each 4-column group: 1, 5, 9, 13
	group_start <- seq(1L, 13L, by = 4L)

	# Mean of the two RPF replicates (columns 1-2 of each group).
	# drop = FALSE keeps row names even when there is a single row.
	rpfnorm <- (transformed[, group_start, drop = FALSE] +
		transformed[, group_start + 1L, drop = FALSE]) / 2

	# Mean of the two TOT replicates (columns 3-4 of each group)
	totnorm <- (transformed[, group_start + 2L, drop = FALSE] +
		transformed[, group_start + 3L, drop = FALSE]) / 2

	# Difference of log-scale values, i.e. a log ratio RPF / TOT
	te_norm <- rpfnorm - totnorm
	colnames(te_norm) <- c("Cer","SpA","SpB","SpC")
	te_norm
}



#######################################################################
#  END FUNCTIONS 
#######################################################################


#######################################################################
#  BEGIN ANALYSIS
#######################################################################



# Read the count tables (ORFs and genes)

count_file_genes <- "../tables_out/04counts/counts_genes_start60.txt"
count_file_orf <- "../tables_out/04counts/counts_orfs_sigphas_start60.txt"


data_genes <- read.table(count_file_genes, header = TRUE)
data_orf <- read.table(count_file_orf, header = TRUE)

# Design table: 4 lineages x (2 RPF + 2 TOT) columns, in file column order
col1 <- rep(c("Cer","AA","BB","CC"), each = 4)
col2 <- rep(c("RPF", "RPF","TOT","TOT"), 4)
coldata_all <- data.frame(lineage = col1, type = col2)
rownames(coldata_all) <- colnames(data_orf)

data <- rbind(data_orf, data_genes)

# Remove rows containing NA (some are missing because of the size limit
# without gap). complete.cases() works on the data frame directly instead of
# coercing it to a matrix through apply().
row.has.na <- !complete.cases(data)
data <- data[!row.has.na, ]


# Extract TE
TE_log2 <- norm_te(data, coldata_all, plim, "log2", "TE")
TE_rlog <- norm_te(data, coldata_all, plim, "rlog", "TE")

# norm_te() takes the normalized counts before applying the `norm` transform,
# so the choice of `norm` does not change them; "log2" avoids an extra rlog run.
count_norm <- norm_te(data, coldata_all, plim, "log2", "counts")


# Make sure the output directory exists before writing, so the run does not
# fail at the very end because of a missing folder.
out_dir <- "../tables_out/04counts/Deseq2_start60"
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

write.table(TE_log2,
	file.path(out_dir, "TE_log2_orf_genes_start60.txt"),
	col.names = TRUE, sep = "\t", quote = FALSE, row.names = TRUE)


write.table(TE_rlog,
	file.path(out_dir, "TE_rlog_orf_genes_start60.txt"),
	col.names = TRUE, sep = "\t", quote = FALSE, row.names = TRUE)


write.table(count_norm,
	file.path(out_dir, "RPF_TOT_normforTE_start60.txt"),
	col.names = TRUE, sep = "\t", quote = FALSE, row.names = TRUE)




