#Script to analyze translation efficiency 

library("DESeq2")
library(RColorBrewer)


# Threshold handed to norm_te() as its `plim` argument
# (presumably a p-value limit -- norm_te() does not read it).
plim <- 0.05

# Colour palette for the figures: element 1 is used for the full scatter,
# element 3 for the "orf" points.
ccol <- c(
  "grey22",
  "springgreen3",
  "purple",
  "lightslateblue",
  "midnightblue"
)

#######################################################################
#  BEGIN FUNCTIONS 
#######################################################################


# Translation efficiency (TE) from DESeq2-normalised counts.
#
# Builds a DESeqDataSet from `data` with design ~type, estimates size factors
# and dispersions, then returns either the normalised counts or the TE,
# defined here as
#   mean(transformed columns 1-2, RPF) - mean(transformed columns 3-4, TOT).
#
# Arguments:
#   data         count table passed as `countData`; for extract = "TE" the
#                first two columns are treated as the RPF replicates and
#                columns 3-4 as the TOT replicates.
#   coldata_all  sample table passed as `colData`; must provide the `type`
#                column used by the design formula.
#   plim         unused; kept so that existing calls keep working.
#   norm         transformation used for the TE: "rlog" (DESeq2::rlog) or
#                "log2" (DESeq2::normTransform).
#   extract      "TE" or "counts".
#
# Returns:
#   extract = "TE":     one-column matrix named "Cer", one row per feature.
#   extract = "counts": matrix of size-factor-normalised counts.
#
# Stops with an explicit message when `norm` or `extract` is not one of the
# supported values (previously: an obscure "object not found" error or a
# silent NULL).
norm_te <- function( data, coldata_all, plim, norm, extract){
	# Validate the mode switches up front, before any expensive DESeq2 work.
	if (!is.character(norm) || length(norm) != 1L ||
		!norm %in% c("rlog", "log2")) {
		stop("`norm` must be \"rlog\" or \"log2\"", call. = FALSE)
	}
	if (!is.character(extract) || length(extract) != 1L ||
		!extract %in% c("TE", "counts")) {
		stop("`extract` must be \"TE\" or \"counts\"", call. = FALSE)
	}

	# Build the DESeqDataSet
	dds <- DESeqDataSetFromMatrix(countData = data, colData = coldata_all,
		design = ~type)

	# Alternative kept from the original script:
	#dds <- estimateSizeFactors(dds,type=c("iterate"))
	dds <- estimateSizeFactors(dds)
	dds <- estimateDispersions(dds, fitType = "local")

	# Normalised counts do not need the (slow) transformation below.
	if (extract == "counts") {
		return(counts(dds, normalized = TRUE))
	}

	if (ncol(dds) < 4L) {
		stop("`data` needs at least 4 columns (2 RPF then 2 TOT replicates)",
			call. = FALSE)
	}

	# Chosen normalisation / transformation method
	transformed <- if (norm == "rlog") {
		assay(rlog(dds))
	} else {
		assay(normTransform(dds))
	}

	# Replicate means; drop = FALSE keeps one-column matrices with row names.
	rpf_mean <- (transformed[, 1, drop = FALSE] +
		transformed[, 2, drop = FALSE]) / 2
	tot_mean <- (transformed[, 3, drop = FALSE] +
		transformed[, 4, drop = FALSE]) / 2

	# On the log scale the RPF/TOT ratio is a difference.
	te_norm <- rpf_mean - tot_mean
	colnames(te_norm) <- "Cer"
	te_norm
}



#######################################################################
#  END FUNCTIONS 
#######################################################################


#######################################################################
#  BEGIN ANALYSIS
#######################################################################



# Load the count table, drop rows whose `sig` flag is missing, then keep only
# the rows flagged with sig == 1.
count_table <- read.table(
  "../../13_TE_Cer/04_coverage_start/countall_start60.txt",
  header = TRUE
)
count_table <- count_table[!is.na(count_table$sig), ]
select <- count_table[count_table$sig == 1, ]

# Columns 2 to 5 hold the four count columns; rows are labelled by `name`.
data <- select[, 2:5]
rownames(data) <- select$name

# Design table: one row per count column (two RPF, then two TOT samples),
# all from the "Cer" lineage.
col1 <- rep("Cer", 4)
col2 <- c("RPF", "RPF", "TOT", "TOT")
coldata_all <- data.frame(
  lineage = col1,
  type = col2,
  row.names = colnames(data)
)

# Translation efficiency under both transformations
TE_log2 <- norm_te(data, coldata_all, plim, "log2", "TE")
TE_rlog <- norm_te(data, coldata_all, plim, "rlog", "TE")

# Size-factor-normalised counts
count_norm <- norm_te(data, coldata_all, plim, "rlog", "counts")


# Export the two TE tables and the normalised counts as tab-separated files
# (row and column names kept, no quoting), in the same order as before.
invisible(Map(
  function(tbl, path) {
    write.table(tbl, file = path, sep = "\t", quote = FALSE,
      row.names = TRUE, col.names = TRUE)
  },
  list(TE_log2, TE_rlog, count_norm),
  c(
    "../../13_TE_Cer/04_coverage_start/Ref_TE_log2_start60.txt",
    "../../13_TE_Cer/04_coverage_start/Ref_TE_rlog_start60.txt",
    "../../13_TE_Cer/04_coverage_start/Ref_RPFTOT_normforTE_start60.txt"
  )
))



# Check differences: per-feature mean of the normalised counts
# (columns 1-2 of count_norm averaged as RPF, columns 3-4 as TOT).
datamean <- data.frame(
  RPF = (count_norm[, 1] + count_norm[, 2]) / 2,
  TOT = (count_norm[, 3] + count_norm[, 4]) / 2
)

# Regression line colours: palette colour 1 for "gene", purple3 for "orf".
colreg <- c(1, "purple3")

tiff("../figures/06multi/Figure4_S288C_RPFstartvsTOTstart.tiff",
  width = 1400, height = 1400, res = 300)

# Scatter of log2 RPF vs log2 total RNA with "orf" features highlighted and
# one regression line per feature type. The device is closed in `finally`
# so a failing plot/lm call cannot leave the TIFF device open.
invisible(tryCatch(
  {
    plot(log2(datamean$TOT), log2(datamean$RPF), pch = 20, col = ccol[1],
      ylab = expression(paste("RPF start (", log[2], ")", sep = "")),
      xlab = expression(paste("Total RNA start (", log[2], ")", sep = "")),
      cex = 0.8)
    points(log2(datamean$TOT[select$type == "orf"]),
      log2(datamean$RPF[select$type == "orf"]),
      pch = 20, col = ccol[3], cex = 0.8)
    abline(lm(log2(datamean$RPF[select$type == "gene"]) ~
      log2(datamean$TOT[select$type == "gene"])), col = colreg[1])
    abline(lm(log2(datamean$RPF[select$type == "orf"]) ~
      log2(datamean$TOT[select$type == "orf"])), col = colreg[2])
  },
  finally = dev.off()
))



# Slope differences between feature types: the tot:type interaction term
# tests whether the RPF ~ TOT slope depends on `type`.
rpf <- log2(datamean$RPF)
tot <- log2(datamean$TOT)
type <- as.factor(select$type)

model <- lm(rpf ~ tot + type + tot:type)
# Explicit print(): top-level values are not auto-printed when the script is
# run through source() with its default arguments.
print(anova(model))

# Correlation between log2 RPF and log2 total RNA, per feature type
print(cor.test(rpf[select$type == "orf"], tot[select$type == "orf"]))
print(cor.test(rpf[select$type == "gene"], tot[select$type == "gene"]))




