
#Figure 3 expression and TE 

library(gridExtra)
library(grid)

# Minimum total-RNA read count a feature must reach before it enters the TE
# comparisons (very low read counts would lead to overestimated TE).
mintot <- 10

# Point colours for the scatter plots.
ccol <- c("grey22", "springgreen3", "purple", "lightslateblue", "midnightblue")
# Regression-line colours. c() coerces a numeric 1 to the string "1" when it is
# mixed with a colour name, so it is written as a string directly.
colreg <- c("1", "purple3")

source("myBiplot.R") 
library(RColorBrewer)

#Functions #############################################################
# Add an alpha channel to one or several colours.
#   someColor: colour specification(s) understood by col2rgb()
#   alpha:     opacity on the 0-255 scale (default 100)
# Returns one "#RRGGBBAA" string per input colour.
makeTransparent <- function(someColor, alpha = 100) {
  channels <- col2rgb(someColor)
  # Each column of `channels` holds the red, green and blue values of a colour.
  to_hex <- function(px) {
    rgb(red = px[1], green = px[2], blue = px[3],
        alpha = alpha, maxColorValue = 255)
  }
  apply(channels, 2, to_hex)
}

# Plot pairwise comparisons as brackets with significance stars.
#
# An empty plot is opened, then for each pair i the `trait` values of the two
# groups n1[i] and n2[i] (matched against select_test$factor) are compared with
# a two-sample Wilcoxon test (wilcox.test). When p <= 0.05, a horizontal segment
# is drawn at height i from c1[i] + 0.5 to c2[i] + 0.5 and labelled
# "***" (p <= 0.001), "**" (p <= 0.01) or "*" (p <= 0.05).
#
# Arguments:
#   select_test  data frame with columns `trait` (numeric) and `factor`
#   c1, c2       x positions of the first / second group of each pair
#   n1, n2       factor values identifying the first / second group of each pair
# Value:
#   invisibly, the p-values in pair order (NA where the test is undefined).
#   Each p-value is also printed, as before.
pairwise_test <- function(select_test, c1, c2, n1, n2) {
  plot(1:2, 1:2, type = "n", axes = FALSE, xlab = "", ylab = "",
       xlim = c(0, max(c2) + 1), ylim = c(0, length(c2) + 2))

  pvals <- rep(NA_real_, length(c1))
  for (i in seq_along(c1)) {
    in_first <- select_test$factor == n1[i]
    in_second <- select_test$factor == n2[i]

    pvalg <- wilcox.test(select_test$trait[in_first],
                         select_test$trait[in_second])$p.value
    pvals[i] <- pvalg
    print(pvalg)

    # wilcox.test yields NaN when every value is tied; treat that as
    # "not significant" instead of letting if() fail on a missing value.
    if (is.na(pvalg) || pvalg > 0.05) {
      next
    }

    g1 <- c1[i]
    g2 <- c2[i]
    lines(c(g1 + 0.5, g2 + 0.5), c(i, i))

    labg <- if (pvalg <= 0.001) {
      "***"
    } else if (pvalg <= 0.01) {
      "**"
    } else {
      "*"
    }
    text((g1 + g2 + 1) / 2, i + 0.4, label = labg, cex = 1.5)
  }
  invisible(pvals)
}

# Build the matrix of mean trait values shown by image().
#
# Argument:
#   selrange  data frame with columns
#               factor  0-based class index (row = factor + 1)
#               trait   numeric value to average
#               rangei  1-based size-range index (column)
# Value:
#   numeric matrix with max(factor) + 1 rows and max(rangei) columns. Cell
#   [f + 1, r] holds the mean of `trait` for factor f and range r, and is NA
#   when that combination has no observation.
make_tableimage <- function(selrange) {
  # as.character() first so that factor-typed columns are read by label.
  fact <- as.numeric(as.character(selrange$factor))
  rng <- as.numeric(as.character(selrange$rangei))

  # Two-way table of means: one row per observed factor value, one column per
  # observed range. This replaces pasting both keys into one "factor-range"
  # string and splitting it again, which broke on labels containing "-".
  means <- tapply(selrange$trait, list(fact, rng), mean)

  nbranges <- max(rng)
  nbfact <- max(fact) + 1

  tplot_trait <- matrix(NA_real_, nrow = nbfact, ncol = nbranges)
  rows <- as.numeric(rownames(means)) + 1
  cols <- as.numeric(colnames(means))
  tplot_trait[rows, cols] <- means
  tplot_trait
}

#Analysis #############################################################

# Read the expression table and the conservation table.
data <- read.table("../../00_tables/table_S2_metaexp_withRT.txt", header = TRUE)
data_consorf <- read.table("../../00_tables/02conservation/conservation_table_spar.txt", header = TRUE)
orf_names <- data_consorf$orf

# Keep the ORFs listed in the conservation table (this is where ORFs absent
# from that table are removed), followed by every row of type "gene".
data_orfsel <- data[data$feat_name %in% orf_names, ]
data_genesel <- data[data$type == "gene", ]
data <- rbind(data_orfsel, data_genesel)

# Colours for the three non-gene age classes in Figure S6.
cola <- c("seagreen3", "slateblue3", "tomato")

# Add `cons2`: the conservation group with a numeric prefix, to help with
# visualisation. Values of `cons` outside the lookup are kept unchanged.
cons_prefixed <- c(
  Gene = "0_gene",
  Cons = "1_Cons",
  Spar = "2_Spar",
  Div  = "3_Div",
  DivG = "4_DivG",
  Pol  = "5_Pol"
)
data$cons2 <- as.character(data$cons)
recode_rows <- which(data$cons2 %in% names(cons_prefixed))
data$cons2[recode_rows] <- unname(cons_prefixed[data$cons2[recode_rows]])




#######################################################################
# 
#FIGURE 3 ############################################################

#######################################################################
# RPF, TE, mRNA (normalized by size, in log2)


# Open the TIFF device for Figure 3; the file name carries the read threshold.
fig3_file <- paste0("Figures_manuscript/3.Figure3_exp_mintot", mintot, ".tiff")
tiff(fig3_file, width = 9000, height = 9000, res = 1350)
lettersize <- 1.2

# Panel layout: 5 rows x 4 columns of cells (the 4th row has height 0).
# Each number is the drawing order of the plot that occupies the cell.
laymat <- matrix(
  c( 1,  2,  2,  3,
     4,  5,  5,  6,
     7,  8,  8,  9,
    12, 13, 14, 15,
    11, 12, 10, 10),
  byrow = TRUE, ncol = 4
)
layout(laymat, heights = c(2, 5, 2, 0, 8), widths = c(6, 3, 3, 6))

# Keep the rows flagged with a significant translation signature (sig == "1") ...
data_sig <- data[data$sig == "1", ]
# ... and, among those, only rows with at least `mintot` total-RNA reads, so
# that very low read counts do not distort TE.
data_sig <- data_sig[data_sig$TOT_start >= mintot, ]

#===================================================================
# Age effect: pairwise comparisons between age classes (panels A, B, C)
#===================================================================
# Number of rows per age class and the display names of the classes.
eff1 <- table(data_sig$age)
class1 <- c("Gen", "N2", "N1", "Term")
cnames1 <- paste0(class1, "\n n=", eff1)

# Pairwise Wilcoxon tests (see pairwise_test) =====================
# The six pairs among age classes 0..3; the same vectors serve as x positions
# and as the factor values to compare.
c1 <- c(0, 0, 0, 1, 1, 2)
c2 <- c(1, 2, 3, 2, 3, 3)
par(mar = c(0, 5, 0, 2))

# One panel per trait, in drawing order: A = RPF (log2), B = total RNA (log2),
# C = TE (used as stored).
age_traits <- list(
  A = log2(data_sig$RPF_start),
  B = log2(data_sig$TOT_start),
  C = data_sig$TE_start
)
for (panel_letter in names(age_traits)) {
  select_test <- data.frame(trait = age_traits[[panel_letter]], factor = data_sig$age)
  pairwise_test(select_test, c1, c2, c1, c2) # plot the comparison brackets
  title(panel_letter, cex.main = lettersize, adj = 0, line = -3, outer = FALSE)
}
	
	


	# boxplot
	#==================================================================
par(mar = c(4, 5, 0, 1))

# Age class as a factor: plot() with a factor on x draws one box per class.
age_class <- as.factor(as.character(data_sig$age))

# RPF per age
plot(age_class, log2(data_sig$RPF_start), names = class1,
     ylab = expression(paste("RPF start (", log[2], ")", sep = "")))

# Total RNA per age
plot(age_class, log2(data_sig$TOT_start), names = class1,
     ylab = expression(paste("Total RNA start (", log[2], ")", sep = "")))

# TE per age (plotted as stored, no log2 applied here)
plot(age_class, data_sig$TE_start, names = class1,
     ylab = expression(paste("TE start (", log[2], ")", sep = "")))
	
	
	#abline(h=0, col=2)
	
	
	
	
	##################################################################
	# heatmap with separated size ranges
	##################################################################

	# Size classes are defined on the log2 of the size in amino acids.
	data_sig$log2 <- log2(data_sig$size_aa)

	# Bin the log2 sizes with breaks 4.3, 4.9, ..., 12.7. Rows whose log2 size
	# falls outside (4.3, 12.7] belong to no class and are left out of
	# split_table and data_range.
	size_class <- cut(data_sig$log2, seq(4.3, 13, by = 0.6))
	split_table <- split(data_sig, size_class)
	range_names <- names(split_table)

	# data_range = the binned rows of data_sig, grouped by size class, plus
	#   rangei   index of the class in range_names
	#   rangeval label of the class, e.g. "(4.3,4.9]"
	# Built in one vectorized step instead of rbind()-ing one class at a time.
	in_range <- !is.na(size_class)
	data_range <- data_sig[in_range, ]
	data_range$rangei <- as.integer(size_class)[in_range]
	data_range$rangeval <- as.character(size_class)[in_range]
	# order() is stable, so rows keep their original order within each class.
	data_range <- data_range[order(data_range$rangei), ]

		
#plot images ########################################################
par(mar = c(2, 3, 0, 1))

# Lower bound of every size class, taken from the cut() labels ("(lo,hi]")
# and converted from log2 back to a number of amino acids.
range_lower <- vapply(
  strsplit(sub("(", "", range_names, fixed = TRUE), ","),
  function(bounds) bounds[1],
  character(1)
)
rval1 <- round(2^as.numeric(range_lower))

# Draw one heatmap of mean values (rows = age classes, columns = size classes)
# and label both margins.
# NOTE(review): the label positions are hard-coded for 4 age classes and
# 14 size classes; confirm the matrix always has that shape.
draw_age_heatmap <- function(mean_by_class) {
  image(t(mean_by_class), col = brewer.pal(n = 9, name = "BuGn"), axes = FALSE)
  mtext(class1, side = 2, outer = FALSE, at = (0:3) / 3, cex = 0.6, las = 2)
  mtext(rval1, side = 1, outer = FALSE, at = (0:13) / 13, cex = 0.6, las = 2)
}

# RPF (log2) per age and size class
selrange <- data.frame(
  factor = data_range$age,
  trait = log2(data_range$RPF_start),
  rangei = data_range$rangei
)
tplotrpf <- make_tableimage(selrange)
draw_age_heatmap(tplotrpf)

# Total RNA (log2) per age and size class
selrange <- data.frame(
  factor = data_range$age,
  trait = log2(data_range$TOT_start),
  rangei = data_range$rangei
)
tplottot <- make_tableimage(selrange)
draw_age_heatmap(tplottot)

# TE per age and size class
selrange <- data.frame(
  factor = data_range$age,
  trait = data_range$TE_start,
  rangei = data_range$rangei
)
tplotte <- make_tableimage(selrange)
draw_age_heatmap(tplotte)

	


	
	
	#========================================================================
	# RPF vs total RNA, to check the expression effect (panel D)
	#========================================================================
	par(mar = c(5, 5, 5, 5))
	# Empty frame spanning all of data_sig; the groups are added below.
	plot(log2(data_sig$TOT_start), log2(data_sig$RPF_start), pch = 20, type = "n",
		ylab = expression(paste("RPF start (", log[2], ")", sep = "")),
		xlab = expression(paste("Total RNA start (", log[2], ")", sep = "")))

	title("D", cex.main = lettersize, adj = 0, line = 1, outer = FALSE)

	# Genes (age class 0): points and regression line
	agei <- 0
	seli <- data_sig[data_sig$age == agei, ]
	gene_tot <- log2(seli$TOT_start)
	gene_rpf <- log2(seli$RPF_start)
	points(gene_tot, gene_rpf, pch = 20, cex = 0.8, col = ccol[1])
	testi <- cor.test(gene_rpf, gene_tot)$p.value
	abline(lm(gene_rpf ~ gene_tot), col = colreg[1], lwd = 2)

	# ORFs (type "orf"): points and regression line
	seli <- data_sig[data_sig$type == "orf", ]
	orf_tot <- log2(seli$TOT_start)
	orf_rpf <- log2(seli$RPF_start)
	points(orf_tot, orf_rpf, pch = 20, cex = 0.8, col = ccol[3])
	testi <- cor.test(orf_rpf, orf_tot)$p.value
	abline(lm(orf_rpf ~ orf_tot), col = colreg[2], lwd = 2)

	# Slope test, ORFs vs genes: the tot:type interaction term of the model
	rpf <- log2(data_sig$RPF_start)
	tot <- log2(data_sig$TOT_start)
	type <- as.factor(data_sig$type)

	model <- lm(rpf ~ tot + type + tot:type)
	anova(model)

	

        
    		


#check sample sizes
# Number of rows per age class (table rows) and size class (table columns),
# with the age rows stacked in reverse order (4th row first).
teff <- table(data_range$age, data_range$rangei)
teff2 <- do.call(rbind, lapply(4:1, function(age_row) teff[age_row, ]))

# Column headers: lower bound of each size class in amino acids.
range_lower <- vapply(
  strsplit(sub("(", "", range_names, fixed = TRUE), ","),
  function(bounds) bounds[1],
  character(1)
)
rval1 <- round(2^as.numeric(range_lower))
colnames(teff2) <- rval1

# Draw the counts as a hand-made table: a 14 x 4 grid of cells between
# x = 1..15 and y = 2..6.
colg <- grey(0.75)
par(mar = c(13, 2, 2, 0))
plot(1:15, 1:15, type = "n", axes = FALSE, ann = FALSE, ylim = c(2, 7))
segments(1:15, 2, 1:15, 6, col = colg) # vertical grid lines
segments(1, 2:6, 15, 2:6, col = colg)  # horizontal grid lines

# One row of counts per age class, bottom row = first class of class1.
for (row_k in 1:4) {
  row_counts <- teff2[(4 - row_k) + 1, ]
  text(1:14 + 0.5, rep(row_k + 1.5, 14), labels = row_counts, srt = 90, cex = 0.7)
}

# Grey out fixed blocks of cells (drawn on top of the counts).
rect(1, 2, 3, 3, col = colg, border = colg)
rect(4, 3, 15, 4, col = colg, border = colg)
rect(5, 4, 15, 5, col = colg, border = colg)
rect(6, 5, 15, 6, col = colg, border = colg)

mtext(colnames(teff2), at = 1:14 + 0.5, las = 2, side = 1, cex = 0.6)
mtext(class1, at = 2:5 + 0.5, las = 1, side = 2, cex = 0.6)

# Close the Figure 3 device.
dev.off()


#FIGURE S6 ############################################################
# Panel A: TE vs size for genes and ORFs.
# Panel B (drawn further down): RPF vs total RNA slopes per age class.
#=======================================================================

fig_s6_file <- paste0("Supplementary_Fig/Fig_S6_RPFvsTOTperAge_", mintot, ".tiff")
tiff(fig_s6_file, width = 6000, height = 3000, res = 550)

par(mar = c(5, 5, 5, 5), mfrow = c(1, 2))

# TE vs size: empty frame spanning all of data_sig
plot(log2(data_sig$size_aa), data_sig$TE_start, pch = 20, type = "n",
  ylab = expression(paste("TE start (", log[2], ")", sep = "")),
  xlab = expression(paste("Size (amino acids, ", log[2], ")", sep = "")))
title("A", cex.main = lettersize, adj = 0, line = 1, outer = FALSE)

# Genes (age class 0); the regression line is drawn only when the
# correlation test gives p <= 0.05.
agei <- 0
seli <- data_sig[data_sig$age == agei, ]
gene_size <- log2(seli$size_aa)
gene_te <- seli$TE_start
points(gene_size, gene_te, pch = 20, cex = 0.8, col = ccol[1])
testi <- cor.test(gene_te, gene_size)$p.value
if (testi <= 0.05) {
  abline(lm(gene_te ~ gene_size), col = colreg[1], lwd = 2)
}

# ORFs (type "orf"), same rule for the regression line
seli <- data_sig[data_sig$type == "orf", ]
orf_size <- log2(seli$size_aa)
orf_te <- seli$TE_start
points(orf_size, orf_te, pch = 20, cex = 0.8, col = ccol[3])
testi <- cor.test(orf_te, orf_size)$p.value
if (testi <= 0.05) {
  abline(lm(orf_te ~ orf_size), col = colreg[2], lwd = 2)
}

    # Panel B: RPF vs total RNA, one colour per age class
    plot(log2(data_sig$TOT_start), log2(data_sig$RPF_start), pch = 20, type = "n",
        ylab = expression(paste("RPF start (", log[2], ")", sep = "")),
        xlab = expression(paste("Total RNA start (", log[2], ")", sep = "")))
    title("B", cex.main = lettersize, adj = 0, line = 1, outer = FALSE)

    # Genes (age class 0): the regression line is always drawn
    agei <- 0
    seli <- data_sig[data_sig$age == agei, ]
    points(log2(seli$TOT_start), log2(seli$RPF_start), pch = 20, cex = 0.8, col = ccol[1])
    testi <- cor.test(log2(seli$RPF_start), log2(seli$TOT_start))$p.value
    abline(lm(log2(seli$RPF_start) ~ log2(seli$TOT_start)), col = colreg[1], lwd = 2)

    # Age classes 1 to 3: points in cola[k], regression line only when the
    # correlation test gives p < 0.05
    for (orf_age in 1:3) {
        seli <- data_sig[data_sig$age == orf_age, ]
        points(log2(seli$TOT_start), log2(seli$RPF_start), pch = 20, cex = 0.8, col = cola[orf_age])
        testi <- cor.test(log2(seli$RPF_start), log2(seli$TOT_start))$p.value
        if (testi < 0.05) {
            abline(lm(log2(seli$RPF_start) ~ log2(seli$TOT_start)), col = cola[orf_age], lwd = 2)
        }
    }

    # NOTE(review): the legend shows genes as "grey30" while the gene points
    # use ccol[1] ("grey22") - confirm whether the difference is intended.
    legend("topleft", c("Genes", "tORF_N2", "tORF_N1", "tORF_Term"), cex = 0.9, fill = c("grey30", cola), border = "white")
    # Close the Figure S6 device.
    dev.off()

