
#Script to compare translation detection methods 
#peak = 0: no peak; peak = 1, 2 or 3: one peak
# functions ############################################################

library(RColorBrewer)
library("eulerr")
library("gplots")
# Convert colours to hexadecimal "#RRGGBBAA" strings carrying an alpha channel.
#   someColor: any colour specification accepted by col2rgb()
#   alpha:     opacity on the same 0-255 scale as the colour channels
# Returns the result of applying rgb() to each column of col2rgb(someColor).
makeTransparent <- function(someColor, alpha = 100) {
  rgb_channels <- col2rgb(someColor)

  # Each column of `rgb_channels` holds the red, green and blue values
  # (in that row order) of one input colour.
  with_alpha <- function(channel) {
    rgb(
      red = channel[1], green = channel[2], blue = channel[3],
      alpha = alpha, maxColorValue = 255
    )
  }

  apply(rgb_channels, 2, with_alpha)
}




#ccol=c(rgb(0,128,255,max=255),"springgreen3", "purple","lightslateblue","midnightblue")
# Line colours used by the plots further down (ccol[3] draws the profiles).
ccol <- c("grey22", "springgreen3", "purple", "lightslateblue", "midnightblue")

########################################################################
# PARAMETERS USED FOR THE DATA_BILAN ANALYSIS ##########################

# Thresholds currently disabled, kept for reference:
#minread_peak=5
#minread_peakb=5

#minread_phas=15
#minread_phasb=15
maxpos <- 101
pval_max <- 0.05
#peakmin_order=1

# Per-feature statistics table (one row per gene / ORF).
data_bilan <- read.table(
  "../../tables_outrevised/03plastid/table_all_stat.txt",
  header = TRUE
)

# V2: collapse the peak level into presence/absence (0 or 1).
# Every value >= 1 becomes 1; all other values are copied unchanged.
data_bilan$peak2 <- replace(data_bilan$peak, data_bilan$peak >= 1, 1)

# Split the table by feature type (genes vs. ORFs).
data_genes <- data_bilan[data_bilan$type == "gene", ]
data_orf <- data_bilan[data_bilan$type == "orf", ]

# RiboTaper results, used below as the comparison method.
data_rt <- read.table(
  "../../../../../05RiboTaper/00scripts/table_denovoorf_rtsig.txt",
  header = TRUE
)

#genes =================================================================




#add ages for figure ######################################################################
# Conservation table: supplies, per ORF, the `cons` string and the N1 / N2
# columns used below to assign an age class.
data_rec <- read.table(
  "../../tables_outrevised/02conservation/conservation_table_spar.txt",
  colClasses = c(
    "character", "character", "character", "numeric", "character",
    rep("numeric", 8)
  )
)

# Select ORFs present in at least one sequenced strain.
# Every `cons` entry is split into single characters ONCE (the original code
# repeated this split for each of the four columns). The matrix is built with
# 26 columns, i.e. each `cons` string is assumed to be 26 characters long.
cons_chars <- matrix(unlist(strsplit(data_rec$cons, "")), byrow = TRUE, ncol = 26)
select_cer <- cons_chars[, 2]
select_A <- cons_chars[, 7]
select_B <- cons_chars[, 9]
select_C <- cons_chars[, 22]
select_all <- select_cer == "1" | select_A == "1" | select_B == "1" |
  select_C == "1"

# Keep the four presence/absence flags as columns to simplify later use.
data_rec$Cer <- select_cer
data_rec$A <- select_A
data_rec$B <- select_B
data_rec$C <- select_C

# Retain ORFs flagged in at least one of the four columns, plus every ORF of
# group "7_ancest".
data_red <- data_rec[select_all | data_rec$group == "7_ancest", ]

# Add ORF ages
#
# Age 1 (A-BC div) : N2 = 1 and N1 = 1 or 0
# Age 2 (B-C div)  : N2 = 0 and N1 = 1
# Age 3 after div  : N1 = 0 and N2 = 0
#
# Rows matching none of these rules keep age 0.
data_red$age <- rep(0, nrow(data_red))
data_red$age[data_red$N1 == 0 & data_red$N2 == 0] <- 3
data_red$age[data_red$N1 == 1 & data_red$N2 == 0] <- 2
data_red$age[data_red$N2 == 1] <- 1

# Attach the age to the statistics table; features without a conservation
# record get NA (left join), and genes are then set to age 0.
data_red <- data.frame(name = data_red$orf, age = data_red$age)
data_bilan <- merge(data_bilan, data_red, by.x = "name", by.y = "name", all.x = TRUE)
data_bilan$age[data_bilan$type == "gene"] <- 0

# Add RiboTaper results.
# Features are matched on a "<haplo>;<name>" key. `sig_rt` is 1 for every
# RiboTaper row, so after the left join it is 1 where RiboTaper reported the
# ORF and NA elsewhere.
data_bilan$id <- paste0(data_bilan$haplo, ";", data_bilan$name)
data_rt$id <- paste0(data_rt$haplo, ";", data_rt$orf)
data_rt$sig_rt <- rep(1, nrow(data_rt))
data_bilan <- merge(data_bilan, data_rt, by.x = "id", by.y = "id", all.x = TRUE)

# Both inputs carry a `haplo` column, so merge() suffixes them as haplo.x
# (from data_bilan) and haplo.y (from data_rt). Restore the data_bilan one by
# NAME; the previous rename of column 3 by position silently depended on the
# column order of the input table.
names(data_bilan)[names(data_bilan) == "haplo.x"] <- "haplo"

######################################################################
#FIGURES 
######################################################################
#palette choice
colp <- brewer.pal(4, "YlGnBu")

# Haplotypes to plot, their display labels, and the upper y-axis limit of the
# mean-profile plots for each of them (same order in all three vectors).
#chaplo=unique(data_bilan$haplo)
chaplo <- c("SD01", "SD06", "SA03")
cname_haplo <- c("SpA", "SpB", "SpC")
clim <- c(35, 10, 20)

lettersize <- 2

# Open the output device; it stays open until dev.off() after the plotting loop.
tiff(
  filename = "Supplementary_Fig/S3.Meta_compareRT_haplos.tiff",
  width = 6000, height = 7500, units = "px", res = 650
)

# Previous 4-row layout, kept for reference:
#matlab=c(1,1,8,8,15,15,22,22,2,3,9,10,16,17,23,24,4,5,11,12,18,19,25,26,6,7,13,14,20,21,27,28)
#	zones <- matrix(matlab, ncol = 4, byrow = F)
#	layout(zones, widths=c(3,4,4,4), heights = c(2,4,2,4,2,4,2,4))

# Layout matrix, filled column by column (6 rows x 4 columns). Each haplotype
# uses two rows: column 1 holds one plot spanning both rows, columns 2-4 each
# hold two stacked plots.
matlab <- c(
  1, 1, 8, 8, 15, 15,
  2, 3, 9, 10, 16, 17,
  4, 5, 11, 12, 18, 19,
  6, 7, 13, 14, 20, 21
)
zones <- matrix(matlab, ncol = 4, byrow = FALSE)
layout(zones, widths = c(3, 4, 4, 4), heights = c(2.5, 4, 2, 4, 2, 4))


# Draw one "mean profile + heatmap" pair (two consecutive layout cells) for a
# subset of features.
#   counts:       numeric matrix, one row per feature and 101 position columns
#   keep:         logical mask (no NA) selecting the rows of `counts` to draw
#   panel_title:  title printed above the profile
#   show_letter:  if TRUE, also print `panel_letter` as the figure panel label
#   panel_letter: panel label ("B", "C", "D")
#   haplo_label:  display name used in the "n = ..." caption
#   y_max:        upper y-axis limit of the profile
#   line_col:     colour of the profile line
#   heat_cols:    colour ramp of the heatmap
# Called for its plotting side effects; always consumes exactly two layout
# cells so that the figure grid stays aligned.
plot_torf_panel <- function(counts, keep, panel_title, show_letter,
                            panel_letter, haplo_label, y_max, line_col,
                            heat_cols) {
  sel <- counts[keep, , drop = FALSE]
  n_sel <- nrow(sel)

  # Upper cell: mean read count per position. Columns 49:101 of the count
  # matrix are drawn at x = -2:50.
  par(mar = c(0, 6, 6, 2))
  # ann = FALSE disables all automatic annotation, so no main/ylab is passed
  # (the previous `main` referenced an undefined variable and was never
  # evaluated); the title is added explicitly below.
  plot(-2:50, colSums(sel)[49:101] / n_sel,
    type = "l", col = line_col, xaxt = "n", ann = FALSE, lwd = 2,
    xlim = c(-2, 48.5), ylim = c(0, y_max)
  )
  title(main = panel_title)
  if (show_letter) {
    title(panel_letter, cex.main = 2, adj = 0, line = 4, outer = FALSE)
  }

  # Lower cell: per-feature heatmap over columns 48:101. Each row is scaled to
  # sum to 1 over that window, rows are ordered by their total count over all
  # 101 positions, and values are shown on a log2 scale.
  par(mar = c(7, 6, 0, 2))
  if (n_sel == 0) {
    # Nothing to draw: still consume the layout cell.
    plot.new()
  } else {
    window <- sel[, 48:101, drop = FALSE]
    scaled <- window / rowSums(window)
    scaled <- scaled[order(rowSums(sel)), , drop = FALSE]
    image(t(log2(scaled)), col = heat_cols, axes = FALSE)

    # image() spreads the 54 window columns over [0, 1], so column k sits at
    # (k - 1) / 53; tick 3/53 is therefore column 51, labelled as position 0.
    axis(1, at = seq(3, 54, by = 3) / 53, labels = FALSE)
    mtext(seq(0, 50, by = 6),
      side = 1, line = 1, outer = FALSE,
      at = seq(3, 54, by = 6) / 53, cex = 0.8
    )
    mtext("Position relative to start codon\n (in nt)", side = 1, line = 4, cex = 0.8)
  }
  mtext(paste(haplo_label, " tORFs n = ", n_sel), side = 1, line = 6, cex = 0.8)

  invisible(NULL)
}

# Heatmap colour ramp (loop-invariant).
#colh=brewer.pal(9, "Blues")
colh <- brewer.pal(9, "Greys")

for (i in seq_along(chaplo)) {
  haploi <- chaplo[i]

  #tiff(filename =paste("Figures_manuscript/S6.Meta_compareRT_",haploi,".tiff",sep=""), width = 2000, height = 3000,
  #units = "px", res=215)

  #svg(filename =paste("Figures_manuscript/3.Fig2_",haploi,".svg",sep=""), width = 14, height = 15, pointsize=16)
  #layout.show(n=6)

  data_haplo <- data_bilan[data_bilan$haplo == haploi, ]

  # Per-haplotype subsets; not used by the plotting code below.
  data_genes <- data_haplo[data_haplo$type == "gene", ]
  data_orf <- data_haplo[data_haplo$type == "orf", ]

  # Row masks. %in% never yields NA, so a missing `sig` or `type` value cannot
  # inject all-NA rows into the selections below.
  is_orf <- data_haplo$type %in% "orf"
  is_sig <- data_haplo$sig %in% "1"
  not_sig <- data_haplo$sig %in% "0"
  has_rt <- !is.na(data_haplo$sig_rt)

  # Venn diagram comparing the two detection methods (panel A) -------------
  orf_sig <- data_haplo$name[is_sig & is_orf]
  orf_rt <- data_haplo$name[data_haplo$sig_rt %in% "1" & is_orf]
  par(mar = c(6, 0, 7, 0))
  venn(list(tORFs_sig = orf_sig, tORFs_RT = orf_rt))
  title(main = cname_haplo[i])
  if (i == 1) {
    title("A", cex.main = 2, adj = 0.1, line = 5, outer = FALSE)
  }

  # Parse the "-"-separated count strings once per haplotype into a
  # features x 101 numeric matrix (previously rebuilt for every panel).
  count <- matrix(
    as.numeric(unlist(strsplit(as.character(data_haplo$counts), "-"))),
    ncol = 101, byrow = TRUE
  )
  rownames(count) <- as.character(data_haplo$name)

  # The three ORF sets compared in panels B-D.
  panels <- list(
    # detected by our method only
    list(title = "tORFs sig only", letter = "B", keep = is_sig & !has_rt),
    # detected by both methods (ours + RiboTaper)
    list(title = "tORFs sig + RiboTaper", letter = "C", keep = is_sig & has_rt),
    # detected by RiboTaper only
    list(title = "tORFs RiboTaper only", letter = "D", keep = not_sig & has_rt)
  )

  for (panel in panels) {
    plot_torf_panel(
      counts = count,
      keep = panel$keep & is_orf,
      panel_title = panel$title,
      show_letter = (i == 1), # panel letters only on the first haplotype
      panel_letter = panel$letter,
      haplo_label = cname_haplo[i],
      y_max = clim[i],
      line_col = ccol[3],
      heat_cols = colh
    )
  }
}
dev.off()
