# Script to plot one global figure with rpf coverage of a selection of candidates 
#

# Colour palette shared by the plotting functions.
# (previous palette: "purple1", "magenta1", "springgreen3", "orangered")
colrpf1 <- "#e600e6"
colrpf2 <- "#1a53ff"

# NOTE(review): the coltot* colours are not used by the code visible in this
# file (presumably reserved for another library type) - confirm before removal.
coltot1 <- "#00b386"
coltot2 <- "#ff3333"

# Order matters: plot_density() takes the first two entries for rep1/rep2.
ccol <- c(colrpf1, colrpf2, coltot1, coltot2)

# FUNCTIONS ##############################################################

# Add an alpha channel to one or more colours.
#
# someColor: any colour specification accepted by col2rgb().
# alpha:     opacity on a 0-255 scale (default 100).
# Returns one "#RRGGBBAA" string per input colour.
makeTransparent <- function(someColor, alpha = 100) {
  rgb_channels <- col2rgb(someColor)

  # Each column of rgb_channels holds the red/green/blue values of one colour.
  with_alpha <- function(channel_values) {
    rgb(
      red = channel_values[1],
      green = channel_values[2],
      blue = channel_values[3],
      alpha = alpha,
      maxColorValue = 255
    )
  }

  apply(rgb_channels, 2, with_alpha)
}



# Read a set of coverage files and stack them into one annotated table.
#
# coverage_files: character vector of paths, each read with read.table().
# chaplo, clib:   vectors parallel to coverage_files; element i is stored in
#                 the `haplo` / `lib` column of every row read from file i.
# type:           single value stored in the `type` column of every row.
#
# Files of size 0 are skipped. A path that does not exist raises an error
# naming the file. Returns the row-bound data frame (columns as produced by
# read.table() plus lib, haplo, type), or NULL when no file contributed rows
# (including when coverage_files is empty).
parse_coverage <- function(coverage_files, chaplo, clib, type) {
  # One slot per file; slots of skipped files stay NULL and are dropped below.
  tables <- vector("list", length(coverage_files))

  for (i in seq_along(coverage_files)) {
    filei <- coverage_files[i]

    # file.info()$size is the size in bytes, NA when the file is missing.
    size_bytes <- file.info(filei)$size
    if (is.na(size_bytes)) {
      stop("coverage file not found: ", filei, call. = FALSE)
    }
    if (size_bytes == 0) {
      next
    }

    datai <- read.table(filei)
    n_rows <- nrow(datai)
    datai$lib <- rep(clib[i], n_rows)
    datai$haplo <- rep(chaplo[i], n_rows)
    datai$type <- rep(type, n_rows)
    tables[[i]] <- datai
  }

  tables <- Filter(Negate(is.null), tables)
  if (length(tables) == 0) {
    return(NULL)
  }

  # Single rbind instead of growing the table inside the loop.
  do.call(rbind, tables)
}


# Draw a simplified cladogram of the four lineages (Scer, SpA, SpB, SpC)
# with the two internal nodes N1 and N2 highlighted.
# Takes no argument and draws into the current graphics panel.

plot_phylo <- function() {
  branch_lwd <- 2

  # Empty canvas; only the coordinate system matters.
  plot(1:10, 1:10,
    type = "n", xlim = c(2, 15), ylim = c(-9, -1.5),
    axes = FALSE, xlab = "", ylab = "", main = ""
  )

  # One row per branch segment: x0, x1, y0, y1 (drawn in this order).
  branches <- rbind(
    c(8, 10, -8, -8),
    c(8, 10, -6, -6),
    c(8, 8, -6, -8),
    c(6, 8, -7, -7),
    c(6, 10, -4, -4),
    c(6, 6, -4, -7),
    c(4, 6, -5.5, -5.5),
    c(4, 10, -2, -2),
    c(4, 4, -2, -5.5)
  )
  for (b in seq_len(nrow(branches))) {
    lines(branches[b, 1:2], branches[b, 3:4], lwd = branch_lwd)
  }

  # Tip labels, bottom to top.
  tip_labels <- expression(
    bolditalic("SpC"), bolditalic("SpB"),
    bolditalic("SpA"), bolditalic("Scer")
  )
  text(rep(12, 4), c(-8, -6, -4, -2), labels = tip_labels, cex = 1)

  # Internal nodes, drawn in colour 2 of the current palette.
  node_x <- c(8, 6)
  node_y <- c(-7, -5.5)
  node_name <- c("N1", "N2")
  for (k in seq_along(node_name)) {
    points(node_x[k], node_y[k], pch = 20, cex = 2, col = 2)
    text(node_x[k], node_y[k], labels = node_name[k], pos = 4, col = 2, cex = 1)
  }
}


# Plot the smoothed, normalised coverage of two replicates in one panel.
#
# data_lib1, data_lib2: coverage tables with a position column `V2` and a
#                       normalised count column `countnorm`.
# ymax:                 upper limit of the y axis.
# sens:                 strand; when "-" positions and ORF bounds are negated
#                       so the panel reads in the ORF's own orientation.
# start, end, ext:      ORF bounds and the margin added on both sides (xlim).
# ORF_id, corf:         the y-axis label is only written when ORF_id is the
#                       first element of corf.
# type, haploi:         accepted but not used by this function.
# Reads the file-level `ccol` palette and calls makeTransparent().

plot_density <- function(data_lib1, data_lib2, ymax, sens, type, start, end, ext, haploi, ORF_id, corf) {
  rep_cols <- ccol[1:2]

  if (sens == "-") {
    # Mirror positions and swap/negate the ORF bounds.
    data_lib1$V2 <- -1 * data_lib1$V2
    data_lib2$V2 <- -1 * data_lib2$V2

    mirrored_start <- -1 * end
    end <- -1 * start
    start <- mirrored_start
  }

  ylabi <- if (ORF_id == corf[1]) "RPF read density" else ""

  ## test with log
  # data_lib1$countnorm=log2(data_lib1$countnorm+1)
  # data_lib2$countnorm=log2(data_lib2$countnorm+1)
  #
  # ymax=log2(ymax+1)

  # Empty frame; x axis is suppressed, the ORF track below acts as scale.
  smooth_rep1 <- smooth(data_lib1$countnorm)
  plot(data_lib1$V2, smooth_rep1,
    type = "n", ylim = c(0, ymax), xlim = c(start - ext, end + ext),
    xaxt = "n", xlab = "", ylab = "", main = "", cex = 0.8, cex.axis = 0.8, tcl = -0.3
  )
  title(ylab = ylabi, line = 2, cex.lab = 0.8)

  # Replicate 1: closed polygon running along y = 0 and back over the curve.
  polygon(
    c(data_lib1$V2, rev(data_lib1$V2)),
    c(rep(0, nrow(data_lib1)), rev(smooth_rep1)),
    col = makeTransparent(rep_cols[1], 160), border = NA
  )

  # Replicate 2, drawn on top with a slightly lower alpha.
  smooth_rep2 <- smooth(data_lib2$countnorm)
  polygon(
    c(data_lib2$V2, rev(data_lib2$V2)),
    c(rep(0, nrow(data_lib2)), rev(smooth_rep2)),
    col = makeTransparent(rep_cols[2], 140), border = NA
  )

  # add start and stop info of the ORF of interest
  # abline(v=start, lty=2, col="grey40")
  # abline(v=end, lty=2, col="grey40")
}

# Function to make the entire figure ############################
#
# Layout: one column for the cladogram (plot_phylo) followed by one column
# per ORF in `corf`. Each ORF column holds, top to bottom:
#   - for each of the 4 haplotypes: a coverage panel (plot_density) and an
#     ORF track (8 panels),
#   - one ORF track for each ancestral node N1 and N2 (2 panels),
#   - a final row with the ORF name.
#
# corf:    vector of ORF identifiers (at least one).
# clib:    8 library labels, ordered as replicate pairs per haplotype.
# chaplo:  8 haplotype names parallel to clib.
# ext:     number of positions shown on each side of the ORF.
# minsize: ORFs shorter than this are left out of the ORF tracks.
# factors: table whose 1st column is the normalisation coefficient and whose
#          3rd column is the library label (selected by position).
#
# All inputs are read from "../../../07_candidates/02_ORF_bam/":
#   <ORF>_coord.gff, <ORF>_<haplo>_ext100_RPF_rep<1|2>_<sens|anti>.cov,
#   <ORF>_<haplo>_allORF.gff and <ORF>_<N1|N2>.bed.
# Draws into the current device; changes layout() and par(mar) without
# restoring them.

plot_allcov <- function(corf, clib, chaplo, ext, minsize, factors) {
  n_orf <- length(corf)
  if (n_orf < 1) {
    stop("plot_allcov: 'corf' must contain at least one ORF id", call. = FALSE)
  }

  cov_dir <- "../../../07_candidates/02_ORF_bam/"
  type <- "RPF"

  # ---- local helpers ------------------------------------------------------

  # Read an ORF annotation and keep ORFs of at least `minsize` on `strand`.
  read_strand_orfs <- function(gff_path, strand) {
    all_coord <- read.table(gff_path)
    size <- all_coord$V5 - all_coord$V4 + 1
    all_sel <- all_coord[size >= minsize, ]
    all_sel[all_sel$V7 == as.character(strand), ]
  }

  # For the "-" strand, negate and swap coordinates so tracks read left to
  # right in the ORF's own orientation (same transform as plot_density).
  orient_track <- function(orfs, starti, endi, strand) {
    if (strand == "-") {
      cstart <- orfs$V4
      cstop <- orfs$V5
      orfs$V4 <- -1 * cstop
      orfs$V5 <- -1 * cstart

      mirrored_start <- -1 * endi
      endi <- -1 * starti
      starti <- mirrored_start
    }
    list(orfs = orfs, start = starti, end = endi)
  }

  # Draw one rectangle per ORF (row j at y = -j): black when its start is in
  # `frame_starts` (same reading frame as the candidate), grey otherwise.
  # The candidate itself is tagged with a yellow diamond at the left edge.
  draw_orf_rows <- function(orfs, window_start, frame_starts, ORF_id) {
    for (j in seq_len(nrow(orfs))) {
      startj <- orfs$V4[j]
      stopj <- orfs$V5[j]

      # Skip ORFs ending before the left edge of the plotted window.
      if (stopj > window_start - ext) {
        fill <- if (startj %in% frame_starts) "black" else "grey"
        rect(startj, -1 * j, stopj, (-1 * j + 0.7), col = fill, border = "white")

        if (orfs$V3[j] == ORF_id) {
          points(window_start - ext, (-1 * j + 0.3), pch = 18, col = "#ffcc00", cex = 2)
        }
      }
    }
  }

  # ---- layout -------------------------------------------------------------

  # Panel 1 is the cladogram; ORF column k uses panels 2:11 + 10 * (k - 1).
  # outer() also gives the right matrix when there is a single ORF.
  matlayout <- outer(2:11, (seq_len(n_orf) - 1) * 10, "+")

  # Last row: one panel per ORF for its name.
  matlayout <- rbind(matlayout, max(matlayout) + seq_len(n_orf))

  # First column: cladogram next to the 8 haplotype rows, then three panels
  # that are never drawn (left blank next to N1, N2 and the name row).
  matlayout <- cbind(c(rep(1, 8), max(matlayout) + 1:3), matlayout)

  layout(matlayout,
    widths = rep(6, ncol(matlayout)),
    heights = c(5, 2, 5, 2, 5, 2, 5, 2, 2, 2, 2)
  )

  # Cladogram on the left.
  par(mar = c(0, 0, 2, 0))
  plot_phylo()

  # Normalisation factor per library (same for every ORF).
  tcoef <- factors[, c(1, 3)]
  colnames(tcoef) <- c("coef", "lib")

  # Manual y-axis limits for ORFs close to very highly expressed regions.
  ymax_fixed <- c(
    ORF_163629 = 300,
    ORF_135398 = 100,
    ORF_23483 = 100,
    ORF_102533 = 100
  )

  # ---- coverage and ORF tracks, one layout column per ORF -------------------

  for (ORF_id in corf) {
    coord <- read.table(paste0(cov_dir, ORF_id, "_coord.gff"))

    # Coverage files are named after the strand of the first coord record.
    strand <- if (coord$V7[1] == "+") "sens" else "anti"

    cov_files <- paste0(
      cov_dir, ORF_id, "_",
      rep(c("Y128", "SD01", "SD06", "SA03"), each = 2),
      "_ext100_", type, "_rep", rep(1:2, times = 4), "_", strand, ".cov"
    )

    data_rpf <- parse_coverage(cov_files, chaplo, clib, type = type)
    if (is.null(data_rpf)) {
      stop("plot_allcov: no coverage data found for ", ORF_id, call. = FALSE)
    }

    # Attach the normalisation factor of each library and normalise counts.
    data_rpf <- merge(data_rpf, tcoef, by = "lib")
    data_rpf$countnorm <- data_rpf$V3 / data_rpf$coef

    # One-row zero-coverage table used for libraries without any read.
    data_null <- data_rpf[1, ]
    data_null$V3[1] <- 0
    data_null$countnorm[1] <- 0

    # Shared y-axis limit for all panels of this ORF.
    ymax <- max(data_rpf$countnorm + 10)
    orf_key <- as.character(ORF_id)
    if (orf_key %in% names(ymax_fixed)) {
      ymax <- ymax_fixed[[orf_key]]
    }

    # Libraries come in replicate pairs: (1,2), (3,4), (5,6), (7,8).
    for (i in seq(1, 8, by = 2)) {
      haploi <- chaplo[i]

      data_lib1 <- data_rpf[data_rpf$lib == clib[i], ]
      data_lib2 <- data_rpf[data_rpf$lib == clib[i + 1], ]

      # Candidate coordinates on this haplotype.
      on_haplo <- coord$V10 == haploi
      starti <- coord$V4[on_haplo]
      endi <- coord$V5[on_haplo]
      sensi <- coord$V7[on_haplo]

      par(mar = c(0, 3, 1, 1))

      # A library without reads is drawn as flat zero coverage.
      if (nrow(data_lib1) < 1) {
        data_lib1 <- data_null
      }
      if (nrow(data_lib2) < 1) {
        data_lib2 <- data_null
      }
      plot_density(data_lib1, data_lib2, ymax, sensi, type, starti, endi, ext, haploi, ORF_id, corf)

      # Track of all ORFs overlapping the region on the same strand.
      track <- orient_track(
        read_strand_orfs(paste0(cov_dir, ORF_id, "_", haploi, "_allORF.gff"), sensi),
        starti, endi, sensi
      )

      # Positions sharing the reading frame of the left window edge.
      frame_starts <- seq(track$start - ext, track$end + ext, by = 3)
      sorted_orfs <- track$orfs[order(track$orfs$V4), ]

      par(mar = c(0, 3, 0, 1))
      plot(1:6, 1:6,
        type = "n", axes = FALSE,
        xlim = c(track$start - ext, track$end + ext), ylim = c(-8, 0),
        xlab = "", ylab = ""
      )
      draw_orf_rows(sorted_orfs, track$start, frame_starts, ORF_id)
    }

    # Ancestral ORFs (nodes N1 and N2), on the strand of the first coord record.
    sensi <- coord$V7[1]
    for (haplorec in c("N1", "N2")) {
      node_orfs <- read_strand_orfs(paste0(cov_dir, ORF_id, "_", haplorec, "_allORF.gff"), sensi)

      # Candidate coordinates in the syntenic region of the ancestral sequence.
      # NOTE(review): these come from a .bed file while ORF starts come from a
      # .gff file; if the bed file is 0-based the frame test below may be off
      # by one on the "+" strand - confirm against the files.
      coord_sid <- read.table(paste0(cov_dir, ORF_id, "_", haplorec, ".bed"))
      track <- orient_track(node_orfs, coord_sid$V2[1], coord_sid$V3[1], sensi)

      frame_starts <- seq(track$start - ext, track$end + ext, by = 3)

      # Unlike the haplotype tracks, ORFs are not sorted and a box is drawn.
      par(mar = c(0.2, 3, 0.2, 1))
      plot(1:6, 1:6,
        type = "n", xaxt = "n", yaxt = "n",
        xlim = c(track$start - ext, track$end + ext), ylim = c(-8, 0),
        xlab = "", ylab = ""
      )

      # Node name only in the first ORF column.
      if (ORF_id == corf[1]) {
        mtext(haplorec, side = 2, line = 2, las = 2, cex = 0.7, col = 2)
      }
      draw_orf_rows(track$orfs, track$start, frame_starts, ORF_id)
    }
  }

  # ---- ORF names in the last layout row -------------------------------------
  # The "t" prefix of the label is kept as in the original figure.
  for (ORF_id in corf) {
    plot(1:5, 1:5, type = "n", axes = FALSE, xlab = "", ylab = "")
    text(3, 3, labels = paste0("t", ORF_id), cex = 0.8)
  }
}



# ANALYSIS ############################################################

# Library labels (replicate pairs) and the haplotype each library belongs to.
clib <- c("Y_rep1", "Y_rep2", "A_rep1", "A_rep2", "B_rep1", "B_rep2", "C_rep1", "C_rep2")
chaplo <- c("Y128", "Y128", "SD01", "SD01", "SD06", "SD06", "SA03", "SA03")

ext <- 15 # nbr of nucleotides to add outside orf for visualization
minsize <- 0 # no minimum size

# read size factors estimates obtained with deseq2 for replicates normalization
# NOTE(review): rows are assumed to be in the same order as clib - confirm.
factors <- read.table("../../tables_out/04counts/Deseq2/lib_sizefactors_deseq2.txt")
factors$lib <- clib

# Render one figure into a TIFF file. The device is closed by on.exit(), so
# it is not left open when plot_allcov() stops with an error.
save_cov_figure <- function(path, orfs, width) {
  tiff(path, width = width, height = 5400, units = "px", res = 950)
  on.exit(dev.off(), add = TRUE)
  plot_allcov(orfs, clib, chaplo, ext, minsize, factors)
  invisible(path)
}

dir_out <- "Figures_manuscript/" # for figures

# ORF_id="ORF_102655" # new name of ORF_90265 ###########################
# read candidates for visualization

# Figure with one selected example per category
# ==============================================================
corf <- c("ORF_173573", "ORF_72149", "ORF_70680", "ORF_162702", "ORF_69174")

out_name <- paste0(dir_out, "5.selectRPF_phylo.tiff")
save_cov_figure(out_name, corf, width = 6450)


# Figure with all candidates (one figure per lineage specific category)
# ==============================================================
data_orf <- read.table("../../04diff_expression/80518_expressiongroup_ORF.txt")
data_orf <- data_orf[data_orf$V3 == "Inc", ]

dir_out <- "Supplementary_Fig/"

# Category "B"
name_b <- paste0(dir_out, "S7.SpB_selectRPF_phylo.tiff")
corf_b <- data_orf$V1[data_orf$V2 == "B"]
save_cov_figure(name_b, corf_b, width = 6450)

# Category "C"
name_c <- paste0(dir_out, "S7.SpC_selectRPF_phylo.tiff")
corf_c <- data_orf$V1[data_orf$V2 == "C"]
save_cov_figure(name_c, corf_c, width = 10000)

# Category "BC"
name_bc <- paste0(dir_out, "S7.SpBC_selectRPF_phylo.tiff")
corf_bc <- data_orf$V1[data_orf$V2 == "BC"]
save_cov_figure(name_bc, corf_bc, width = 5000)

# Category "A", split over three figures.
# NOTE(review): the slices are hard-coded for exactly 20 candidates; a longer
# list is silently truncated and a shorter one yields NA entries - confirm.
name_a1 <- paste0(dir_out, "S7.SpA1_selectRPF_phylo.tiff")
name_a2 <- paste0(dir_out, "S7.SpA2_selectRPF_phylo.tiff")
name_a3 <- paste0(dir_out, "S7.SpA3_selectRPF_phylo.tiff")

corf_a <- data_orf$V1[data_orf$V2 == "A"]
save_cov_figure(name_a1, corf_a[1:7], width = 9000)
save_cov_figure(name_a2, corf_a[8:14], width = 9000)
save_cov_figure(name_a3, corf_a[15:20], width = 9000)


# Problems with 114046 and 40004: too small a number of initiating peaks
# (translated from the original note).

# Category "ABC". The variable name corf_a is reused, as in the original.
name_a <- paste0(dir_out, "S7.Spar_selectRPF_phylo.tiff")
corf_a <- data_orf$V1[data_orf$V2 == "ABC"]
save_cov_figure(name_a, corf_a, width = 10000)
