## load required libraries
library(compiler)
library(deSolve)

#################
## utilities #########
###################

######## log2ratio
## given a matrix (m,n) representing a time course of observation 
## where items are along rows and time points along columns, returns
## the log2ratio compared to the time 0 (first column)
## Arguments:
##   > "x" a (m,n) matrix
log2ratio <- function(x) log2(x)-log2(x[,1])

######## matrixRank
## given a matrix (m,n), returns column ranks 
## Arguments:
##   > "mat" a (m,n) matrix
##   > "na.last" for controlling the treatment of NAs. 
##		If TRUE, missing values in the data are put last
## 		if FALSE, they are put first; if NA, they are removed; 
##		if "keep" they are kept with rank NA.
matrixRank <- function(mat, na.last='keep') {
	n <- nrow(mat)
	m <- ncol(mat)
	newmat <- matrix(rank(as.vector(mat), na.last=na.last), nrow=n, ncol=m)
	colnames(newmat) <- colnames(mat)
	rownames(newmat) <- rownames(mat)
	newmat
}

######## makeBreaks
## makes vector of breaks for heatmaps according to a common root 
## in the names of a character vector
## Arguments:
##   > "labels" a character vector
##   > "split" a character which identifies the end of the root
makeBreaks <- function(labels, split="_") {
	groups <- sapply(strsplit(labels,split),'[',1)
	ix <- sapply(1:(length(groups)-1), function(i) groups[i]!=groups[i+1])
	which(ix)
}

######## corDist
## gives a measure of the distance between rows of a data matrix that
## is based on thir correlation. Distance will be always between 0 and 1.
## Arguments:
##   > "data" a numeric matrix
corDist <- function(data, use = "everything") 
	as.dist((1 - cor(t(data),use=use))/2)

######## abline2D
## Given the output of the linar modeling Z ~ X+Y, this function
## plot on the X-Y plane the line corresponding to points where the
## linear model equals to zero ( X + Y = Z = 0) 
## Arguments:
##   > "lmOut" the output of the linear modeling function
abline2D <- function( lmOut, ... ) {
        coeff <- lmOut$coefficients
        a <- -(coeff[2]/coeff[3])
        b <- -(coeff[1]/coeff[3])
        abline( b,  a, ... )
}

######## vec2cols
## Generates a vector of colors from a numeric based on 
## a colorscale and a vector of breaks
## Arguments:
##   > "x" a numeric vector
##   > "cols" the color palette
##   > "breaks" a vector of breaks (should be one element
##		 longer than the color palette)
vec2cols <- function(x, cols, breaks) {
	stopifnot(length(cols) == (length(breaks)-1))
	cols[findInterval(x, breaks, all.inside=TRUE)]
}

######## vec2cols
## Recursively merges a list that contains data.frames
## Arguments:
##   > "df_list" a list of data.frame
##   > "by" a character indicatind the column names used to merge
merge_list <- function(df_list, by, ...)
{
	for( i in seq_along(df_list) )
		colnames(df_list[[i]])[!colnames(df_list[[i]]) %in% by] <- 
			paste(colnames(df_list[[i]])[!colnames(df_list[[i]]) %in% by],names(df_list)[i],sep='.')
	df_out <- df_list[[1]]
	for( i in seq_along(df_list)[-1] ) {
		df_out <- merge(df_out, df_list[[i]], ...)
	}
	return(df_out)
}

######## smoothXYlims
## makes a smoothScatter plot where the points outside the plot limits
## are depicted on the border, but with a different pch ( pch=2 ) 
## Arguments:
##   > "x" a numeric
##   > "y" a numeric of the same length of x
##   > "xlim" a numeric of length 2 with the limits of x axis (by default the entire range)
##   > "ylim" a numeric of length 2 with the limits of y axis (by default the entire range)
smoothXYlims <- function(x, y, xlim=range(x[is.finite(x)]), ylim=range(y[is.finite(y)]), ...)
{
  x_low <- x<xlim[1]
  x_hig <- x>xlim[2]
  y_low <- y<ylim[1]
  y_hig <- y>ylim[2]

  x[x_low] <- xlim[1]
  x[x_hig] <- xlim[2]
  y[y_low] <- ylim[1]
  y[y_hig] <- ylim[2]

  smoothScatter(x, y, xlim=xlim, ylim=ylim, ...)
  points(x[x_low|x_hig|y_low|y_hig], y[x_low|x_hig|y_low|y_hig], pch=2, cex=.5, ...)

}

######## plotLoess
## draws a line of Local Polynomial Regression Fitting (loess) upon an
## already existing XY plot ( the regression will be based on Y ~ X )
## Arguments:
##   > "x" a numeric
##   > "y" a numeric of the same length of x
##   > "qt" a numeric of length 2 with the quantiles where to cut the regression
##   > "na.rm" a logical to remove NAs observation
plotLoess <- function(x, y, qt=c(.02,.98), na.rm=FALSE, ...) {
	filt <- x>quantile(x,qt[1]) & x<quantile(x,qt[2])
	if( na.rm ) filt <- filt & is.finite(x) & is.finite(y)
	x <- x[filt]; y <- y[filt]
	loessOut <- loess( y ~ x )
	ord <- order(x)
	#lines( x[ord], predict(loessOut)[ord] , ...)
	lines( x[ord], predict(loessOut, newdata=data.frame(x=x[ord])) , ...)
}

######################################## 
## generation of the annotation ###########
######################################## 

###### make_annotation 
## make annotation will return a set of GRanges accordingly to the 
## parameters given to the function. Each GRange represents a single gene. 
## In case more than one transcript is annotated for a gene, all transcripts are
## merged in order to obtain the largest region possible associated to 
## that gene. In case a gene has non overlapping transcripts associated, the 
## gene is removed.
## Arguments:
##   - "annotation" can be either "singleChr" or "genome"
##   - "singleChrName" must specify chromosome name (in case "singleChr" is selected)
##   - "annotationFeature" a character can be either "TSS", "GB", "TES"
##       > "TSS" returns the TSS region defined by the coordinates defined in 
##			promoter_limits
##       > "TES" returns the TES region defined by the coordinates defined in 
##			promoter_limits
##       > "GB" return the region between the TSS and the TES
##   - "promoter_limits" a numeric of length 2, defines the upstream and
## 		downstream distances from the annotated transcription-start-site
##   - "termination_limits" a numeric of length 2, defines the upstream and
## 		downstream distances from the annotated transcription-end-site
##   - "txdb" a TxDb object
make_annotation <- function(annotation, singleChrName, annotationFeature, 
	promoter_limits=c(50, 300), termination_limits=c(1000, 4000),
	txdb=TxDb.Mmusculus.UCSC.mm9.knownGene) {

	promoter_start <- promoter_limits[1]
	promoter_end   <- promoter_limits[2]

	termination_start <- termination_limits[1]
	termination_end   <- termination_limits[2]

	message('Creating annotation...')
	reduceLevels <- function(grange) {
		grange@seqnames <- droplevels(grange@seqnames)
		grange@seqinfo <- grange@seqinfo[levels(grange@seqnames)]
		grange }
	# library(TxDb.Mmusculus.UCSC.mm9.knownGene)
	# txdb <- TxDb.Mmusculus.UCSC.mm9.knownGene
	tx <- transcripts(txdb)
	names(tx) <- select(txdb, keys = tx$tx_name
		, keytype = "TXNAME", columns = "GENEID")$GENEID
	if( annotation == 'singleChr' )
	{######## single chromosome
		tx <- reduceLevels(tx[seqnames(tx)==singleChrName])
	}
	# remove NA named genes
	tx <- tx[!is.na(names(tx))]
	######## select a unique feature per each gene
	tx <- reduce(split(tx, names(tx)))
	# remove genes which still have more than one transcript after reducing...
	tx <- unlist(tx[elementNROWS(tx)==1])
	annotationGR <- tx
	######## create either gene body or promoter annotation		
	annotationGR <- switch(annotationFeature
		# for GB, reduce and unlist the GRList split by the name of the gene
		, 'GB'={
			# remove the promoter region from the gene body
			#### PLUS
			start(annotationGR[strand(tx)=='+']) <- 
				apply(cbind(
					start(tx[strand(tx)=='+'])+(promoter_end+1)
					, end(tx[strand(tx)=='+'])+1
					), 1, min)
			end(annotationGR[strand(tx)=='+']) <-
				apply(cbind(
					start(tx[strand(tx)=='+'])+promoter_end+1
					, end(tx[strand(tx)=='+'])-termination_start
					), 1, max)
			#### MINUS
			start(annotationGR[strand(tx)=='-']) <- 
				apply(cbind(
					end(tx[strand(tx)=='-'])-promoter_end-1
					, start(tx[strand(tx)=='-'])+termination_start
					), 1, min)
			end(annotationGR[strand(tx)=='-']) <- 
				apply(cbind(
					end(tx[strand(tx)=='-'])-(promoter_end+1)
					, start(tx[strand(tx)=='-'])-1
					), 1, max)
			trim(annotationGR)
		}
		, 'TSS'={
			# keep only the promoter region (plus strand)
			start(annotationGR[strand(tx)=='+']) <- 
				start(tx[strand(tx)=='+'])-promoter_start
			end(annotationGR[strand(tx)=='+']) <- 
				apply(cbind(
					start(tx[strand(tx)=='+'])+promoter_end
					, end(tx[strand(tx)=='+'])
					), 1, min)
			# keep only the promoter region (minus strand)
			end(annotationGR[strand(tx)=='-']) <- 
				end(tx[strand(tx)=='-'])+promoter_start
			start(annotationGR[strand(tx)=='-']) <- 
				apply(cbind(
					end(tx[strand(tx)=='-'])-promoter_end
					, start(tx[strand(tx)=='-'])
					), 1, max)
			trim(annotationGR)
		}
		, 'TES'={
			# keep only the promoter region (plus strand)
			end(annotationGR[strand(tx)=='+']) <- 
				end(tx[strand(tx)=='+'])+termination_end
			start(annotationGR[strand(tx)=='+']) <- 
				apply(cbind(
					start(tx[strand(tx)=='+'])+promoter_end+2
					, end(tx[strand(tx)=='+'])-(termination_start-1)
					), 1, max)
			# keep only the promoter region (minus strand)
			start(annotationGR[strand(tx)=='-']) <- 
				start(tx[strand(tx)=='-'])-termination_end
			end(annotationGR[strand(tx)=='-']) <- 
				apply(cbind(
					end(tx[strand(tx)=='-'])-promoter_end-2
					, start(tx[strand(tx)=='-'])+(termination_start-1)
					), 1, min)
			trim(annotationGR)
		}
		)

	return(annotationGR)

}

###### make_transcripts_annotation (modify)
## make annotation will return a set of GRanges accordingly to the 
## parameters given to the function. Each GRange represents a single gene. 
## In case more than one transcript is annotated for a gene, all transcripts are
## merged in order to obtain the largest region possible associated to 
## that gene. In case a gene has non overlapping transcripts associated, the 
## gene is removed.
## Arguments:
##   - "annotation" can be either "singleChr" or "genome"
##   - "singleChrName" must specify chromosome name (in case "singleChr" is selected)
##   - "annotationFeature" a character can be either "TSS", "GB", "TES"
##       > "TSS" returns the TSS region defined by the coordinates defined in 
##			promoter_limits
##       > "TES" returns the TES region defined by the coordinates defined in 
##			promoter_limits
##       > "GB" return the region between the TSS and the TES
##   - "promoter_limits" a numeric of length 2, defines the upstream and
## 		downstream distances from the annotated transcription-start-site
##   - "termination_limits" a numeric of length 2, defines the upstream and
## 		downstream distances from the annotated transcription-end-site
##   - "txdb" a TxDb object
make_transcripts_annotation <- function(txdb) {
	print('Creating the annotation...')
	################
	## define 5'UTR, exons, introns, 3'UTR
	#################
	myGenetx <- reduce(transcriptsBy(txdb,'gene'))
	myGenetx <- unlist(myGenetx[elementNROWS(myGenetx)==1])
	myGenetxPlus <- myGenetx[strand(myGenetx)=='+']
	myGenetxMinus <- myGenetx[strand(myGenetx)=='-']
	unambiguousTx <- names(myGenetx)
	####################
	## 5'UTR - exons ######
	###################
	print("1/6 : 5'UTR/exons...")
	fiveUtrGR <- fiveUTRsByTranscript(txdb, use.names=TRUE)
	fiveUtrMatch <- select(txdb, keys = names(fiveUtrGR), keytype = "TXNAME", columns = "GENEID")
	#### annotate the 5utr db (plus strand)
	myFiveUtrDB <- fiveUtrMatch[fiveUtrMatch$GENEID%in%names(myGenetxPlus),]
	myFiveUtrDB$start <- sapply(start(fiveUtrGR[myFiveUtrDB$TXNAME]),min)
	myFiveUtrDB$exons <- sapply(start(fiveUtrGR[myFiveUtrDB$TXNAME]),length)
	myFiveUtrDB$txstart <- start(myGenetxPlus[myFiveUtrDB$GENEID])
	# select the 5'UTR that match the start of the annotation
	myFiveUtrDB <- myFiveUtrDB[myFiveUtrDB$start == myFiveUtrDB$txstart,]
	# remove the identical 5'UTR annotations
	myFiveUtrDB <- myFiveUtrDB[!duplicated(myFiveUtrDB[,-1]),]
	# keep only the most complete annotation
	myFiveUtrDB <- do.call('rbind',lapply(split(myFiveUtrDB,myFiveUtrDB$GENEID), function(x) x[which.max(x$exons),]))
	# create the GRanges
	myFiveUtrGRplus <- fiveUtrGR[myFiveUtrDB$TXNAME]
	names(myFiveUtrGRplus) <- myFiveUtrDB$GENEID
	#### annotate the 5utr db (minus strand)
	myFiveUtrDB <- fiveUtrMatch[fiveUtrMatch$GENEID%in%names(myGenetxMinus),]
	myFiveUtrDB$start <- sapply(end(fiveUtrGR[myFiveUtrDB$TXNAME]),max)
	myFiveUtrDB$exons <- sapply(end(fiveUtrGR[myFiveUtrDB$TXNAME]),length)
	myFiveUtrDB$txstart <- end(myGenetxMinus[myFiveUtrDB$GENEID])
	# select the 5'UTR that match the start of the annotation
	myFiveUtrDB <- myFiveUtrDB[myFiveUtrDB$start == myFiveUtrDB$txstart,]
	# remove the identical 5'UTR annotations
	myFiveUtrDB <- myFiveUtrDB[!duplicated(myFiveUtrDB[,-1]),]
	# keep only the most complete annotation
	myFiveUtrDB <- do.call('rbind',lapply(split(myFiveUtrDB,myFiveUtrDB$GENEID), function(x) x[which.max(x$exons),]))
	# create the GRanges
	myFiveUtrGRminus <- fiveUtrGR[myFiveUtrDB$TXNAME]
	names(myFiveUtrGRminus) <- myFiveUtrDB$GENEID
	#### concatenate
	myFiveUtrGRExons <- c(myFiveUtrGRplus, myFiveUtrGRminus)
	#######################
	## 5'UTR - introns ######
	#####################
	print("2/6 : 5'UTR/introns...")
	myFiveUtrGRIntrons <- psetdiff(unlist(range(myFiveUtrGRExons)),myFiveUtrGRExons)
	myFiveUtrGRIntrons <- myFiveUtrGRIntrons[elementNROWS(myFiveUtrGRIntrons)>0]
	####################
	## 3'UTR - exons ######
	###################
	print("3/6 : 3'UTR/exons...")
	threeUtrGR <- threeUTRsByTranscript(txdb, use.names=TRUE)
	threeUtrMatch <- select(txdb, keys = names(threeUtrGR), keytype = "TXNAME", columns = "GENEID")
	#### annotate the 5utr db (plus strand)
	myThreeUtrDB <- threeUtrMatch[threeUtrMatch$GENEID%in%names(myGenetxPlus),]
	myThreeUtrDB$end <- sapply(end(threeUtrGR[myThreeUtrDB$TXNAME]),max)
	myThreeUtrDB$exons <- sapply(end(threeUtrGR[myThreeUtrDB$TXNAME]),length)
	myThreeUtrDB$txend <- end(myGenetxPlus[myThreeUtrDB$GENEID])
	# select the 5'UTR that match the end of the annotation
	myThreeUtrDB <- myThreeUtrDB[myThreeUtrDB$end == myThreeUtrDB$txend,]
	# remove the identical 5'UTR annotations
	myThreeUtrDB <- myThreeUtrDB[!duplicated(myThreeUtrDB[,-1]),]
	# keep only the most complete annotation
	myThreeUtrDB <- do.call('rbind',lapply(split(myThreeUtrDB,myThreeUtrDB$GENEID), function(x) x[which.max(x$exons),]))
	# create the GRanges
	myThreeUtrGRplus <- threeUtrGR[myThreeUtrDB$TXNAME]
	names(myThreeUtrGRplus) <- myThreeUtrDB$GENEID
	#### annotate the 5utr db (minus strand)
	myThreeUtrDB <- threeUtrMatch[threeUtrMatch$GENEID%in%names(myGenetxMinus),]
	myThreeUtrDB$start <- sapply(start(threeUtrGR[myThreeUtrDB$TXNAME]),min)
	myThreeUtrDB$exons <- sapply(start(threeUtrGR[myThreeUtrDB$TXNAME]),length)
	myThreeUtrDB$txend <- start(myGenetxMinus[myThreeUtrDB$GENEID])
	# select the 5'UTR that match the start of the annotation
	myThreeUtrDB <- myThreeUtrDB[myThreeUtrDB$start == myThreeUtrDB$txend,]
	# remove the identical 5'UTR annotations
	myThreeUtrDB <- myThreeUtrDB[!duplicated(myThreeUtrDB[,-1]),]
	# keep only the most complete annotation
	myThreeUtrDB <- do.call('rbind',lapply(split(myThreeUtrDB,myThreeUtrDB$GENEID), function(x) x[which.max(x$exons),]))
	# create the GRanges
	myThreeUtrGRminus <- threeUtrGR[myThreeUtrDB$TXNAME]
	names(myThreeUtrGRminus) <- myThreeUtrDB$GENEID
	#### concatenate
	myThreeUtrGRExons <- c(myThreeUtrGRplus, myThreeUtrGRminus)
	#######################
	## 3'UTR - introns ######
	#####################
	print("4/6 : 3'UTR/introns...")
	myThreeUtrGRIntrons <- psetdiff(unlist(range(myThreeUtrGRExons)),myThreeUtrGRExons)
	myThreeUtrGRIntrons <- myThreeUtrGRIntrons[elementNROWS(myThreeUtrGRIntrons)>0]
	######################
	## Coding - regions ######
	#######################
	myCodingRegions <- myGenetx
	myFiveUtrGenes <- names(myFiveUtrGRExons)
	myCodingRegions[myFiveUtrGenes] <- psetdiff(myCodingRegions[myFiveUtrGenes],unlist(range(myFiveUtrGRExons)))
	myThreeUtrGenes <- names(myThreeUtrGRExons)
	myCodingRegions[myThreeUtrGenes] <- psetdiff(myCodingRegions[myThreeUtrGenes],unlist(range(myThreeUtrGRExons)))
	####################
	## Coding - exons ######
	###################
	print('5/6 : Creating Coding/exons...')
	myExonsGR <- disjoin(exonsBy(txdb,'gene'))[names(myCodingRegions)]
	myCDSExonsGR <- pintersect(myExonsGR,myCodingRegions,drop.nohit.ranges=TRUE)
	######################
	## Coding - introns ######
	#######################
	print('6/6 : Creating Coding/introns...')
	myCDSIntronsGR <- psetdiff(unlist(range(myCDSExonsGR)),myCDSExonsGR)
	myCDSIntronsGR <- myCDSIntronsGR[elementNROWS(myCDSIntronsGR)>0]
	############
	## Gather ######
	#############
	print('Creating empty GRangesList... (This step could be long, up to 2-3 minutes)')
	emptyGRList <- GRangesList(sapply(unambiguousTx, function(x) GRanges()))
	#    user  system elapsed 
	# 139.860   0.000 139.869
	#+ 31.696   0.420  32.126 
	threeUTRintrons <- threeUTRexons <- CDSintrons <- CDSexons <- fiveUTRintrons <- fiveUTRexons <- emptyGRList
	fiveUTRexons[names(myFiveUtrGRExons)] <- myFiveUtrGRExons
	fiveUTRintrons[names(myFiveUtrGRIntrons)] <- myFiveUtrGRIntrons
	CDSexons[names(myCDSExonsGR)] <- myCDSExonsGR
	CDSintrons[names(myCDSIntronsGR)] <- myCDSIntronsGR
	threeUTRexons[names(myThreeUtrGRExons)] <- myThreeUtrGRExons
	threeUTRintrons[names(myThreeUtrGRIntrons)] <- myThreeUtrGRIntrons
	txAnnotation <- list(
		"fiveUTRexons"=fiveUTRexons,
		"fiveUTRintrons"=fiveUTRintrons,
		"CDSexons"=CDSexons,
		"CDSintrons"=CDSintrons,
		"threeUTRexons"=threeUTRexons,
		"threeUTRintrons"=threeUTRintrons
		)
	print('Done.')
	return(txAnnotation)
}

###### gene length
## returns the length of the genomic region region associated to each gene, 
## as defined in "make_annotation" function
## Arguments:
##   - "annotation" can be either "singleChr" or "genome"
##   - "singleChrName" must specify chromosome name (in case "singleChr" is selected)
gene_length <- function(annotation, singleChrName) {

	message('Creating annotation...')
	reduceLevels <- function(grange) {
		grange@seqnames <- droplevels(grange@seqnames)
		grange@seqinfo <- grange@seqinfo[levels(grange@seqnames)]
		grange }
	library(TxDb.Mmusculus.UCSC.mm9.knownGene)
	txdb <- TxDb.Mmusculus.UCSC.mm9.knownGene
	tx <- transcripts(txdb)
	names(tx) <- select(txdb, keys = tx$tx_name
		, keytype = "TXNAME", columns = "GENEID")$GENEID
	if( annotation == 'singleChr' )
	{######## single chromosome
		tx <- reduceLevels(tx[seqnames(tx)==singleChrName])
	}
	# remove NA named genes
	tx <- tx[!is.na(names(tx))]
	######## select a unique feature per each gene
	tx <- reduce(split(tx, names(tx)))
	# remove genes which still have more than one transcript after reducing...
	tx <- unlist(tx[elementNROWS(tx)==1])
	gl <- width(tx)
	names(gl) <- names(tx)
	return(gl)
}

##############################################
## Quantification of the ChIP-seq signal ##########
##############################################

###### bedfiles2granges
## given a vector (or a list) of bed files generated by Macs2, returns a
## list of GRanges containing peaks and metadata
## Arguments:
##   - "chipFiles" is a character vector of file names
##   - "range_summit" a logical. If TRUE the returned GRanges is just the
##     summit of the peak. Otherwise, all the peak width is reported and 
##     the summit information is given as metadata
bedfiles2granges <- function(chipFiles, range_summit=FALSE, skip=23) {

	# ####### list of granges from macs bedfiles
	message('Creating granges of ChIP peaks...')
	###### convert the bed files to GRanges
	bed2granges <- function(bed)
	{
		cnames <- colnames(bed)
		pval <- bed[,grep('pvalue',cnames)]
		qval <- bed[,grep('qvalue|FDR',cnames)]
		fold <- bed[,grep('fold',cnames)]
		if( any(cnames == 'abs_summit') ) {
			abs_summit <- bed[,'abs_summit']
			summit <- abs_summit - bed$start
		} else if( any(cnames == 'summit') ) {
			summit <- bed[,'summit']
			abs_summit <- bed$start + summit
		} else stop('no summit found')
		if( any(grepl('FDR',cnames)) )
		{
			qval[qval<1e-4] <- 1e-4
			qval <- -10*log10(qval)
		}
		if(range_summit) {
			grout <- GRanges(
				bed$chr
				, IRanges(abs_summit, abs_summit)
				, pval10log10=pval
				, fold_enrichment=fold
				, qval10log10=qval
				)
		} else {
			grout <- GRanges(
				bed$chr
				, IRanges(bed$start, bed$end)
				, summit=summit
				, pval10log10=pval
				, fold_enrichment=fold
				, qval10log10=qval
				)
		}
		return(grout)
	}
	bedList <- lapply(chipFiles, function(x) read.table(x, skip=skip, header=TRUE))
	names(bedList) <- sapply(basename(chipFiles), function(x) sub('\\....$','',x))
	grangesList <- lapply(bedList, bed2granges)

	return(grangesList)
}

###### makeChIPdata
## given a list of two GRanges and a list of two bam files addresses, 
## this function evaluates the enrichments of the BAM files on the set of the 
## union of the two peaks lists and on the promoters ( defined based on 
## a TxDB object and the upstream and downstream coodinates relative to 
## termination-start-site (TSS), see "make_annotation" function )
## Arguments:
##   - "peaks" a list of GRanges
##   - "BAM" a list of BAM files
##   - "promoter_limits" a numeric of length 2 containing the upstream 
## 		and downstream distances from the annotated TSS
##   - "txdb" a TxDb object
makeChIPdata <- function( peaks , BAM , promoter_limits=c(2e3,2e3), txdb )
{

	if( !require(compEpiTools) )
		stop('Required library "compEpiTools" missing.')

	libsizeNormFactors <- function( BAM ) 
	{
		if( !require(Rsamtools) )
			stop('Required library "Rsamtools" missing.')
		libsizes <- sapply(BAM, function(x) 
			countBam(x, param = ScanBamParam(flag = scanBamFlag(isUnmappedQuery = FALSE))
				)$records)/1e+06
	}

	## tss bound

	peaksUnion <- peaks[[1]]
	for( i in 2:length(peaks) ) peaksUnion <- union(peaksUnion, peaks[[i]])

	tss <- make_annotation('genome',annotationFeature="TSS",
		promoter_limits=promoter_limits, txdb=txdb)

	bound <- as.list(rep(numeric(0), length(tss)))
	fo <- findOverlaps(tss, peaksUnion)
	tmp <- split(subjectHits(fo), queryHits(fo))
	bound[as.numeric(names(tmp))] <- tmp
	bound <- sapply(bound, paste, collapse=',')

	## libsize 

	libsizes <- libsizeNormFactors(BAM)

	## quantify signal and normalize by library size

	signal_tss <- sapply(BAM, function(x) 
		GRcoverage(tss, x, Nnorm=FALSE, Snorm=FALSE))
	signal_tss <- t( t( signal_tss ) / libsizes )

	## zero share (on bound promoters)

	signal_tss_bound <- signal_tss[nchar(bound)>0,]
	share_factors <- colSums(signal_tss_bound)/median(colSums(signal_tss_bound))
	zeroshare <- share_factors[2]/share_factors[1]

	## transform in log2

	zeroshare <- log2(zeroshare)
	signal_tss <- log2(signal_tss)

	## remove non-finite values

	idx <- apply(is.finite(signal_tss),1,all)
	signal_tss <- signal_tss[idx,]
	tss <- tss[idx]
	bound <- bound[idx]

	## print number of finite tss

	finite_n <- length(which(idx))
	n <- length(idx)
	print(paste(finite_n, '/', n, 'finite tss observations (', round(finite_n/n*100,2), '% )' ))

	tss_signal <- data.frame(
		EntrezID = names(tss),
		bound = bound,
		low = signal_tss[,1],
		high = signal_tss[,2],
		log2ratio = signal_tss[,2] - signal_tss[,1],
		zeroshare = zeroshare,
		row.names = names(tss),
		stringsAsFactors=FALSE
		)

	##### peaks

	signal_peaks <- sapply(BAM, function(x) 
		GRcoverage(peaksUnion, x, Nnorm=FALSE, Snorm=FALSE))
	signal_peaks <- t( t( signal_peaks ) / libsizes )

	## zero share

	share_factors <- colSums(signal_peaks)/median(colSums(signal_peaks))
	zeroshare <- share_factors[2]/share_factors[1]

	## transform in log2

	zeroshare <- log2(zeroshare)
	signal_peaks <- log2(signal_peaks)
	rownames(signal_peaks) <- 1:nrow(signal_peaks)

	## remove non-finite values

	idx <- apply(is.finite(signal_peaks),1,all)
	signal_peaks <- signal_peaks[idx,]

	## print number of finite tss

	finite_n <- length(which(idx))
	n <- length(idx)
	print(paste(finite_n, '/', n, 'finite peaks observations (', round(finite_n/n*100,2), '% )' ))

	peaks_signal <- data.frame(
		peak_ID = rownames(signal_peaks),
		low = signal_peaks[,1],
		high = signal_peaks[,2],
		log2ratio = signal_peaks[,2] - signal_peaks[,1],
		zeroshare = zeroshare,
		row.names = rownames(signal_peaks),
		stringsAsFactors=FALSE
		)

	return(list(tss=tss_signal, peaks=peaks_signal))

}

###########################################
## Analysis of splicing alterations ############
###########################################

oneEventGenes <- function(dexds, padj_thresh, foldchange_thresh) {

	diff <- dexds$padj < padj_thresh & 
		abs(dexds$log2fold_t_c) > foldchange_thresh
	diff[is.na(diff)] <- FALSE
	tab <- table(dexds$groupID[diff])
	singleventgenes <- names(tab)[tab==1]
	dexds[dexds$groupID %in% singleventgenes,]

	}

annotateRelativePos <- function(xx) {

	x <- as.numeric(sapply(strsplit(xx, '_'),'[[',2))
	y <- substr(sapply(strsplit(xx, '_'),'[[',1),1,1)
	yf <- factor(y, levels=c('f','c','t'))

	n = length(x)
	if( n == 1 ) return('m')
	ann = character(n)
	if( n %% 2 == 0) # pari
	{
	midpoint <- n / 2
	ann[(midpoint+1):n] <- paste('t', rev( seq_along( (midpoint+1):n ) ), sep='_')
	ann[1:midpoint] <- paste('f', seq_along( 1:midpoint ), sep='_')
	} else {
	midpoint <- ceiling( n / 2 )
	ann[midpoint] <- 'm'
	ann[(midpoint+1):n] <- paste('t', rev( seq_along( (midpoint+1):n ) ), sep='_')
	ann[1:(midpoint-1)] <- paste('f', seq_along( 1:(midpoint-1) ), sep='_')
	}

	return(ann[order(order(yf,x))])

}

## make a matrix with log10 padj values multiplied by the sign of the induction
## which summarize significant events at the level of genes

eventsTableSignif <- function(dexDF, 
	signifPadj=.05, signifLog2fold=.1, max_saturate=5) {

	Padj <- dexDF[,grep('^padj',colnames(dexDF))]
	Log2fold <- dexDF[,grep('^log2fold',colnames(dexDF))]

	Table <- -log10(Padj)
	Table <- Table * sign(Log2fold)
	Table[is.na(Table)] <- 0

	signifEvents <- Padj < signifPadj & abs(Log2fold) > signifLog2fold
	signifEvents[is.na(signifEvents)] <- FALSE
	Table[!signifEvents] <- 0

	geneTable <- Table
	annotation <- dexDF[,intersect(colnames(dexDF),
		c("groupID","featureID","TSSdist","TESdist","TSSreldist","len","RPKMs","groupRPKMs"))]

	geneTable[geneTable<(-max_saturate)] <- -max_saturate
	geneTable[geneTable>(+max_saturate)] <- +max_saturate

	geneTableSignif <- cbind(annotation, geneTable)[rowSums(geneTable)!=0,]
	rownames(geneTableSignif) <- paste(geneTableSignif$groupID, geneTableSignif$featureID, sep=':')
	geneTableSignif

}

#################################
## heatmaps and clustering ############
#################################

######## aicKmeans
## clusters the rows of a matrix of data using kmenas algorithm, trying different
## numbers of clusters and choosing the best one in terms of the Akake 
## Information Criterion (AIC). 
## Arguments:
##   > "data" a matrix of items on rows and features on columns
##   > "maxN" the maximum number of clusters that will be tried
##     be tried (e.g. 10)
##   > "plot.out" a logical. If TRUE, plots the AIC of each clustering tried
##   > "stabilize" a logical. If true, tends to choose fewer clusters in the
##     case that AIC score don't increase significantly
##   > "by" a integer. Increment of the sequence
##   > "..." additional arguments to kmeans
aicKmeans <- function(data, maxN, stabilize=.05, by=1, seed=NULL, ...) {
	if(!is.null(seed)) set.seed(seed)
	clstAttempts <- seq(2,maxN,by)
	kmeansOut <- lapply(clstAttempts, function(centers) 
		{print(paste('Trying',centers,'clusters...')); kmeans(data, centers=centers, ...)})
	names(kmeansOut) <- clstAttempts
	# choose the number of clusters
	kmeansAIC <- function(fit){
		D <- fit$tot.withinss
		m <- ncol(fit$centers) # features per sample
		n <- length(fit$cluster) # sample size
		k <- nrow(fit$centers) # number of clusters
		return(2*m*k + D)
	}
	AICscores <- sapply(kmeansOut, kmeansAIC)
	nclust <- clstAttempts[which.min(AICscores)]
	if( stabilize>0 & (nclust>2) ) {
		upper_threshold <- (AICscores[1]-AICscores[as.character(nclust)])*stabilize + AICscores[as.character(nclust)]
		nclust_stable <- clstAttempts[min(which(AICscores<upper_threshold))]
	}
	AIC <- data.frame(n=clstAttempts,AICscores)
	if( stabilize>0 ) nclust <- nclust_stable
	print(paste('Chosen',nclust,'clusters'))
	return(list("stabilize"=stabilize, "AIC"=AIC, 
		"kmeans"=kmeansOut[[as.character(nclust)]]))
}

plotAIC <- function(aicKmeans) {
	stabilize <- aicKmeans$stabilize
	clstAttempts <- aicKmeans$AIC$n
	AICscores <- aicKmeans$AIC$AICscores
	names(AICscores) <- clstAttempts
	nclust <- clstAttempts[which.min(AICscores)]
	if( stabilize>0 & (nclust>2) ) {
		upper_threshold <- (AICscores[1]-AICscores[as.character(nclust)])*stabilize + AICscores[as.character(nclust)]
		nclust_stable <- clstAttempts[min(which(AICscores<upper_threshold))]
	}
	plot(clstAttempts, AICscores, xlab='clusters', ylab='AIC')
	points(nclust, AICscores[as.character(nclust)], pch=20, col='green')
	if( stabilize>0 ) {
		points(nclust_stable, AICscores[as.character(nclust_stable)], pch=20, col='darkgreen')
		abline(h=upper_threshold, lty=2)
	}
}

######## imageKmeans
## makes a heatmap using image function and use the output
## of aicKmeans function to cluster rows and to make breaks between 
## clusters.
## Arguments:
##   > "data" a matrix of items on rows and features on columns
##   > "aicKmeans" the output of "aicKmeans" run on data
##   > "..." additional arguments to image
##### >>>>>> manca la gestione degli NA e le colnames
imageKmeans <- function(data, aicKmeans, Colv=FALSE, labRow=FALSE, 
	col=colorRampPalette(c('darkblue','blue','beige','white','beige','red','darkred'))(21),
	sepcolor='black', sepwidth=1, clustOrd=NULL, clustToPlot=NULL, transpose=F, ...)
{

	aicKmeans <- aicKmeans$kmeans
	if( is.null(clustToPlot) ) clustToPlot <- 1:nrow(aicKmeans$centers)
	idx <- aicKmeans$cluster %in% clustToPlot
	aicKmeans$cluster <- aicKmeans$cluster[idx]
	aicKmeans$centers <- aicKmeans$centers[clustToPlot,]
	data <- data[idx,]
	if( !is.null(clustOrd) ) {
		ord <- 1:nrow(aicKmeans$centers)
		ord[clustOrd] <- 1:nrow(aicKmeans$centers)
		aicClustering <- factor(aicKmeans$cluster)
		levels(aicClustering) <- as.character(ord)
		aicClustering <- as.numeric(as.character(aicClustering))
		orderGenes <- order(aicClustering)
	} else {
		aicClustering <- aicKmeans$cluster
		orderGenes <- order(aicClustering)
	}
	heatmapInNumbers <- split(data.frame(data[orderGenes,]),aicClustering[orderGenes])
	dots <- list(...)
	if( any(names(dots) %in% "breaks" )) {
		breaks <- dots$breaks
		data[data<min(breaks)] <- min(breaks)
		data[data>max(breaks)] <- max(breaks)
	}
	if( any(names(dots) %in% "colsep" )) {
		colsepDef <- TRUE
		colsep <- dots$colsep
		dots$colsep <- NULL
	} else colsepDef <- FALSE
	if(!transpose) do.call(image, c(list(x=t(data[rev(orderGenes),]), col=col, xaxt='n', yaxt='n'), dots))
	else do.call(image, c(list(x=data[orderGenes,ncol(data):1], col=col, xaxt='n', yaxt='n'), dots))
	if( colsepDef ) {
		colMids <- seq(0,1,length.out=ncol(data))
		colBorders <- ( colMids[-1] + colMids[-length(colMids)] ) / 2
		if(!transpose) abline(v=colBorders[colsep], lwd=sepwidth)
		else abline(h=colBorders[colsep], lwd=sepwidth)
	}
	rowsep <- which(diff(aicClustering[order(aicClustering)])>0)
	rowMids <- seq(1,0,length.out=nrow(data))
	rowBorders <- ( rowMids[-1] + rowMids[-length(rowMids)] ) / 2
	if(!transpose) abline(h=rowBorders[rowsep], lwd=sepwidth)
	else abline(v=rowBorders[rowsep], lwd=sepwidth)

	heatmapInNumbers <- heatmapInNumbers

}

imageHclust <- function(data, Rowv=TRUE, Colv=FALSE, labRow=FALSE, 
	col=colorRampPalette(c('darkblue','blue','beige','white','beige','red','darkred'))(21),
	sepcolor='black', sepwidth=1,
	...)
{
	if( Colv ) ordCols <- hclust(dist(t(data)))$ord else ordCols <- 1:ncol(data)
	if( Rowv ) ordRows <- hclust(corDist(data))$ord else ordRows <- 1:nrow(data)
	data <- data[ordRows, ordCols]
	dots <- list(...)
	if( any(names(dots) %in% "breaks" )) {
		breaks <- dots$breaks
		data[data<min(breaks)] <- min(breaks)
		data[data>max(breaks)] <- max(breaks)
	}
	if( any(names(dots) %in% "colsep" )) {
		colsepDef <- TRUE
		colsep <- dots$colsep
		dots$colsep <- NULL
	} else colsepDef <- FALSE
	do.call(image, c(list(x=t(data), col=col, xaxt='n', yaxt='n'), dots))
	if( colsepDef ) {
		colMids <- seq(0,1,length.out=ncol(data))
		colBorders <- ( colMids[-1] + colMids[-length(colMids)] ) / 2
		abline(v=colBorders[colsep], lwd=sepwidth, col=sepcolor)
	}
	data <- data
}

######## makeColscale
## given a vector of colors and the intervals generates 
## the panel for the colorscale
## Arguments:
##   > "palette" the output of the linear modeling function
##   > "breaks" the output of the linear modeling function
makeColscale <- function(palette, breaks)
{
	par(mar=c(4,1,1,1))
	image(as.matrix(breaks), xaxt='n', yaxt='n', 
		col=palette)
	axis(1, at=seq(0,1,length.out=3), 
		labels=signif(quantile(breaks, seq(0,1,length.out=3)),2))	
}

######################################
##### Generation of reads counts ##########
########################################

importBamFile <- function(bamfile, countMultiMappingReads=FALSE, isPairedEnd=FALSE)
{
	if( countMultiMappingReads ) {
		message('Importing bamfile...')
		if( isPairedEnd )
			samTab <- readGAlignmentPairs(bamfile)
		else
			samTab <- readGAlignments(bamfile)
	} else { # countMultiMappingReads==FALSE
		message('Importing bamfile...')
		if( isPairedEnd )
			samTab <- readGAlignmentPairs(bamfile, param=ScanBamParam(tagFilter=list('NH'=1)))
		else
			samTab <- readGAlignments(bamfile, param=ScanBamParam(tagFilter=list('NH'=1)))
		if( length(samTab)==0 ) stop('No alignments imported.')
	}
	return(samTab)
}

countFeat <- function(annotation, samTab, allowMultiOverlap=FALSE, strandSpecific=FALSE, type='any')
{
	foOut <- findOverlaps(annotation,samTab,ignore.strand=!strandSpecific,type=type)
	onfeature <- unique(subjectHits(foOut))

	if( allowMultiOverlap ) {
		Unassigned_Ambiguity <- 0
		Assigned_Reads <- length(onfeature)
	} else {
		ambiguous_reads <- duplicated(subjectHits(foOut))|duplicated(subjectHits(foOut),fromLast=TRUE)
		Unassigned_Ambiguity <- length(unique(subjectHits(foOut)[ambiguous_reads]))
		Assigned_Reads <- length(which(!ambiguous_reads))
		foOut <- foOut[!ambiguous_reads]
	}
	counts <- table(factor(queryHits(foOut), levels=1:queryLength(foOut)))
	names(counts) <- names(annotation)

	Unassigned_NoFeatures <- length(samTab) - length(onfeature)
	logical_onfeature <- seq_along(samTab) %in% onfeature

	stat <- c(
			Unassigned_Ambiguity=Unassigned_Ambiguity,
			Assigned_Reads=Assigned_Reads,
			Unassigned_NoFeatures=Unassigned_NoFeatures
		)

	return(list(onfeature=logical_onfeature, counts=counts, stat=stat))

}

#####################################
#####################################
####### RNAPII ######################
#####################################
#####################################

######## rnapii_model
## given temporal profiles of RNAPII occupancy over TSS, GB and TES regions of a
## gene, as well as synthesis rates of that gene, it models the behavior of RNAPII
## in terms of 4 rates (loading [p1] and frquency of release from TSS [p2], GB [p3] and TES [p4]
## regions). Each rate can forced to be either constant over time or shaped as an impulse model.
## Arguments:
##   > "tpts" numeric, the time points of the experiment
##   > "gene" matrix, contains the RNAPII occupancies over time (rows) and features (columns)
##   > "model_type" character, definenes the shape of the rates, "K" stay for constant and
##        "V" for variable (impulse). A character of "KKVK" means that p3 is variable.
rnapii_model <- function(tpts, gene, geneVar=NULL, model_type, seed=NULL, maxit=20000, constrained=FALSE) {

	gDer <- rbind(0,diff(gene[,-4])/diff(tpts))

	## initial guess of the rates

	pol2guess <- t(sapply(1:nrow(gene), function(j) {
		startPar <- c(gene[j,4], gene[j,4]/gene[j,1], 
			gene[j,4]/gene[j,2], gene[j,4]/gene[j,3])
		constrOptim(startPar, errorfun, 
			gene=gene, gDer=gDer, j=j, grad=NULL, 
			ui=diag(4)[2:4,], ci=rep(0,3))$par
		}))

	## interpolate smooth functions according to the model chosen

	model_type <- strsplit(model_type, '')[[1]]

	rates <- lapply(c(p1=1,p2=2,p3=3,p4=4), function(i) 
		switch(model_type[i], 
			"K"=chooseConstant(tpts, pol2guess[,i]),
			"S"=chooseSigmoid(tpts, pol2guess[,i], nInit=100, log_transf=TRUE, seed=seed),
			"I"=chooseImpulse(tpts, pol2guess[,i], nInit=100, log_transf=TRUE, seed=seed, 
				constrained=if(i==1) 1 else 2),
			stop('rnapii_model: model argument must be a string of either "K" (constant), "S" (sigmoid) or "I" (impulse)')
			)
		)

	## optimize rates rates

	initparams <- unlist(lapply(rates, '[[', 'params'))

	if( constrained ) {

		n <- sapply(model_type, switch, "K"=1, "S"=4, "I"=6)
		ui <- lapply(seq_along(model_type), function(i)
			if( i == 1 ) {
				switch(model_type[[i]],
					"K"=numeric(0),
					"S"=rbind(
						c(rep(0,sum(n[seq_along(n)<i])),c(0,0,0,1),rep(0,sum(n[seq_along(n)>i]))),
						c(rep(0,sum(n[seq_along(n)<i])),c(0,0,0,-1),rep(0,sum(n[seq_along(n)>i])))
						),
					"I"=rbind(
						c(rep(0,sum(n[seq_along(n)<i])),c(0,0,0,0,0,1),rep(0,sum(n[seq_along(n)>i]))),
						c(rep(0,sum(n[seq_along(n)<i])),c(0,0,0,0,0,-1),rep(0,sum(n[seq_along(n)>i])))
						)
					)
			} else {
				switch(model_type[[i]],
					"K"=c(rep(0,sum(n[seq_along(n)<i])),1,rep(0,sum(n[seq_along(n)>i]))),
					"S"=rbind(
						c(rep(0,sum(n[seq_along(n)<i])),c(1,0,0,0),rep(0,sum(n[seq_along(n)>i]))),
						c(rep(0,sum(n[seq_along(n)<i])),c(0,1,0,0),rep(0,sum(n[seq_along(n)>i]))),
						c(rep(0,sum(n[seq_along(n)<i])),c(0,0,0,1),rep(0,sum(n[seq_along(n)>i]))),
						c(rep(0,sum(n[seq_along(n)<i])),c(0,0,0,-1),rep(0,sum(n[seq_along(n)>i])))
						),
					"I"=rbind(
						c(rep(0,sum(n[seq_along(n)<i])),c(1,0,0,0,0,0),rep(0,sum(n[seq_along(n)>i]))),
						c(rep(0,sum(n[seq_along(n)<i])),c(0,1,0,0,0,0),rep(0,sum(n[seq_along(n)>i]))),
						c(rep(0,sum(n[seq_along(n)<i])),c(0,0,1,0,0,0),rep(0,sum(n[seq_along(n)>i]))),
						c(rep(0,sum(n[seq_along(n)<i])),c(0,0,0,0,0,1),rep(0,sum(n[seq_along(n)>i]))),
						c(rep(0,sum(n[seq_along(n)<i])),c(0,0,0,0,0,-1),rep(0,sum(n[seq_along(n)>i])))
						)
					)
			})
		ui <- do.call('rbind',ui)
		ci <- lapply(seq_along(model_type), function(i)
			if( i == 1 ) {
				switch(model_type[[i]],
					"K"=numeric(0),
					"S"=c(0,-10),
					"I"=c(0,-10)
					)
			} else {
				switch(model_type[[i]],
					"K"=0,
					"S"=c(0,0,0,-10),
					"I"=c(0,0,0,0,-10)
					)
			})
		ci <- unlist(ci)
		optimOut <- constrOptim(initparams, oderrorfun, grad=NULL, 
			tpts=tpts, rates=rates, gene=gene, var=geneVar,
			ui=ui, ci=ci,
			control=list(maxit=maxit))
	} else {
		optimOut <- optim(initparams, oderrorfun, 
			tpts=tpts, rates=rates, gene=gene, var=geneVar,
			control=list(maxit=maxit))
	}

	## return optimized rates 
	
	c(
		params2rates(optimOut$par, rates),
		value=optimOut$value,
		df=length(gene)-length(optimOut$par),
		convergence=optimOut$convergence
		)
		
}

######## rnapii_plot
## given the temporal profiles of RNAPII occupancy over TSS, GB and TES regions of a
## gene, as well as synthesis rates of that gene, and the model output of rnapii_model
## this function make 4 plots:
## 1 - gene experimental and modeled profile
## 2 - RNAPII flux temporal profile
## 3 - RNAPII flux temporal profile (zoom)
## 4 - log2 fold change of the rates over time
##   > "tpts" numeric, the time points of the experiment
##   > "gene" matrix, contains the RNAPII occupancies over time (rows) and features (columns)
##   > "optimized_model" list, the output of the rnapii_model function
##   > "xaxis" logical, whether to draw labels on x-axis or not.
rnapii_plot <- function(tpts, pseudogene, optimized_model, xaxis=TRUE, 
	lwd=2, lty=1, model_norm=NA, panels=1:4, add=FALSE) {

	# time

	log_shift <- find_tt_par(tpts)
	tptslog <- time_transf(tpts, log_shift)
	tptsfine <- seq(min(tpts),max(tpts),length.out=400)
	tptsfinelog <- time_transf(tptsfine, log_shift)	

	p1 <- evalModel(tptsfine,optimized_model[[1]])

	if( 1 %in% panels ) {

		#2 - FLUX (p1)
		if(add==FALSE) {
			plot(tptsfinelog, p1 <- evalModel(tptsfine,optimized_model[[1]]), type='l', xaxt='n', 
				ylim=c(min(0,min(p1)),max(p1)),lwd=lwd, lty=lty, ylab='RNAPII flux', xlab='')
		} else {
			lines(tptsfinelog, p1 <- evalModel(tptsfine,optimized_model[[1]]),lwd=lwd, lty=lty)
		}
		points(tptslog, evalModel(tpts,optimized_model[[1]]), pch=20, cex=1.5)
		abline(lwd=lwd, h=0, lty=3, col='grey')
		abline(lwd=lwd, h=p1[1], lty=3)
		if( xaxis & !add ) 
			axis(1, at=tptslog, labels=signif(tpts,2)) else 
				axis(1, at=tptslog, labels=rep('',length(tpts)))

	}

	if( 2 %in% panels ) {

		#3 flux - zoom
		scale <- .7
		if( p1[1] < p1[length(p1)] ) 
			ylim <- c(p1[1]*scale,p1[length(p1)]/scale) else
				ylim <- c(p1[1]/scale,p1[length(p1)]*scale)[2:1]
		if( add==FALSE ) {
			plot(tptsfinelog, p1, type='l', xaxt='n', 
				ylim=ylim, lwd=lwd, lty=lty, ylab='', cex=2, xlab='')
		} else {
			lines(tptsfinelog, p1, lwd=lwd, lty=lty)
		}
		points(tptslog, evalModel(tpts,optimized_model[[1]]), pch=20, cex=1.5)
		abline(lwd=lwd, h=0, lty=3, col='grey')
		abline(lwd=lwd, h=p1[1], lty=3)
		if( xaxis & !add ) 
			axis(1, at=tptslog, labels=signif(tpts,2)) else 
				axis(1, at=tptslog, labels=rep('',length(tpts)))

	}

	if( 3 %in% panels ) {

		#4 - rates
		if( add==FALSE ) {
			matplot(tptsfinelog, 
				sapply(optimized_model[2:4], function(x) {y <- evalModel(tptsfine,x); log2(y/y[1])}), 
				type='l', col=2:4, xaxt='n', lwd=lwd, lty=lty, ylab='log2ratio to 0h', xlab='')			
		} else {
			matlines(tptsfinelog, 
				sapply(optimized_model[2:4], function(x) {y <- evalModel(tptsfine,x); log2(y/y[1])}), 
				col=2:4, lwd=lwd, lty=lty)
		}
		matpoints(tptslog, 
			sapply(optimized_model[2:4], function(x) {y<-evalModel(tpts,x); log2(y/y[1])}), 
			pch=20, cex=1.5, lty=1, col=2:4)
		abline(lwd=lwd, h=0, lty=3, col='grey')
		if( xaxis & !add ) 
			axis(1, at=tptslog, labels=signif(tpts,2)) else 
				axis(1, at=tptslog, labels=rep('',length(tpts)))
				
	}

	if( 4 %in% panels ) {

		#1 - RNAPII experimental and modeled profiles
		modeled_profile <- model(tptsfine, optimized_model)
		if( !is.na( model_norm[1] ) ) modeled_profile <- t( t( modeled_profile ) * model_norm )
		if( add == FALSE ) {
			matplot(tptsfinelog, modeled_profile, 
				type='l', lty=lty, lwd=lwd, xaxt='n', ylab='', xlab='', 
				col = c('cyan','magenta','orange','saddlebrown'),
				ylim=range(c(modeled_profile,pseudogene)))
		} else {
			matlines(tptsfinelog, modeled_profile,
				lty=lty, lwd=lwd, col = c('cyan','magenta','orange','saddlebrown'))			
		}
		matpoints(tptslog, pseudogene, 
			col = c('cyan','magenta','orange','saddlebrown'),
			pch=20, cex=1.5) #, type='b', lty=2)
		if( xaxis & !add ) 
			axis(1, at=tptslog, labels=signif(tpts,2)) else 
				axis(1, at=tptslog, labels=rep('',length(tpts)))

	}
			
}

#####################################
####### RNAPII modeling #############
########## secondary functions ######
############# (undocumented) ########
#####################################

########### estimation of first guess rates

pol2system <- function(x, gene, gDer, j) {
	y <- numeric(4)
	y[1] <- gDer[j,1] - x[1]           + x[2]*gene[j,1]
	y[2] <- gDer[j,2] - x[2]*gene[j,1] + x[3]*gene[j,2]
	y[3] <- gDer[j,3] - x[3]*gene[j,2] + x[4]*gene[j,3]
	y[4] <- gene[j,4] - x[3]*gene[j,2]
	y
}

errorfun <- function(par, gene, gDer, j) sum(pol2system(par, gene, gDer, j)^2)

########### chhoseModel based differential equation system

evalModel <- function(x, cmOut) {
	if(cmOut$log_transf) x <- time_transf(x, cmOut$log_shift)
	cmOut$fun$value(x, cmOut$params)
}

odefun <- function(t, C, rates) {
	x <- C[1]; y <- C[2]; z <- C[3]
	k1 <- evalModel(t, rates[[1]]); k2 <- evalModel(t, rates[[2]])
	k3 <- evalModel(t, rates[[3]]); k4 <- evalModel(t, rates[[4]])
	F1 <- k1   - k2*x
	F2 <- k2*x - k3*y
	F3 <- k3*y - k4*z
	list(c(F1,F2,F3))
}

model <- function(tpts, rates) {
	x0 <- evalModel(0, rates[[1]])/evalModel(0, rates[[2]])
	y0 <- evalModel(0, rates[[1]])/evalModel(0, rates[[3]])
	z0 <- evalModel(0, rates[[1]])/evalModel(0, rates[[4]])
	C0 <- c(x0,y0,z0)
	odeOut <- ode(C0, tpts, odefun, rates, method='ode23')
	synthesis <- evalModel(tpts, rates[[3]])*odeOut[,3]
	cbind(odeOut[,2:4], synthesis)
}

params2rates <- function(params, rates) {
	df <- sapply(rates, '[[', 'df')
	splitparams <- split(params, rep(c('p1','p2','p3','p4'), times=df))
	for(i in 1:4) rates[[i]]$params <- splitparams[[i]]
	return(rates)
}

oderrorfun <- function(params, tpts, rates, gene, var=NULL) {
	if(is.null(var)) var <- gene
	rates <- params2rates(params, rates)
	sum(((model(tpts, rates)-gene)^2)/var)
}

############### log time 

find_tt_par <- function(tpts)
{
	cvLogTpts <- function(a , tpts) {
		newtime <- log2(tpts + a )
		stats::sd(diff(newtime)) / mean(diff(newtime))
	}
	if( length(tpts)>2 )
		return(optimize(f=cvLogTpts, interval=c(0,5), tpts=tpts )$minimum)
	else
		return(1)
}

time_transf <- function(t, log_shift) 
{
	newtime <- log2(t+log_shift)
	return(newtime)
} 

################ fit profiles with smooth functions

newPointer <- function(inputValue){  
	object=new.env(parent=globalenv())  
	object$value=inputValue  
	class(object)='pointer'
	return(object)  
}

## impulse model

impulseModel <- function(x, par) 
	1/par[2]*(par[1]+(par[2]-par[1])*(1/(1+exp(-par[6]*(x-par[4])))))*
		(par[3]+(par[2]-par[3])*(1/(1+exp(par[6]*(x-par[5])))))
impulseModel <- cmpfun(impulseModel)
impulseModelP <- newPointer(impulseModel)

chooseImpulse <- function(tpts, experiment, na.rm=TRUE
	, log_transf=FALSE, nInit=10, nIter=500, seed=NULL, constrained=0)
{

	im.parguess <- function(tpts , values ) {
	    ntp   <- length(tpts)
	    peaks <- which(diff(sign(diff(values)))!=0)+1
	    if( length(peaks) == 1 ) peak <- peaks
	    if( length(peaks)  > 1 ) peak <- sample(peaks, 1)
	    if( length(peaks) == 0 ) peak <- round(length(tpts)/2)
		initial_values <- runif( 1, min=min(values[1:3])
			, max=max(values[1:3]))
		intermediate_values <- values[peak]
		if( intermediate_values==0 ) intermediate_values <- mean(values[seq(peak-1,peak+1)])
		end_values <- runif( 1, min=min(values[(ntp-2):ntp])
			, max=max(values[(ntp-2):ntp]))
		time_of_first_response  <- tpts[peak-1]
		time_of_second_response <- tpts[peak+1]
		slope_of_response <- diff(range(tpts)) / 
			(time_of_second_response-time_of_first_response)
	    return(c(h0=initial_values, h1=intermediate_values
	    	, h2=end_values, t1=time_of_first_response
	    	, t2=time_of_second_response, b=slope_of_response))
	}
	#
	im.chisq <- function(par, tpts, experiment, impulseModel) 
	{
		 model <- impulseModel(tpts, par)
		 sum((experiment - model )^2)
	}
	#
	im.optim.chisq <- function(tpts, experiment, ninit=10
		, maxit=500) 
		sapply(1:ninit, function(x) 
	 		tryCatch(optim(
	 			par=im.parguess(tpts, experiment)
	 			, fn=im.chisq, tpts=tpts
	 			, experiment=experiment
	 			, impulseModel=impulseModel
	 			, control=list(maxit=maxit)
	 			), error=function(e) optimFailOut(e)))

	# remove missing values
	if( na.rm) {
		idx <- is.finite(experiment)
		tpts <- tpts[idx]
		experiment <- experiment[idx]
	}

	if( log_transf ) {
		log_shift <- find_tt_par(tpts)
		tpts <- time_transf(tpts, log_shift)
	} else {
		log_shift <- NaN
	}
	
	if( !is.null(seed) ) set.seed(seed)
	outIM  <- im.optim.chisq(tpts=tpts, experiment=experiment, 
		ninit=nInit, maxit=nIter)
	if( constrained == 1 ) {
		idx <- abs(apply(outIM,2,'[[','par')[6,])<10
		if(!any(idx)) stop('No constrained solutions found, try with larger nInit.')
		outIM <- outIM[,idx]		
	} else if( constrained == 2 ) {
		idx <- apply(apply(outIM,2,'[[','par')[1:3,]>0,2,all) & 
			abs(apply(outIM,2,'[[','par')[6,])<10
		if(!any(idx)) stop('No constrained solutions found, try with larger nInit.')
		outIM <- outIM[,idx]
	}
	bestIM <- which.min(unlist(outIM[2,]))
	dfIM <- length(outIM[,bestIM]$par)

	return(list(
		fun=impulseModelP, 
		params=as.vector(outIM[,bestIM]$par), 
		df=dfIM, 
		log_transf=log_transf, 
		log_shift=log_shift))

}

### sigmoid model

sigmoidModel <- function(x, par) 
	par[1]+(par[2]-par[1])*(1/(1+exp(-par[4]*(x-par[3]))))
sigmoidModel <- cmpfun(sigmoidModel)
sigmoidModelP <- newPointer(sigmoidModel)

chooseSigmoid <- function(tpts, experiment, na.rm=TRUE
	, log_transf=FALSE, nInit=10, nIter=500, seed=NULL)
{

	sm.parguess <- function(tpts , values ) {
	    # values = expressions.avgd(eD)
	    # tp = tpts(eD)

		time_span <- diff(range(tpts))
		# sample the time uniformely
		time_of_response <- runif( 1, min=min(tpts), max=max(tpts))
		# slope of response must be high if the time of response is close to one
		# of the two boundaries
		distance_from_boundary <- min(time_of_response - min(tpts)
				, max(tpts) - time_of_response)
		slope_of_response <- time_span / distance_from_boundary
	    ntp   <- length(tpts)
		initial_values <- runif( 1, min=min(values[1:3])
			, max=max(values[1:3]))
		end_values <- runif( 1, min=min(values[(ntp-2):ntp])
			, max=max(values[(ntp-2):ntp]))
		#
	    return(c(h0=initial_values, h1=end_values, t1=time_of_response
	    	, b=slope_of_response))
	}

	sm.chisq <- function(par, tpts, experiment, sigmoidModel) 
	{
		 model <- sigmoidModel(tpts, par)
		 sum((experiment - model )^2)
	}
	#
	sm.optim.chisq <- function(tpts, experiment, ninit=10
		, maxit=500) 
		sapply(1:ninit, function(x) 
				tryCatch(optim(
					par=sm.parguess(tpts, experiment)
					, fn=sm.chisq, tpts=tpts
					, experiment=experiment
					, sigmoidModel=sigmoidModel
					, control=list(maxit=maxit)
					), error=function(e) optimFailOut(e))) 

	# remove missing values
	if( na.rm) {
		idx <- is.finite(experiment)
		tpts <- tpts[idx]
		experiment <- experiment[idx]
	}

	if( log_transf ) {
		log_shift <- find_tt_par(tpts)
		tpts <- time_transf(tpts, log_shift)
	} else {
		log_shift <- NaN
	}
	
	if( !is.null(seed) ) set.seed(seed)
	outSM  <- sm.optim.chisq(tpts=tpts, experiment=experiment, 
		ninit=nInit, maxit=nIter)
	bestSM <- which.min(unlist(outSM[2,]))
	dfSM <- length(outSM[,bestSM]$par)

	return(list(
		fun=sigmoidModelP, 
		params=as.vector(outSM[,bestSM]$par), 
		df=dfSM, 
		log_transf=log_transf, 
		log_shift=log_shift))

}

### constant

constantModel <- function(x , par ) rep(par , length(x) )
constantModelP <- newPointer(constantModel)

chooseConstant <- function(tpts, experiment, na.rm=TRUE)
{

	# remove missing values
	if( na.rm) {
		idx <- is.finite(experiment)
		tpts <- tpts[idx]
		experiment <- experiment[idx]
	}

	# there is no need to transform the time for constant models
	log_transf <- FALSE
	log_shift <- NaN

	bestKpar <- mean(experiment)
	dfK <- 1

	return(list(
		fun=constantModelP , 
		params=bestKpar, 
		df=dfK, 
		log_transf=log_transf, 
		log_shift=log_shift
		))

}

##############################################
###### processing ############################
##############################################

maxResp <- function(x) {
	l2rX <- log2ratio(x)
	l2rX[is.na(l2rX)] <- 0
	l2rX[cbind(1:nrow(l2rX),apply(abs(l2rX), 1, which.max))]
}

halfResponse <- function(responsevar) {
	idxMax <- which.max(abs(responsevar-responsevar[1]))
	maxresponse <- responsevar[idxMax[1]]
	halfresponse <- (responsevar[1]+maxresponse)/2
	idxHalf <- which(abs(diff(responsevar<halfresponse))==1)[1]
	return(idxHalf)
}

forceT0K2 <- function(ids) {
	ids@model <- .bestModel(ids@model)
	ids <- makeModelRates(ids)
	ToKcvals <- viewModelRates(ids, 'processing')[,1]
	for(i in 1:length(ids@model@ratesSpecs)) {
		ids@model@ratesSpecs[[i]][[1]]$gamma <- list(
			type="constant"
			, fun=.constantModelP
			, params=ToKcvals[i]
			, pval=1
			, df=1
			)
	}
	ids <- makeModelRates(ids)
	return(ids)
}

halfResponses <- function(ids) {
	library(deSolve)
	if( any(sapply(ids@model@ratesSpecs, length)!=1) ) {
		bmods <- .bestModel(ids@model)
	} else bmods <- ids@model
	fineTpts <- seq(0, 16, by=1/60)
	bmodse <- lapply(1:length(bmods@ratesSpecs), function(i)
		try(.makeModel(fineTpts, 
			bmods@ratesSpecs[[i]][[1]], 
			.find_tt_par(ids@tpts), 
			.time_transf, 
			ode, 
			.rxnrate))
		)
	return(data.frame(
		synthesis = sapply(bmodse, function(x) {
			tryCatch({
				responsevar <- x$alpha
				idx <- halfResponse(responsevar)
				halfresponseTime <- fineTpts[idx]*60
				}, error=function(e) NaN)
			}),
		processing = sapply(bmodse, function(x) {
			tryCatch({
				responsevar <- x$gamma
				idx <- halfResponse(responsevar)
				halfresponseTime <- fineTpts[idx]*60
				}, error=function(e) NaN)
			}),
		premrna = sapply(bmodse, function(x) {
			tryCatch({
				responsevar <- x$preMRNA
				idx <- halfResponse(responsevar)
				halfresponseTime <- fineTpts[idx]*60
				}, error=function(e) NaN)
			}),
		total = sapply(bmodse, function(x) {
			tryCatch({
				responsevar <- x$total
				idx <- halfResponse(responsevar)
				halfresponseTime <- fineTpts[idx]*60
				}, error=function(e) NaN)
			})
		))
}

#########################################
## from INSPEcT internal functions ##########
######################################

.bestModel <- function(object, bTsh=NULL, cTsh=NULL) {
		## in case bTsh or bTsh are provided set them as
		# permanent for the object
		if( is.null(bTsh) )
			bTsh <- object@params$thresholds$brown
		if( is.null(cTsh) )
			cTsh <- object@params$thresholds$chisquare
		## calculate ratePvals
		ratePvals <- ratePvals(object, cTsh)
		## give a discrete classification per each rate per each gene
		# according to the brown's threshold for the pvalues
		acceptedVarModels <- sapply(1:3, function(i) ratePvals[,i]<bTsh[i])
		if( !is.matrix(acceptedVarModels) )
			acceptedVarModels <- t(as.matrix(acceptedVarModels))
		# nonResolvedGenes <- apply(acceptedVarModels, 1, 
		# 	function(x) all(is.na(x)))
		acceptedVarModels[is.na(acceptedVarModels)] <- FALSE
		rownames(acceptedVarModels) <- rownames(ratePvals)
		colnames(acceptedVarModels) <- colnames(ratePvals)
		geneClass <- apply(acceptedVarModels, 1, 
			function(accepted) paste(c('a','b','c')[accepted],collapse=''))
		geneClass[geneClass==''] <- '0'
		# geneClass[nonResolvedGenes] <- NA
		## retrive all the models
		ratesSpecs <- object@ratesSpecs
		## select the best model (according to geneClass) per gene
		nGenes <- length(ratesSpecs)
		object@ratesSpecs <- lapply(1:nGenes, 
			function(i) ratesSpecs[[i]][geneClass[i]])
		return(object)
	}

.makeModel <- function(tpts, hyp, log_shift, .time_transf, ode, .rxnrate)
{
	params <- list()
	params$alpha <- function(x) 
		hyp$alpha$fun$value(.time_transf(x, log_shift), hyp$alpha$par)
	params$beta  <- function(x) 
		hyp$beta$fun$value(.time_transf(x, log_shift), hyp$beta$par)
	params$gamma <- function(x) 
		hyp$gamma$fun$value(.time_transf(x, log_shift), hyp$gamma$par)
	cinit <- c(params$alpha(tpts[1]) / params$gamma(tpts[1]), 
		params$alpha(tpts[1]) / params$beta(tpts[1]) + 
			params$alpha(tpts[1]) / params$gamma(tpts[1]))
	names(cinit) <- c('p', 't')
	model <- as.data.frame(
		ode(y=cinit, times=tpts, func=.rxnrate, parms=params))
	model$alpha <- params$alpha(tpts)
	model$beta  <- params$beta(tpts)
	model$gamma <- params$gamma(tpts)
	colnames(model)[2:3] <- c('preMRNA','total')
	return(model)
}

.find_tt_par <- function(tpts)
{
	cvLogTpts <- function(a , tpts) {
		newtime <- log2(tpts + a )
		stats::sd(diff(newtime)) / mean(diff(newtime))
	}
	if( length(tpts)>2 )
		return(optimize(f=cvLogTpts, interval=c(0,5), tpts=tpts )$minimum)
	else
		return(1)
}

.time_transf <- function(t, log_shift) 
{
	newtime <- log2(t+log_shift)
	return(newtime)
} 

.rxnrate <- function(t,c,parms){
 
	# rate constant passed through a list called parms
	alpha <- parms$alpha
	beta  <- parms$beta
	gamma <- parms$gamma

	# derivatives dc/dt are computed below
	r=rep(0,length(c))
	r[1] <- alpha(t) - gamma(t) * c["p"]
	r[2] <- alpha(t) - beta(t) * (c["t"] - c["p"] )

	# c is the concentration of species
	
	# the computed derivatives are returned as a list
	# order of derivatives needs to be the same as the order of species in c
	return(list(r))
 
}

.newPointer <- function(inputValue){  
	object=new.env(parent=globalenv())  
	object$value=inputValue  
	class(object)='pointer'
	return(object)  
}

.constantModel <- function(x , par ) rep(par , length(x) )
.constantModelP <- .newPointer(.constantModel)
