################################################################
## Analysis of replication origins, nascent strands data from
## Christelle Cayrou and Marcel Mechali.
##
## R script by Jacques van Helden
##
## Running this script requires first sourcing the script config.R:
##    source('http://www.bigre.ulb.ac.be/courses/statistics_bioinformatics/R-files/config.R')

## Load the shared configuration, then the utility scripts stored in dir.util
## (dir.util is not defined in this file; presumably config.R sets it -- TODO confirm)
source("http://pedagogix-tagc.univ-mrs.fr/courses/statistics_bioinformatics/R-files/config.R")
## Local alternative:
## source('/Users/jvanheld/statistics_bioinformatics/R-files/config.R')
invisible(lapply(
  c("util_chip_analysis.R", "microarray_util.R"),
  function(util.file) source(file.path(dir.util, util.file))
))

## File formats passed to export.plot() for every figure
export.formats.plots <- c("png", "pdf")

## Main directory of the analysis (to be adapted to the local configuration)
dir.main <- "~/replication_origins"
## dir.main <- getwd()

################################################################
## Read arguments from the command line.
##
## Each trailing argument is an R statement (typically name=value,
## e.g. file.peaks="some_dir/some_peaks.bed") which is evaluated as
## is, so that it over-writes the default values specified above.
##
## SECURITY NOTE: the arguments are executed as arbitrary R code.
## This is acceptable only as long as they are typed by the person
## who runs the script; never pass untrusted text to this script.
args <- commandArgs(trailingOnly = TRUE)

if (length(args) == 0) {
  stop("No arguments supplied. Mandatory: file.peaks=[position_analysis_output_file] ")
} else {
  print("Parsing command-line arguments")
  print(args)
  for (arg in args) {
    ## Name the faulty argument rather than failing with a bare
    ## parse/eval error that does not tell which one is wrong.
    tryCatch(
      eval(parse(text = arg)),
      error = function(e) {
        stop("Invalid command-line argument '", arg, "': ",
             conditionMessage(e), call. = FALSE)
      }
    )
  }
}

## swembl.R <- 0.003
## test <- "F4_PN"
## ctrl <- "input"
## prefix <- paste("SWEMBL_",test,"_vs_",ctrl,"_R", swembl.R, sep='')
## dir.peaks <- file.path("analysis/peaks/SWEMBL", prefix)
## list.files(dir.peaks)
## file.peaks <- file.path(dir.peaks, paste(sep='', prefix, '_peaks.bed'))


## Check that the input file has been specified (file.peaks is expected
## to be set by one of the command-line arguments evaluated above)
if (!exists("file.peaks")) {
  stop("Missing mandatory argument: file.peaks=[position_analysis_output_file] ")
}
verbose(paste("Input file", file.peaks), 1)


## Define path of directories and files relative to the main directory.
## file.peaks is thus interpreted as a path relative to dir.main.
dir.peaks.relative <- dirname(file.peaks)
dir.peaks <- file.path(dir.main, dir.peaks.relative)
verbose(paste("Input directory", dir.peaks), 1)
verbose(paste("Peaks directory", dir.peaks), 1)


## Define suffix for output files: by default, the name of the peak
## file without its ".bed" extension (case-insensitive). The dot is
## escaped so that only a real extension is stripped (an unescaped "."
## would also match names ending in e.g. "_bed").
if (!exists("suffix")) {
  suffix <- sub("\\.bed$", "", basename(file.peaks), ignore.case = TRUE, perl = TRUE)
}
verbose(paste("Suffix for output files", suffix), 1)

## Load peak coordinates.
## The working directory is moved to dir.main because file.peaks is a
## path relative to it. Lines starting with '#' are skipped, and the
## first remaining line is read as the column header (read.delim default).
setwd(dir.main)
peaks <- read.delim(file.peaks, comment.char = "#")
peak.nb <- nrow(peaks)
verbose(paste("Read", peak.nb, "peaks"))

## Fail early with an explicit message rather than with an obscure error
## in the computations below.
if (ncol(peaks) < 3) {
  stop("Peak file ", file.peaks,
       " must contain at least 3 columns (chromosome, start, end)",
       call. = FALSE)
}
if (peak.nb == 0) {
  stop("Peak file ", file.peaks, " contains no peak", call. = FALSE)
}

## The first three columns are taken as chromosome, start and end,
## whatever their names in the input file.
colnames(peaks)[1:3] <- c("chrom", "start", "end")

## Peak size, computed as end - start + 1
peaks[["size"]] <- peaks[["end"]] - peaks[["start"]] + 1
range(peaks$size)

## Histogram of peak sizes with 50-wide classes, exported to dir.peaks
x11(width = 7, height = 5)
hist(
  peaks$size,
  breaks = seq(from = 0, to = max(peaks$size) + 50, by = 50),
  col = "#BBBBBB",
  main = suffix,
  xlab = "Peak size",
  ylab = "Nb peaks"
)
setwd(dir.peaks)
export.plot(
  file.prefix = paste0(suffix, "_size_distrib"),
  export.formats = export.formats.plots,
  width = 7,
  height = 5
)

## List the chromosomes in their order of first appearance in the peak
## table (the peaks themselves are not re-ordered here)
chromosomes <- unique(as.vector(peaks$chrom))

## Number of peaks per chromosome, drawn as horizontal bars and
## exported to dir.peaks
peaks.per.chrom <- table(peaks$chrom)
x11(width = 5, height = 7)
barplot(peaks.per.chrom, horiz = TRUE, las = 1, main = suffix)
setwd(dir.peaks)
export.plot(
  file.prefix = paste0(suffix, "_chrom_distrib"),
  export.formats = export.formats.plots,
  width = 5,
  height = 7
)


## Compute inter-peak distances
##
## For each peak, upstream.dist2 is the gap between its start and the
## end of the previous peak on the same chromosome (start - end - 1).
## The first peak of each chromosome has no upstream neighbour and
## keeps the value NA.
verbose("Computing inter-peak distances")
peaks$upstream.dist2 <- NA
for (chr in chromosomes) {
  ## Row indices of the peaks of this chromosome, ordered by start
  ## position, so that the result does not depend on the order of the
  ## lines in the input file.
  chr.rows <- which(peaks$chrom == chr)
  chr.rows <- chr.rows[order(peaks$start[chr.rows])]
  n.chr <- length(chr.rows)

  ## A chromosome with a single peak has no inter-peak distance.
  ## (Indexing with 2:length(x) would count backwards as 2:1 here and
  ## make the assignment fail.)
  if (n.chr < 2) {
    next
  }

  chr.start <- peaks[chr.rows, "start"]
  chr.end <- peaks[chr.rows, "end"]
  peaks[chr.rows[-1], "upstream.dist2"] <- chr.start[-1] - chr.end[-n.chr] - 1
}

#peaks$upstream.dist <- 0
#peaks[2:peak.nb, "upstream.dist"] <- peaks[2:peak.nb, "start"]  - peaks[1:(peak.nb-1), "end"] -1
#peaks[peaks$upstream.dist < 0, "upstream.dist"] <- 0

## Compute distribution of inter-peak distances
##
## The column is addressed by its exact name (upstream.dist2): the
## former peaks$upstream.dist only worked through the partial matching
## of `$`, and would silently return NULL with any other column whose
## name starts with "upstream.dist".
distances <- na.omit(peaks[["upstream.dist2"]])
if (length(distances) == 0) {
  stop("No inter-peak distance available: no chromosome has more than one peak",
       call. = FALSE)
}
dist.values <- sort(unique(distances)) ## Unique distance values

## Occurrences of each distance value, in increasing order of distance
dist.table <- table(sort(distances))
## Not used below; kept in case code outside this section refers to it
dist.matrix <- as.data.frame(as.matrix(dist.table)[,])

## One histogram class per observed distance value: each class starts
## 0.5 below a value and ends 0.5 below the next one
breaks <- c(dist.values-0.5, max(dist.values)+0.5)
h <- hist(distances, breaks=breaks, plot=FALSE)

## One row per distinct distance value:
##   counts   number of occurrences (from the histogram)
##   counts2  the same number, obtained from table() as a cross-check
##   dCDF     decreasing cumulative counts: number of distances >= distance
inter.peak.dist.distrib <- data.frame("distance"=dist.values,
                                      "counts"=h$counts,
                                      "counts2"=as.vector(dist.table),
                                      "dCDF"=rev(cumsum(rev(h$counts)))
                                      )
inter.peak.dist.distrib$dist.kb <- inter.peak.dist.distrib$distance/1000
inter.peak.dist.distrib$dist.Mb <- inter.peak.dist.distrib$distance/1e6

## Plot the decreasing cumulative distribution (dCDF) of inter-peak
## distances, as three panels stacked in one column
x11(width = 6, height = 12)
par(mfrow = c(3, 1))

## ## Density 
## plot(inter.peak.dist.distrib$dist.kb,
##      inter.peak.dist.distrib$counts,
##      type="l", lwd=2, col="blue", xlab="Inter-peak distance (kb)", ylab="Number of peaks withb D >= d (dCDF)", main=suffix, panel.first=grid())

## ## Density in log scale
## plot(inter.peak.dist.distrib$dist.kb,
##      inter.peak.dist.distrib$counts,
##      type="l", log="xy", lwd=2, col="blue", xlab="Inter-peak distance (kb)", ylab="Number of peaks withb D >= d (dCDF)", main=suffix, panel.first=grid())

## Same curve drawn three times, only the axis scaling changes:
##   ""   both axes linear
##   "y"  logarithmic Y axis
##   "xy" both axes logarithmic
## NOTE(review): the Y label contains a typo ("withb"); it is kept
## unchanged here so that the exported figures stay identical.
for (log.axes in c("", "y", "xy")) {
  plot(
    inter.peak.dist.distrib$dist.kb,
    inter.peak.dist.distrib$dCDF,
    type = "l",
    log = log.axes,
    lwd = 2,
    col = "blue",
    xlab = "Inter-peak distance (kb)",
    ylab = "Number of peaks withb D >= d (dCDF)",
    main = suffix,
    panel.first = grid()
  )
}

## Back to a single panel per device, then export the figure to dir.peaks
par(mfrow = c(1, 1))
setwd(dir.peaks)
export.plot(
  file.prefix = paste0(suffix, "_dist_distrib"),
  export.formats = export.formats.plots,
  width = 6,
  height = 12
)
verbose(paste("Peak distributions saved in", dir.peaks))

