#!/bin/bash

# Genome assembly and the library files derived from its name.
genome="mm9"
gapFile="lib/$genome.gap.bed"
chromFile="lib/$genome.chrom.sizes"
# Must be assigned before the existence checks below; with an empty value the
# test degenerates to `[ ! -e ]`, which is always false and hides a missing file.
columnDef="lib/columnDef.lst"

# die MESSAGE...
# Prints each MESSAGE on its own line to stderr and aborts with exit status 1,
# so callers capturing stdout never mistake an error text for a result.
die() {
	printf '%s\n' "$@" >&2
	exit 1
}

#Checking so all required files exist
if [ "$#" -ne 2 ]; then
	die "Incorrect Number of arguments" "> calculateOverlapFold.sh query.bed lib.bed"
fi

# Positional parameters are quoted so paths containing spaces are tested correctly.
if [ ! -e "$1" ]; then
	die "The query file $1 doesn't exist"
fi

if [ ! -e "$2" ]; then
	die "The library file $2 doesn't exist"
fi

if [ ! -e "$gapFile" ]; then
	die "$gapFile doesn't exist"
fi

if [ ! -e "$chromFile" ]; then
	die "$chromFile doesn't exist"
fi

if [ ! -e "$columnDef" ]; then
	die "Column definition file $columnDef doesn't exist!"
fi

# `type -P` only reports executables found on PATH (not aliases or functions).
# Abort here: without overlapSelect the final overlap count cannot be computed,
# so running the expensive table computation first would be wasted work.
if [ -z "$(type -P overlapSelect)" ]; then
	die "This code requires overlapSelect. To continue
1) Download overlapSelect from  http://hgdownload.cse.ucsc.edu/admin/exe/
2) Make overlapSelect executable using 'chmod +x overlapSelect'
3) Add overlapSelect to your PATH"
fi

#Setting up variables
# Scratch directory created in the current working directory.
tempDir=$(mktemp -d temp.XXXX) || die "Could not create a temporary directory"
# Remove the scratch directory on every exit path (errors, interrupts, normal
# completion). -f keeps this silent if the directory has already been removed.
trap 'rm -rf -- "$tempDir"' EXIT
queryFile="$tempDir/query.bed"
libFile="$tempDir/library.bed"
statFile="$tempDir/lib.stat"

#Cleaning and sorting input files
# cleanBed INPUT OUTPUT
# Sorts INPUT by column 1 and then by column 2 (general-numeric order), drops
# every line containing "random" or "Un", and writes the result to OUTPUT.
# Both paths are quoted so file names with spaces work, and INPUT is handed to
# sort directly so an empty argument is an error instead of a read from stdin.
cleanBed() {
	sort -k1,1 -k2,2g -- "$1" | grep -v -e random -e Un > "$2"
}

cleanBed "$1" "$queryFile"
cleanBed "$2" "$libFile"

#Reading column definitions, printing header of out file
# readColumnDefs DEF_FILE HEADER_FILE
# Reads DEF_FILE line by line; the first tab-separated field of line N is stored
# in columnName[N] and the second in columnLmax[N] (N starts at 1). A line
# without any tab yields the whole line for both fields, as `cut` would.
# indexString is set to the space-prefixed list of indices (" 1 2 3 ...").
# HEADER_FILE is overwritten with "#" followed by one tab-prefixed column name
# per definition and a terminating newline.
# Globals written: columnName, columnLmax, indexString.
readColumnDefs() {
	local defFile=$1 headerFile=$2
	local line rest idx=1
	columnName=()
	columnLmax=()
	indexString=""
	printf "#" > "$headerFile"
	# -r keeps backslashes literal; default IFS still trims surrounding
	# whitespace. The `|| [ -n "$line" ]` clause processes a final line that
	# lacks a trailing newline instead of silently dropping that column.
	while read -r line || [ -n "$line" ]; do
		columnName[$idx]=${line%%$'\t'*}
		rest=${line#*$'\t'}
		columnLmax[$idx]=${rest%%$'\t'*}
		indexString="$indexString $idx"
		# Quoted so a name containing spaces stays a single header column.
		printf '\t%s' "${columnName[$idx]}" >> "$headerFile"
		idx=$((idx + 1))
	done < "$defFile"
	echo "" >> "$headerFile"
}

readColumnDefs "$columnDef" "$statFile"

# Element lengths to tabulate: 0, 5, 10, 15, then a series starting at 20 that
# is multiplied by 1.3 each step (printed as integers) while below 10000000.
lengthList=$(awk 'BEGIN {printf "0 5 10 15 ";for(a=20;a<10000000;a=1.3*a)printf("%d ",a)}')

# One row of the stat file per element length: the length itself followed by
# one tab-separated probability per column definition.
for len in $lengthList; do
	# Pad by half the element length (integer division).
	halfLen=$(( len / 2 ))
	confBed="$tempDir/$genome.conf.$len.bed"

	# Possible configurations for this element length after padding: first write
	# them as a BED file, then take the first output field of bedBits.py as the
	# total count.
	lib/bedBits.py "$chromFile" \!"$gapFile:$halfLen" --bed "$confBed"
	totalConfs=$(lib/bedBits.py "$chromFile" \!"$gapFile:$halfLen" | cut -f1)

	# Number of configurations in each distance bin for this element length.
	printf "$len" >> "$statFile"
	cumulativeP="0"
	for col in $indexString; do
		lmax=${columnLmax[$col]}

		# An Lmax of "X" marks the remainder bin: it receives 1 minus the
		# cumulative probability reached so far.
		if [ "$lmax" = "X" ]; then
			cumulativeP=$(bc <<< "scale=5;1-$cumulativeP")
			printf "	$cumulativeP" >> "$statFile"
			continue
		fi

		# Regular bin: the cumulative probability up to Lmax, minus the previous
		# cumulative value, gives the probability mass of this bin alone.
		lmaxPadded=$(( ${lmax} + ${halfLen} ))
		confsInBin=$(lib/bedBits.py "$chromFile" "$libFile:$lmaxPadded" "$confBed" | cut -f1)
		upToBin=$(bc <<< "scale=12;$confsInBin/$totalConfs")
		binOnly=$(bc <<< "scale=12;$upToBin-$cumulativeP")
		cumulativeP=$upToBin
		printf "	$binOnly" >> "$statFile"
	done

	rm "$confBed"
	echo '' >> "$statFile"
done

# Expected value: first tab-separated field printed by the helper script when
# given the cleaned query file and the stat table.
expectedOverlaps=$(lib/calculateDistanceExpectation.py "$queryFile" "$statFile" | cut -f1)
# Observed value: number of lines overlapSelect writes to stdout.
observedOverlaps=$(overlapSelect "$libFile" "$queryFile" stdout | wc -l)
# Fold enrichment = observed / expected, printed with 10 decimal places.
bc <<< "scale=10;$observedOverlaps/$expectedOverlaps"
# Remove the temporary working directory and everything in it.
rm -r "$tempDir"

