#!/bin/bash
# Remember the launch directory: the per-contig result files are copied back
# here after the script has changed into its scratch folders.
RESULTS_FOLDER=$(pwd)

# Compute the sequence length of a FASTA file.
# Arguments: $1 - path to the FASTA file
# Outputs:   echoes the path to stdout (progress trace)
# Globals:   sets `lines`, `chars` and `length`; callers read `length`
#
# Every line containing ">" is treated as a header and skipped. The length is
# the byte count of the remaining lines minus one newline per line.
function fasta_length {
	echo "$1"
	# The path is quoted so names with whitespace work, and so an empty
	# argument fails inside grep instead of leaving a bare `cat` that would
	# read (and swallow) the caller's stdin.
	lines=$(grep -v ">" "$1" | wc -l)
	chars=$(grep -v ">" "$1" | wc -c)
	# Shell arithmetic replaces expr, which returns status 1 when the result
	# is 0 and would make the function look like it failed.
	length=$((chars - lines))
}

# Required environment: CNVER_FOLDER locates the CNVer tools invoked below,
# CS2_PATH is handed to the flow-solver wrapper script.
# A missing value is fatal, so the message goes to stderr and the script exits
# non-zero (a bare `exit` would return echo's status, i.e. success).
if [[ -z "${CNVER_FOLDER}" ]]; then
	echo "Please set CNVER_FOLDER enviornment variable to the correct path" >&2
	exit 1
fi

if [[ -z "${CS2_PATH}" ]]; then
	echo "Please set CS2_PATH enviornment variable to the correct path" >&2
	exit 1
fi

# Scratch area for every intermediate file, named after this shell's PID.
# NOTE(review): the name is predictable; `mktemp -d` would be safer on shared
# hosts. The path is kept as-is so the location of intermediate files does not
# change for existing users.
TEMP_FOLDER=/tmp/$$_cnver
mkdir -p "${TEMP_FOLDER}"
if [[ ! -d "${TEMP_FOLDER}" ]]; then
	echo "Failed to create temp folder!" >&2
	# Nothing below can work without the scratch folder, so stop here instead
	# of continuing with a missing directory.
	exit 1
fi

# Exactly ten positional arguments are required; otherwise print the usage
# line on stderr and exit non-zero so callers can detect the misuse.
if (( $# != 10 )); then
	echo "$0 mapping_files_list contig_breaks_folder repeat_regions_folder self_alignments_folder fasta_files_folder read_length mean_insert_size stdev_insert_size min_mps_per_cluster contig_name_file" >&2
	exit 1
fi

# Print $1 as an absolute path, anchoring relative paths to the current
# directory. Empty strings are passed through so the checks below reject them.
cnver_abs_path() {
	case "$1" in
		"" | /*) printf '%s\n' "$1" ;;
		*) printf '%s\n' "${PWD}/$1" ;;
	esac
}

# Positional arguments. Later stages cd into the scratch tree, so path
# arguments are made absolute here while the launch directory is still current.
# All tests quote their operand: an unquoted empty value would collapse
# `[ ! -e ]` into a test that silently passes.
MAPPING_FILES_LIST=$(cnver_abs_path "$1")
if [[ ! -e "${MAPPING_FILES_LIST}" ]]; then
	echo "File ${MAPPING_FILES_LIST} does not exist!" >&2
	exit 1
fi
CONTIG_BREAKS_FOLDER=$(cnver_abs_path "$2")
if [[ ! -d "${CONTIG_BREAKS_FOLDER}" ]]; then
	echo "Folder ${CONTIG_BREAKS_FOLDER} does not exist!" >&2
	exit 1
fi
REPEAT_REGIONS_FOLDER=$(cnver_abs_path "$3")
if [[ ! -d "${REPEAT_REGIONS_FOLDER}" ]]; then
	echo "Folder ${REPEAT_REGIONS_FOLDER} does not exist!" >&2
	exit 1
fi
SELF_ALIGNMENTS_FOLDER=$(cnver_abs_path "$4")
if [[ ! -d "${SELF_ALIGNMENTS_FOLDER}" ]]; then
	echo "Folder ${SELF_ALIGNMENTS_FOLDER} does not exist!" >&2
	exit 1
fi
FASTA_FILES_FOLDER=$(cnver_abs_path "$5")
if [[ ! -d "${FASTA_FILES_FOLDER}" ]]; then
	echo "Folder ${FASTA_FILES_FOLDER} does not exist!" >&2
	exit 1
fi
# Numeric parameters are forwarded unchanged to the clustering script.
READ_LENGTH=$6
MEAN_INSERT_SIZE=$7
STDEV_INSERT_SIZE=$8
MIN_MPS_PER_CLUSTER=$9
# The contig list drives every loop below; without this check a missing file
# would only make each loop fail to open it while the script carried on.
CONTIG_NAME_FILE=$(cnver_abs_path "${10}")
if [[ ! -e "${CONTIG_NAME_FILE}" ]]; then
	echo "File ${CONTIG_NAME_FILE} does not exist!" >&2
	exit 1
fi
# Sub-folders of the scratch area used by the individual stages.
DONOR_EDGES_FOLDER=${TEMP_FOLDER}/donor_edges
REFERENCE_GRAPH_FOLDER=${TEMP_FOLDER}/reference_graphs
DONOR_GRAPH_FOLDER=${TEMP_FOLDER}/donor_graphs
AUX_DATA_FOLDER=${TEMP_FOLDER}/aux_data

# Verify that every contig named in CONTIG_NAME_FILE has all four input files
# (.cbs, .rep, .axt, .fa) before any work starts. The first missing file is
# reported on stderr and aborts the script with a non-zero status.
# `read -r` keeps backslashes literal; the `|| [[ -n … ]]` clause also
# processes a final line that lacks a trailing newline.
while read -r CONTIG || [[ -n "${CONTIG}" ]]; do
	for required_input in \
		"${CONTIG_BREAKS_FOLDER}/${CONTIG}.cbs" \
		"${REPEAT_REGIONS_FOLDER}/${CONTIG}.rep" \
		"${SELF_ALIGNMENTS_FOLDER}/${CONTIG}.axt" \
		"${FASTA_FILES_FOLDER}/${CONTIG}.fa"; do
		if [[ ! -e "${required_input}" ]]; then
			echo "Missing ${required_input}" >&2
			exit 1
		fi
	done
done < "${CONTIG_NAME_FILE}"

#Create donor edges from mapped reads
if [[ ! -d "${CNVER_FOLDER}/cluster" ]]; then
	echo "Folder ${CNVER_FOLDER}/cluster does not exist!" >&2
	exit 1
fi
# Merge all contig-break files into one table: a literal 0 followed by the
# first three fields of every row.
cat "${CONTIG_BREAKS_FOLDER}"/* | awk '{print 0,$1,$2,$3}' > "${TEMP_FOLDER}/contig_breaks"
rm -rf "${DONOR_EDGES_FOLDER}"
# Each command is held in an array so that arguments containing whitespace
# stay intact; it is echoed for the log and then executed.
cluster_cmd=(sh "${CNVER_FOLDER}/cluster/cluster_main.sh" "${MAPPING_FILES_LIST}" "${TEMP_FOLDER}/contig_breaks" "${DONOR_EDGES_FOLDER}/" "${READ_LENGTH}" "${MEAN_INSERT_SIZE}" "${STDEV_INSERT_SIZE}" "${MIN_MPS_PER_CLUSTER}" "${CONTIG_NAME_FILE}")
echo "${cluster_cmd[@]}"
"${cluster_cmd[@]}"
# Run the helper scripts found in the donor-edges folder, in this fixed order.
for generated_script in conc_script sort_script link_script; do
	cluster_cmd=(sh "${DONOR_EDGES_FOLDER}/${generated_script}")
	echo "${cluster_cmd[@]}"
	"${cluster_cmd[@]}"
done
# Drop the intermediate cluster and mapped-read files.
rm "${DONOR_EDGES_FOLDER}"/clusters* "${DONOR_EDGES_FOLDER}"/mapped_reads*

#Create the reference graphs
# For every contig: convert its self-alignment (.axt) into an edges file,
# build a graph from the edges, then run fill_reference_graph on that graph.
# fasta_length supplies the contig length through the global `length`.
rm -rf "${REFERENCE_GRAPH_FOLDER}"
mkdir -p "${REFERENCE_GRAPH_FOLDER}"
while read -r CONTIG || [[ -n "${CONTIG}" ]]; do
	fasta_length "${FASTA_FILES_FOLDER}/${CONTIG}.fa"
	edges_file=${REFERENCE_GRAPH_FOLDER}/${CONTIG}.edges
	graph_file=${REFERENCE_GRAPH_FOLDER}/${CONTIG}.graph
	# Commands are kept in arrays so paths with whitespace survive; each one
	# is echoed for the log before it runs.
	graph_cmd=("${CNVER_FOLDER}/axt_to_binary" "${SELF_ALIGNMENTS_FOLDER}/${CONTIG}.axt" "${length}" "${edges_file}")
	echo "${graph_cmd[@]}"
	"${graph_cmd[@]}"
	graph_cmd=("${CNVER_FOLDER}/make_reference_graph" "${edges_file}" "${graph_file}")
	echo "${graph_cmd[@]}"
	"${graph_cmd[@]}"
	graph_cmd=("${CNVER_FOLDER}/fill_reference_graph" "${graph_file}" "${length}")
	echo "${graph_cmd[@]}"
	"${graph_cmd[@]}"
done < "${CONTIG_NAME_FILE}"

#Create the binary mapping files
# Each mapping file listed in MAPPING_FILES_LIST is piped through
# bwtie_concise_to_binary, which runs inside a fresh per-file folder under
# ${TEMP_FOLDER}/mapping_files.
while read -r FILE || [[ -n "${FILE}" ]]; do
	# A blank line would make the folder name collapse to the parent
	# mapping_files directory (wiping earlier results) and leave `cat` reading
	# the rest of the list from stdin, so skip it.
	[[ -n "${FILE}" ]] || continue
	folder=${TEMP_FOLDER}/mapping_files/$(basename "${FILE}")
	rm -rf "${folder}"
	mkdir -p "${folder}"
	echo "cat ${FILE} | ${CNVER_FOLDER}/bwtie_concise_to_binary 1"
	# `cat` runs in the current directory, so relative entries in the list
	# still resolve; only the converter runs inside the per-file folder, and
	# the subshell keeps that cd from leaking into later iterations.
	cat "${FILE}" | (
		cd "${folder}" || exit 1
		"${CNVER_FOLDER}/bwtie_concise_to_binary" 1
	)
done < "${MAPPING_FILES_LIST}"

#Create Scov and gc maps file
# Per-contig pipeline: coverage, spikes, N regions, masks, GC map, donor graph,
# flow solve and CNV report. All relative outputs land in AUX_DATA_FOLDER, and
# the final .cnvs / .used_dgs files are copied to RESULTS_FOLDER.
rm -rf "${AUX_DATA_FOLDER}"
mkdir -p "${AUX_DATA_FOLDER}"
# Without the scratch directory every output below would be written elsewhere.
cd "${AUX_DATA_FOLDER}" || exit 1
while read -r CONTIG || [[ -n "${CONTIG}" ]]; do
	# Collect the binary mapping files of this contig: paths under
	# mapping_files that contain "<contig>_mod" and end in "final". The contig
	# name is matched as a fixed string, not as a regular expression.
	mapped_files=()
	while IFS= read -r mapped_path; do
		mapped_files+=("${mapped_path}")
	done < <(find "${TEMP_FOLDER}/mapping_files/" | grep -F -e "${CONTIG}_mod" | grep "final$")
	fasta_length "${FASTA_FILES_FOLDER}/${CONTIG}.fa"
	echo cat "${mapped_files[@]}" "|" "${CNVER_FOLDER}/make_simple_coverage" - "${CONTIG}" "${length}"
	# stdin comes from /dev/null so that, when no file matched, `cat` yields
	# empty input instead of consuming the contig list this loop reads from.
	cat "${mapped_files[@]}" < /dev/null | "${CNVER_FOLDER}/make_simple_coverage" - "${CONTIG}" "${length}"
	if [[ ! -e "${CONTIG}.scov" ]]; then
		echo "There is no data for the contig ${CONTIG}!!!!" >&2
		exit 1
	fi
	#make spikes
	echo Spikes
	# Drop lines containing "low", then widen each interval: start is moved
	# back by 2 (clamped at 0) and 2 is added to the third field.
	"${CNVER_FOLDER}/find_spikes" "${CONTIG}.scov" 0 15 \
		| grep -v low \
		| awk '{s=$2; if (s>1) {s=s-2} else {s=0} print $1,s,$3+2,$4}' > "${CONTIG}.spikes"
	#find N regions
	echo Ns
	python "${CNVER_FOLDER}/find_Ns.py" "${FASTA_FILES_FOLDER}/${CONTIG}.fa" "${CONTIG}" > "${CONTIG}.ns"
	#make the masks
	echo Masks
	# Contig breaks, repeats, N regions and spikes are concatenated, sorted
	# numerically on field 2 and passed through interval_join.
	cat "${CONTIG_BREAKS_FOLDER}/${CONTIG}.cbs" "${REPEAT_REGIONS_FOLDER}/${CONTIG}.rep" "${CONTIG}.ns" "${CONTIG}.spikes" \
		| sort -k 2n \
		| "${CNVER_FOLDER}/interval_join" > "${CONTIG}.masks"
	echo GC map
	cat "${mapped_files[@]}" < /dev/null | "${CNVER_FOLDER}/make_gc_map" "${FASTA_FILES_FOLDER}/${CONTIG}.fa" "${CONTIG}.masks" - "${CONTIG}" 200 50
	#<graph_file> <donor_edges> <chrindex> <repeat_ranges> <contigbreaks> <lambdas>
	#make the donor graph
	# stdout is captured as <contig>.problem, stderr as <contig>.graphinfo.
	donor_cmd=("${CNVER_FOLDER}/make_donor_graph" "${REFERENCE_GRAPH_FOLDER}/${CONTIG}.graph" "${DONOR_EDGES_FOLDER}/links.${CONTIG}" "${AUX_DATA_FOLDER}/${CONTIG}.scov" "${AUX_DATA_FOLDER}/${CONTIG}.masks" /dev/null "${AUX_DATA_FOLDER}/${CONTIG}.gc")
	echo "${donor_cmd[@]}" "1>" "${CONTIG}.problem" "2>" "${CONTIG}.graphinfo"
	"${donor_cmd[@]}" 1> "${CONTIG}.problem" 2> "${CONTIG}.graphinfo"
	# Fields 2 and 3 of the last graphinfo line feed the solver wrapper.
	CONTIGLEN=$(tail -n 1 "${CONTIG}.graphinfo" | awk '{print $2}')
	NUMREADS=$(tail -n 1 "${CONTIG}.graphinfo" | awk '{print $3}')
	#prepare graph for cs2 solver and solve it
	solver_cmd=(perl "${CNVER_FOLDER}/flow_solve/make_cs2_graph.pl" "${CONTIGLEN}" "${NUMREADS}" 10 "${CONTIG}.problem" 2 0 20 1 0 "${CS2_PATH}")
	echo "${solver_cmd[@]}"
	"${solver_cmd[@]}"
	#get the output cnvs
	# stderr is captured as <contig>.cnvs, stdout as <contig>.used_dgs.
	"${CNVER_FOLDER}/report_cnvs" "${CONTIG}.graphinfo" "${CONTIG}.problem.out" 100 0 2> "${CONTIG}.cnvs" 1> "${CONTIG}.used_dgs"
	#copy cnvs to safety
	cp "${CONTIG}.cnvs" "${CONTIG}.used_dgs" "${RESULTS_FOLDER}"
done < "${CONTIG_NAME_FILE}"
cd ..



#rm -rf ${TEMP_FOLDER}
