#!/bin/bash

#This script is made to find the best alignments of kmers from the TWAS scripts to the consensus sequence.

#First (1st) argument is fasta file of kmers to be aligned
#Second (2nd) argument is fasta file containing the consensus sequence
#Third (3rd) argument is desired output file name

# Snapshot of the positional parameters; the input/output variables are taken from this array later in the script
ARGS=("$@")

# Exactly three arguments are required: kmers fasta, consensus fasta, output file name
if [ "$#" -ne 3 ]; then
	# The usage text is a diagnostic, so send it to stderr to keep stdout clean for callers capturing output
	echo "Usage: alignKmers_needle.sh [kmers .fa file] [consensus .fa file] [output file name]" >&2
	exit 1
fi

#scriptsDir is the directory containing this script (symlinks resolved via readlink -f); the required python scripts are looked up in its align_Kmers subdirectory
#$0 and both command substitutions are quoted so an install path containing spaces or glob characters is handled correctly
scriptsDir="$(dirname -- "$(readlink -f -- "$0")")"
alignKmers="$scriptsDir"/align_Kmers

#Unpack the three positional arguments: kmers fasta, consensus fasta, output file name
KMERS="${ARGS[0]}"
CONSENSUS="${ARGS[1]}"
OUTPUT="${ARGS[2]}"

#Scratch directory for the per-kmer needle inputs (input_dir) and alignment outputs (needle_out)
workDir=$(mktemp -d) || { echo "Error: could not create a temporary working directory" >&2; exit 1; }
#Remove the scratch directory on early exits and interrupts as well, so it is never left behind
trap 'rm -rf -- "$workDir"' EXIT

#Populate input_dir from the kmers fasta (presumably one needle input file per kmer; see setup_needle_kmers.py)
#Without these inputs there is nothing to align, so stop instead of writing a header-only output file
python3 "$alignKmers"/setup_needle_kmers.py "$KMERS" "$workDir"/input_dir || { echo "Error: setup_needle_kmers.py failed" >&2; exit 1; }
mkdir "$workDir"/needle_out
#Glob instead of parsing ls output so file names with whitespace or glob characters survive intact
for inputFile in "$workDir"/input_dir/*; do
	#An unmatched glob stays literal; skip it when input_dir is empty
	[ -e "$inputFile" ] || continue
	kmerFile=${inputFile##*/}
	#Needle alignment is heavily penalizing gaps so that mismatches are preferred over long gaps
	needle -asequence "$inputFile" -bsequence "$CONSENSUS" -gapopen 10 -gapextend 10 -outfile "$workDir"/needle_out/"$kmerFile" \
		|| echo "Warning: needle failed for $kmerFile" >&2
done

#Tab-separated header line; this truncates any existing output file
printf 'Kmer\tlog10_p-value\tconsensus_start\tconsensus_stop\tconsensus_gap_positions\n' > "$OUTPUT"
for alignment in "$workDir"/needle_out/*; do
	[ -e "$alignment" ] || continue
	#The parser's stdout is appended to the table, its stderr is appended to <output>_seq.fa
	#NOTE(review): <output>_seq.fa is never truncated, so reruns with the same output name accumulate - confirm this is intended
	python3 "$alignKmers"/parse_needle_KmerOutput.py "$alignment" >> "$OUTPUT" 2>> "$OUTPUT"_seq.fa
done

#Normal-path cleanup; the EXIT trap above covers every other exit path
rm -r -- "$workDir"

