#!/bin/bash
#Script to realign sequences using Needle (Needleman-Wunsch global alignment)
#For obtaining aligned file required for PLINK analysis
#Usage: needle_realign.sh <input alignment fasta file> <consensus fasta file> <output file name>

#Print an error message to stderr and abort with a non-zero exit status
die() {
	printf '%s\n' "$*" >&2
	exit 1
}

#Check to make sure number of arguments is correct
if [[ "$#" -ne 3 ]]; then
	echo "Usage: needle_realign.sh <input alignment fasta file> <consensus fasta file> <output file name>" >&2
	exit 1
fi

#scriptsDir is the directory containing this script (symlinks resolved via readlink -f);
#the required python scripts are expected in its realign_needle_scripts subdirectory
scriptsDir="$(dirname -- "$(readlink -f -- "$0")")"
needleAlign="$scriptsDir/realign_needle_scripts"

#First parameter is the input fasta file to be aligned
input=$1
#Second parameter is the consensus sequence in fasta format, Name of sequence must be "consensus"
consensus=$2
#Third parameter is the desired output file name
output=$3

#Fail early instead of running the whole pipeline on missing inputs
[[ -r "$input" ]] || die "Cannot read input alignment fasta file: $input"
[[ -r "$consensus" ]] || die "Cannot read consensus fasta file: $consensus"

#Make temporary working directory; the EXIT trap removes it on every exit path
#(normal completion, die, or interruption), not only on success
workDir=$(mktemp -d) || die "Could not create temporary working directory"
trap 'rm -rf -- "$workDir"' EXIT

#Copy input file, rename identifiers by replacing ":" with "@", and "," with "?" (this is done for the Needle alignment)
cp -- "$input" "$workDir/input.fa" || die "Could not copy $input to the working directory"
sed -i -e 's/:/@/g' -e 's/,/?/g' "$workDir/input.fa" || die "Could not rewrite identifiers in the input copy"

#Split the input into one file per sequence
python3 "$needleAlign/setup_needle_kmers.py" "$workDir/input.fa" "$workDir/dir_individual_seq" \
	|| die "setup_needle_kmers.py failed"
mkdir -- "$workDir/needle_out" || die "Could not create $workDir/needle_out"

#Perform Needle alignment for individual sequences.
#A glob is used instead of parsing ls output so file names are never word-split.
#A failure on one sequence is reported on stderr and that sequence is skipped.
failedCount=0
for seqFile in "$workDir"/dir_individual_seq/*; do
	#An unmatched glob stays literal; skip it when the directory is empty
	[[ -e "$seqFile" ]] || continue
	seqName=${seqFile##*/}
	if ! needle -asequence "$seqFile" -bsequence "$consensus" -gapopen 10 -gapextend 10 -outfile "$workDir/needle_out/$seqName"; then
		echo "Warning: needle failed for $seqName, sequence skipped" >&2
		failedCount=$((failedCount + 1))
		continue
	fi
	if ! python3 "$needleAlign/parse_needleOutput_fasta.py" "$workDir/needle_out/$seqName" >> "$workDir/all_needle_out"; then
		echo "Warning: could not parse Needle output for $seqName" >&2
		failedCount=$((failedCount + 1))
	fi
done
echo "Finished parsing Needle alignments"
if [[ "$failedCount" -gt 0 ]]; then
	echo "Warning: $failedCount sequence(s) could not be aligned or parsed; output will be incomplete" >&2
fi

#Adjust alignments to remove gaps created in the consensus sequence
python3 "$needleAlign/adjust_alignments.py" "$workDir/all_needle_out" "$workDir/realigned_needle_withGapInfo" \
	|| die "adjust_alignments.py failed"
#Keep only the first tab-separated column of the adjusted output
cut -f 1 "$workDir/realigned_needle_withGapInfo" > "$workDir/realigned_needle_alignOnly" \
	|| die "Could not extract alignment column"

#Reverse identifiers to original names ("?" is a literal character in sed basic regex)
sed -i -e 's/@/:/g' -e 's/?/,/g' "$workDir/realigned_needle_alignOnly" \
	|| die "Could not restore original identifiers"

#Add consensus to Needle realigned sequences
#NOTE: the result is appended (>>), so an existing output file is extended, not overwritten
cat -- "$workDir/realigned_needle_alignOnly" "$consensus" >> "$output" \
	|| die "Could not write output file: $output"

