#!/bin/bash
#Script separates Kmers into case-enriched and case-depleted categories. Kmers are ordered by significance, from most significant to least significant.
#Used after alignKmers_needle.sh
#Usage: assign_Kmer_significance_direction.sh <aligned Kmers info file> <important .txt file for Kmer of same length> <aligned Kmers .fa file> <output file basename>

#Check to make sure number of arguments is correct
if [ "$#" -ne 4 ]; then
	echo "Usage: assign_Kmer_significance_direction.sh <aligned Kmers info file> <important .txt file for Kmer of same length> <aligned Kmers .fa file> <output file basename>"
	exit 1
fi

#scriptsDir indicates where this script and the directory containing required python scripts (separate_Kmers) are located
#Both substitutions are quoted so an install path containing spaces still resolves correctly
scriptsDir="$(dirname -- "$(readlink -f -- "$0")")"
sepKmers="$scriptsDir/separate_Kmers"

#First parameter is the Kmers info file obtained from alignKmers_needle.sh
infoFile=$1
#Second parameter is the important .txt file obtained from the TWAS Kmers analysis. Must be for Kmers of the same length. Does not have to be unzipped because the python script will unzip if it ends in ".gz"
important=$2
#Third parameter is the aligned Kmers fasta file obtained from alignKmers_needle.sh
alignedFile=$3
#Fourth parameter is the desired output file basename. "caseEnriched" and "caseDepleted" will be specified in the file name
output=$4

#Make temporary file; stop if it cannot be created, since every later step depends on it
tempFile=$(mktemp) || {
	echo "Error: could not create temporary file" >&2
	exit 1
}
#Remove the temporary file when the script exits, including on the error exits below
trap 'rm -f -- "$tempFile"' EXIT

#Calculate odds ratios for each Kmer and sort by odds ratio
python3 "$sepKmers/add_kmerOddsRatio.py" "$infoFile" "$important" "$tempFile" || {
	echo "Error: add_kmerOddsRatio.py failed" >&2
	exit 1
}
#-k 3 keys on field 3 through end of line; -V -r gives a reversed version sort
sort -k 3 -V -r "$tempFile" > "${output}_odds_ratios" || {
	echo "Error: sorting odds ratios failed" >&2
	exit 1
}

#Separate Kmers into case-enriched or case-depleted based on odds ratio
python3 "$sepKmers/separate_Kmers_oddRatio.py" "${output}_odds_ratios" "$alignedFile" "$output" || {
	echo "Error: separate_Kmers_oddRatio.py failed" >&2
	exit 1
}

