'''
Separates significant kmers into case enriched/depleted categories.
Requires the kmer odds ratio file and the consensus aligned kmers file.
Usage: python3 separate_Kmers_oddRatio.py <kmers odds ratio file> <consensus aligned kmers .fa file> <output file basename>
'''

import sys

# Expect exactly three command-line arguments after the script name
# (odds ratio file, consensus FASTA file, output basename); otherwise
# exit, using the module docstring as the exit message.
if len(sys.argv[1:]) != 3:
	sys.exit(__doc__)

# Build a lookup of kmer -> consensus aligned sequence from the FASTA file
# given as the second command-line argument.
# The kmer key is the header text (without the leading '>') up to the first '_'.
# If the same kmer key appears more than once, the last record wins.
di = {}
with open(sys.argv[2], 'r') as f:
	kmer = None
	for line in f:
		line = line.rstrip('\n')
		# Only a '>' at the start of the line marks a header; a '>' elsewhere
		# in a line must not start a new record.
		if line.startswith('>'):
			kmer = line.lstrip('>').split('_')[0]
			di[kmer] = ''
		elif kmer is not None:
			# Append every line up to the next header so that sequences
			# wrapped over several lines are not truncated.
			di[kmer] += line

# Read the tab-separated odds ratio file (first command-line argument) and
# split its kmers into two lists, preserving file order:
#   column 1 = kmer, column 2 = odds ratio.
# Odds ratio > 1 goes to the enriched list; everything else (including an
# odds ratio of exactly 1) goes to the depleted list.
enrich_order = []
deplete_order = []
with open(sys.argv[1], 'r') as f:
	f.readline()  # discard the header row
	for line in f:
		# Skip blank lines (e.g. a trailing empty line) instead of crashing
		# with an IndexError on the missing odds ratio column.
		if not line.strip():
			continue
		fields = line.rstrip('\n').split('\t')
		kmer = fields[0]
		odds = float(fields[1])
		if odds > 1:
			enrich_order.append(kmer)
		else:
			deplete_order.append(kmer)

def _write_fasta(path, kmers, seqs):
	'''Write one FASTA record per kmer to path: header is the kmer, body is seqs[kmer].'''
	with open(path, 'w') as o:
		o.writelines('>' + kmer + '\n' + seqs[kmer] + '\n' for kmer in kmers)


# Verify every kmer has a sequence before creating any output, so a mismatch
# between the two input files gives a clear message instead of a KeyError
# raised after an output file has been partially written.
missing = [kmer for kmer in enrich_order + deplete_order if kmer not in di]
if missing:
	sys.exit('Error: %d kmer(s) in %s have no sequence in %s (first missing: %s)'
		% (len(missing), sys.argv[1], sys.argv[2], missing[0]))

# Kmers with odds ratio > 1.
enrich_file = sys.argv[3] + '_caseEnriched.fa'
_write_fasta(enrich_file, enrich_order, di)

# All remaining kmers.
deplete_file = sys.argv[3] + '_caseDeplete.fa'
_write_fasta(deplete_file, deplete_order, di)

