'''
Calculates and adds odds ratio info for kmers.
Usage: add_kmerOddsRatio.py <input kmer info file> <kmer result important .txt file> <output file>
'''

import os
import subprocess
import sys

# Placeholder written in the odds-ratio column for kmers that have no row in
# the kmer result file.
MISSING_RATIO = 'NA'


def read_kmer_info(path):
	'''
	Read the kmer info file.

	Each non-blank line is split on whitespace; the first field (lower-cased)
	is the kmer and the remaining fields are kept as-is. The header line is
	read like any other line, so it ends up stored under its own first field.

	Returns (table, order): table maps kmer -> list of remaining fields,
	order lists the kmers in file order (repeats included).
	'''
	table = {}
	order = []
	with open(path, 'r') as f:
		for line in f:
			fields = line.rstrip('\n').split()
			if not fields:
				# Blank lines carry no kmer; skip them instead of failing.
				continue
			kmer = fields[0].lower()
			table[kmer] = fields[1:]
			order.append(kmer)
	return table, order


def decompressed_name(path):
	'''
	Return path without a trailing ".gz"; other paths are returned unchanged.
	'''
	# str.rstrip('.gz') would strip any trailing '.', 'g' or 'z' characters
	# (e.g. "catalog.gz" -> "catalo"), so the suffix is sliced off explicitly.
	if path.endswith('.gz'):
		return path[:-len('.gz')]
	return path


def prepare_result_file(path):
	'''
	Return the path of the readable (uncompressed) kmer result file.

	If path ends in ".gz" the file is decompressed in place with gunzip, as
	this script has always done, and the decompressed path is returned.
	'''
	target = decompressed_name(path)
	if target == path:
		return path
	# Argument list instead of a shell string, so spaces or shell
	# metacharacters in the path cannot alter the command.
	status = subprocess.call(['gunzip', path])
	if status != 0 and not os.path.exists(target):
		sys.exit('Error: could not decompress ' + path)
	return target


def kmer_odds_ratio(case_present, ctrl_present, case_absent, ctrl_absent):
	'''
	Return the ratio written to the Odds_ratio column.

	One pseudocount is added to each of the four counts, then the fraction of
	cases with the kmer is divided by the fraction of controls with the kmer.
	'''
	# NOTE(review): this is a ratio of proportions, not the classical odds
	# ratio (case_present * ctrl_absent) / (case_absent * ctrl_present).
	# The formula is kept unchanged; confirm which one is intended.
	case_present += 1
	ctrl_present += 1
	case_absent += 1
	ctrl_absent += 1
	case_total = case_present + case_absent
	ctrl_total = ctrl_present + ctrl_absent
	# float() on the numerators makes both divisions true divisions even on an
	# interpreter where int / int truncates.
	return (float(case_present) / case_total) / (float(ctrl_present) / ctrl_total)


def read_odds_ratios(path):
	'''
	Read the kmer result file and return a dict of kmer -> ratio string.

	The first line is skipped as a header. On every other non-blank line,
	field 1 is the kmer and fields 2-5 are the case-present, control-present,
	case-absent and control-absent counts.
	'''
	ratios = {}
	with open(path, 'r') as f:
		f.readline()  # header line is not used
		for line in f:
			fields = line.rstrip('\n').split()
			if not fields:
				continue
			kmer = fields[1].lower()
			ratios[kmer] = str(kmer_odds_ratio(
				int(fields[2]), int(fields[3]), int(fields[4]), int(fields[5])))
	return ratios


def write_output(path, order, table, ratios):
	'''
	Write one line per kmer: kmer, odds ratio, then the original info fields.
	'''
	with open(path, 'w') as o:
		for kmer in order:
			# Kmers without a row in the result file get a placeholder, so the
			# original columns never shift into the odds-ratio column.
			ratio = ratios.get(kmer, MISSING_RATIO)
			o.write(kmer + '\t' + ratio + '\t' + '\t'.join(table[kmer]) + '\n')


def main(argv):
	'''
	Run the script with argv laid out like sys.argv:
	[program, kmer info file, kmer result file, output file].
	'''
	if len(argv) != 4:
		sys.exit(__doc__)

	table, order = read_kmer_info(argv[1])
	# The header row of the info file is located by its first field, "kmer".
	if 'kmer' not in table:
		sys.exit('Error: no header line starting with "kmer" in ' + argv[1])

	ratios = read_odds_ratios(prepare_result_file(argv[2]))
	# A kmer missing from the info file has always aborted the run; keep that,
	# but with an explicit message and before any output is written.
	unknown = [kmer for kmer in ratios if kmer not in table]
	if unknown:
		sys.exit('Error: kmer "' + unknown[0] + '" from ' + argv[2] +
			' is not present in ' + argv[1])

	# The header row receives the new column's title.
	ratios['kmer'] = 'Odds_ratio'
	write_output(argv[3], order, table, ratios)


if __name__ == '__main__':
	main(sys.argv)

