'''
Calculates simple distance from consensus given a full consensus alignment file.
Simple distance = proportion of mismatches in alignable sequence (does not count gaps)
Usage: python3 calc_simpleDistance_fromConsensusAlign_individual.py <full consensus alignment file> <output file>
'''

import sys

# Consensus codes that are skipped rather than scored.
# NOTE(review): only M, W, K and N are excluded; the remaining IUPAC ambiguity
# codes (R, Y, S, B, D, H, V) are still scored as ordinary bases -- confirm
# whether that is intentional before widening this set.
_SKIPPED_CONSENSUS_CODES = frozenset('MWKN')


def simple_distance(seq, consensus):
	'''
	Return the proportion of mismatches between seq and its aligned consensus.

	Positions where either string has a gap ('-'), or where the consensus
	holds one of the skipped degenerate codes, are not counted at all.
	Both strings are expected to be upper case and of equal length; the
	comparison stops at the end of the shorter one.

	Returns float('nan') when no position can be scored, instead of
	dividing by zero.
	'''
	match = 0
	mismatch = 0
	for seq_nuc, consensus_nuc in zip(seq, consensus):
		if seq_nuc == '-' or consensus_nuc == '-':
			continue
		if consensus_nuc in _SKIPPED_CONSENSUS_CODES:
			continue
		if seq_nuc == consensus_nuc:
			match += 1
		else:
			mismatch += 1
	total = match + mismatch
	if total == 0:
		return float('nan')
	return mismatch / total


def read_distances(path):
	'''
	Parse a full consensus alignment file into a list of (name, distance).

	Each record is a header line starting with '>', immediately followed by
	the element sequence line and then the consensus line. Lines that are
	not part of such a record are ignored. Records are returned in file
	order, one entry per header (repeated names each keep their own value).

	Exits the program with the usage text if a sequence and its consensus
	differ in length.
	'''
	records = []
	with open(path, 'r') as f:
		for line in f:
			if not line.startswith('>'):
				continue
			name = line.rstrip('\n').lstrip('>')
			# readline() returns '' at end of file, so a truncated record
			# yields empty strings rather than raising.
			seq = f.readline().rstrip('\n').upper()
			consensus = f.readline().rstrip('\n').upper()
			if len(seq) != len(consensus):
				print(name + ' sequence and consensus do not have the same length')
				sys.exit(__doc__)
			distance = simple_distance(seq, consensus)
			if distance != distance:  # only nan compares unequal to itself
				print(name + ' has no alignable positions; distance written as nan', file=sys.stderr)
			records.append((name, distance))
	return records


def write_distances(path, records):
	'''Write (name, distance) pairs to path as a tab-separated table with a header row.'''
	with open(path, 'w') as o:
		o.write('Element\tDistance\n')
		for name, distance in records:
			o.write(name + '\t' + str(distance) + '\n')


def main(argv):
	'''Run the script: argv is [program, alignment file, output file].'''
	if len(argv) != 3:
		sys.exit(__doc__)
	write_distances(argv[2], read_distances(argv[1]))


if __name__ == '__main__':
	main(sys.argv)

