'''
Finds the number of elements that align (and don't have gaps) in motif regions of the consensus.
Requires elements to be aligned to consensus in fastaLike format.
Usage: python3 find_alignable_elements_motifLocation.py <input file> <consensus start> <consensus stop> <output file>
'''

import sys

def _spans_motif_without_gap(seq, align, start, stop):
    '''
    Return True if the element has no gap anywhere in the motif region.

    seq   -- aligned element row ('-' marks a gap in the element).
    align -- aligned consensus row ('-' marks a gap in the consensus).
    start, stop -- 0-based, inclusive motif bounds in consensus coordinates.

    Consensus coordinates advance only on non-gap characters of `align`, so
    columns where the consensus itself is gapped are ignored. An element row
    that ends before a motif column is treated as gapped at that column.
    Rows that never reach the motif region yield True, because no gap is
    ever observed.
    '''
    consensus_pos = 0
    for column, consensus_char in enumerate(align):
        if consensus_char == '-':
            continue
        if consensus_pos > stop:
            # Past the motif: nothing further can change the verdict.
            break
        if consensus_pos >= start:
            if column >= len(seq) or seq[column] == '-':
                return False
        consensus_pos += 1
    return True


def count_alignable_elements(lines, start, stop):
    '''
    Count elements that align across the motif region without gaps.

    lines -- iterable of text lines in fastaLike format: a '>' header line
             followed by the element row and then the consensus row.
    start, stop -- 0-based, inclusive motif bounds in consensus coordinates.

    Returns a tuple (species_counts, unlabeled_count):
    species_counts  -- dict mapping the text after the last ';' of the header
                       to the number of alignable elements, in first-seen
                       order.
    unlabeled_count -- number of alignable elements whose header contains
                       no ';'.
    '''
    species_counts = {}
    unlabeled_count = 0
    lines = iter(lines)
    for line in lines:
        if not line.startswith('>'):
            continue
        header = line.rstrip()
        # A truncated final record yields empty rows rather than an error.
        seq = next(lines, '').rstrip()
        align = next(lines, '').rstrip()
        if not _spans_motif_without_gap(seq, align, start, stop):
            continue
        if ';' in header:
            species = header.split(';')[-1]
            species_counts[species] = species_counts.get(species, 0) + 1
        else:
            unlabeled_count += 1
    return species_counts, unlabeled_count


def main(argv):
    '''
    Run the script: argv is [program, input file, consensus start,
    consensus stop, output file], with 1-based inclusive motif bounds.
    Exits with the module usage text when the argument count is wrong.
    '''
    if len(argv) != 5:
        sys.exit(__doc__)

    # Command-line positions are 1-based; the scan uses 0-based indices.
    start = int(argv[2]) - 1
    stop = int(argv[3]) - 1

    with open(argv[1], 'r') as f:
        species_counts, unlabeled_count = count_alignable_elements(f, start, stop)

    with open(argv[4], 'w') as o:
        o.write('Species\tNum_elements_alignable\n')
        for species, count in species_counts.items():
            o.write(species + '\t' + str(count) + '\n')
        o.write('\n')
        o.write('Motif start: ' + argv[2] + '\n')
        o.write('Motif stop: ' + argv[3] + '\n')

    if unlabeled_count:
        # These elements have no species suffix, so they cannot appear in the
        # per-species table; report them on stderr instead of dropping them
        # silently, leaving the output file format unchanged.
        sys.stderr.write(
            str(unlabeled_count)
            + ' alignable element(s) without a species label were not '
            + 'included in the table\n'
        )


if __name__ == '__main__':
    main(sys.argv)

