'''
Adjust Needle alignments so that all the consensus sequences are the same. Does this by removing all gaps in the consensus and corresponding positions in the alignment.
Assumes modified fasta format with two sequences, first for the sequence alignment and second for the consensus alignment.
Usage: python3 adjust_alignment.py <input file> <output file>
'''

import sys

def _read_alignments(path):
    """Parse the input file into a list of (name, seq, cons) tuples.

    Each record is expected to span three lines: a header line containing
    '>', then the sequence alignment line, then the consensus alignment
    line. Records are returned in file order.

    Exits the program (after printing the offending name) if the same
    record name appears more than once.
    """
    records = []
    seen = set()
    with open(path, 'r') as handle:
        for line in handle:
            # Any line containing '>' is treated as a header; everything
            # else reached by this loop is ignored.
            if '>' not in line:
                continue
            name = line.rstrip('\n').lstrip('>')
            # The two lines following a header belong to that record, so
            # they are consumed here and never seen by the outer loop.
            seq = handle.readline().rstrip('\n')
            cons = handle.readline().rstrip('\n')
            if name in seen:
                print(name)
                sys.exit('More than one sequence with the same name')
            seen.add(name)
            records.append((name, seq, cons))
    return records


def _strip_consensus_gaps(seq, cons):
    """Drop the columns of seq where cons has a gap ('-').

    Returns (new_seq, gaps). gaps holds one entry per removed consensus
    gap, expressed as a 0-based index into the gap-free consensus, so a
    run of consecutive gaps repeats the same value. Characters of seq
    that lie beyond the end of cons are kept unchanged.
    """
    gaps = []
    for column, symbol in enumerate(cons):
        if symbol == '-':
            # Subtracting the gaps seen so far converts the alignment
            # column into a coordinate on the ungapped consensus.
            gaps.append(column - len(gaps))
    kept = [
        residue
        for column, residue in enumerate(seq)
        if column >= len(cons) or cons[column] != '-'
    ]
    return ''.join(kept), gaps


def main(argv=None):
    """Run the adjustment: main([prog, input_path, output_path]).

    argv defaults to sys.argv. For every input record, writes a header
    line '>name<TAB>comma-separated gap positions' followed by the
    sequence with the consensus-gap columns removed. Exits with the
    module docstring as the usage message when the argument count is
    wrong.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        sys.exit(__doc__)

    # Read (and validate) everything before opening the output so that a
    # duplicate-name error does not leave a truncated output file behind.
    records = _read_alignments(argv[1])

    with open(argv[2], 'w') as out:
        for name, seq, cons in records:
            new_seq, gaps = _strip_consensus_gaps(seq, cons)
            positions = ','.join(str(gap) for gap in gaps)
            out.write('>' + name + '\t' + positions + '\n')
            out.write(new_seq + '\n')


if __name__ == '__main__':
    main()

