'''
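Takes a standardized alignment and the base counts of each position, and keeps only the positions whose base count meets the given cutoff.
Writes the filtered alignment to the output file and prints each sequence's deletion and insertion positions, relative to the remaining positions, to standard output.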
Usage: python3 get_alignment_givenBaseCountCutoff.py <input standard alignment> <base counts file> <base count cutoff> <output file>
'''

import sys

# The script needs exactly four arguments (alignment, base counts, cutoff, output);
# anything else prints the usage text from the module docstring and exits.
if len(sys.argv) != 5:
	sys.exit(__doc__)

# int() raises ValueError for a string that is not a valid integer literal.
# Command-line arguments are always strings, so ValueError (not TypeError) is
# the exception that has to be caught to show the friendly message.
try:
	base_cutoff = int(sys.argv[3])
except ValueError:
	print('<base count cutoff> must be an integer')
	sys.exit(__doc__)

# Collect the 0-based column indices whose base count reaches the cutoff.
# The base counts file is read as comma-separated integers; any line that
# contains '#' is skipped as a comment.
# A set is used because the positions are only ever tested for membership,
# which keeps each lookup constant-time however many columns pass.
positions_pass_cutoff = set()
with open(sys.argv[2], 'r') as f:
	for line in f:
		if '#' in line:
			continue
		if not line.strip(): #blank line (e.g. trailing newline at end of file) holds no counts
			continue
		counts = line.rstrip('\n').split(',')
		for position, count in enumerate(counts):
			if int(count) >= base_cutoff: #position passes the cutoff, so treat it as a consensus position
				positions_pass_cutoff.add(position)

# di maps each header line (including the leading '>') to
# [filtered sequence, deletion positions, insertion positions];
# order remembers the headers in the order they were read.
# NOTE(review): a repeated header overwrites its earlier entry in di but is
# still appended to order again -- confirm headers are unique in the input.
di = {}
order = []
# Build the lookup set once so each per-column membership test is O(1)
# regardless of how the positions were collected.
consensus_positions = set(positions_pass_cutoff)
with open(sys.argv[1], 'r') as f:
	for line in f:
		if '>' not in line: #only header lines start a record
			continue
		name = line.rstrip('\n')
		seq = f.readline().rstrip('\n') #the sequence is taken from the single line following the header
		kept_bases = [] #characters of the filtered sequence, joined once at the end
		insertion_positions = []
		deletion_positions = []
		consensus_counter = 0 #index of the next consensus position in the filtered alignment
		for i, base in enumerate(seq):
			if i in consensus_positions:
				#consensus column: always kept; a gap here is recorded as a deletion
				if base == '-':
					deletion_positions.append(str(consensus_counter))
				kept_bases.append(base)
				consensus_counter += 1
			elif base != '-':
				#non-consensus column holding a base: an insertion, reported at the
				#index of the consensus position that follows it
				insertion_positions.append(str(consensus_counter))
			#non-consensus column holding a gap matches the consensus and is dropped
		if not deletion_positions: #use "." so an empty field is explicit in the output
			deletion_positions.append('.')
		if not insertion_positions: #use "." so an empty field is explicit in the output
			insertion_positions.append('.')
		di[name] = [''.join(kept_bases), ','.join(deletion_positions), ','.join(insertion_positions)]
		order.append(name)

# Write the filtered alignment: each header line followed by its filtered
# sequence, in the order the records were read.
with open(sys.argv[4], 'w') as out_handle:
	for header in order:
		filtered_seq = di[header][0]
		out_handle.write(header + '\n' + filtered_seq + '\n')

# Print a tab-separated summary to standard output: the header with any
# leading '>' characters removed, then the deletion and insertion positions.
print('Name\tDeletion_positions\tInsertion_positions')
for header in order:
	deletions = di[header][1]
	insertions = di[header][2]
	print('\t'.join([header.lstrip('>'), deletions, insertions]))

