'''
Applies SHARPR scores in given file to multiple sequence aligned fasta files.
Requires fasta names/identifiers to be the same.
Usage: python3 apply_sharpr_scores_to_MSA.py <input MSA fasta file> <input SHARPR fasta file> <output file>
'''

import sys
def _read_msa(path):
	'''
	Read an aligned FASTA file.

	A line starting with ">" opens a new record; the whole header line
	(including the ">") is used as the record name. Every following line up
	to the next header is concatenated into that record's sequence, so both
	single-line and wrapped (multi-line) FASTA are accepted.

	Returns a tuple (di_msa, order): di_msa maps name -> aligned sequence and
	order lists the names in file order. If a name occurs more than once the
	last sequence wins and the name appears once per occurrence in order.
	'''
	parts = {}
	order = []
	name = None
	with open(path, 'r') as f:
		for line in f:
			line = line.rstrip('\n')
			if line.startswith('>'):
				name = line
				parts[name] = []
				order.append(name)
			elif name is not None:
				# Lines before the first header belong to no record and are ignored.
				parts[name].append(line)
	di_msa = {key: ''.join(chunks) for key, chunks in parts.items()}
	return di_msa, order


def _align_scores(name, seq, scores):
	'''
	Spread scores over the aligned sequence seq.

	Gap columns ("-") receive "NA"; every other column consumes the next
	score, in order. Scores left over after the last column are ignored.

	Raises IndexError if there are fewer scores than non-gap columns.
	'''
	needed = len(seq) - seq.count('-')
	if len(scores) < needed:
		raise IndexError(
			'%s: %d scores given but the alignment has %d non-gap positions'
			% (name, len(scores), needed))
	remaining = iter(scores)
	return ['NA' if residue == '-' else str(next(remaining)) for residue in seq]


def _read_sharpr(path, di_msa):
	'''
	Read the SHARPR score file and align each record to its MSA sequence.

	Two record layouts are accepted:
	  * a header line starting with ">" (used verbatim as the name) followed
	    by one tab-separated line whose first field is dropped and whose
	    remaining fields are the scores;
	  * a single tab-separated line whose first field is dropped and whose
	    remaining fields are the scores; the name is ">element_" plus the
	    text preceding "|tile" on that line.
	Blank lines are skipped.

	Returns a dict mapping name -> list of per-column strings (score or "NA").
	Raises KeyError if a record name is absent from di_msa, and IndexError
	if a record has too few scores for its sequence.
	'''
	di_sharpr = {}
	with open(path, 'r') as f:
		for line in f:
			line = line.rstrip('\n')
			if not line.strip():
				# A blank (often trailing) line is not a record.
				continue
			if line.startswith('>'):
				name = line
				# The scores live on the line right after the header; a header
				# at end of file yields an empty score list.
				score_line = next(f, '').rstrip('\n')
			else:
				name = '>element_' + line.split('|tile')[0]
				score_line = line
			scores = score_line.split('\t')[1:]
			if name not in di_msa:
				raise KeyError('%s: found in SHARPR file but not in MSA file' % name)
			di_sharpr[name] = _align_scores(name, di_msa[name], scores)
	return di_sharpr


def main(argv):
	'''
	Run the script.

	argv follows the sys.argv layout: program name, input MSA fasta file,
	input SHARPR file, output file. With any other number of arguments the
	usage text is printed and the process exits.

	The output holds, for each MSA record in input order, the record name on
	one line and its tab-separated per-column scores on the next.
	'''
	if len(argv) != 4:
		sys.exit(__doc__)

	di_msa, order = _read_msa(argv[1])
	di_sharpr = _read_sharpr(argv[2], di_msa)

	# Check before opening the output so a failure does not leave a partial file.
	missing = [name for name in order if name not in di_sharpr]
	if missing:
		raise KeyError('no SHARPR scores for MSA record(s): ' + ', '.join(missing))

	with open(argv[3], 'w') as o:
		for name in order:
			o.write(name + '\n')
			o.write('\t'.join(di_sharpr[name]) + '\n')


if __name__ == '__main__':
	main(sys.argv)

