'''
Given a list of full element names, calculate the mean +/- stdev sharpr scores at each position for the group of elements. Ignores 'NA' values when calculating the mean.
Outputs the list of elements (commented) and the mean +/- stdev sharpr scores.
Usage: python3 calc_mean_group_sharpr_seq.py <input sharpr seq file> <list of elements> <output file>
'''

import sys
import numpy as np
def normalize_name(raw):
    '''Return the bare element name for a FASTA-style header or a list entry.

    Trailing line endings (LF or CRLF) are removed. If the text contains
    '>', only the part after the first '>' is kept; if it then contains
    'element_', only the part after the first 'element_' is kept. Text
    with neither marker is returned unchanged, so headers and list
    entries are always reduced by exactly the same rule.
    '''
    name = raw.rstrip('\r\n')
    if '>' in name:
        name = name.split('>')[1]
    if 'element_' in name:
        name = name.split('element_')[1]
    return name


def parse_sharpr_scores(lines):
    '''Map each element name to its list of raw tab-separated score fields.

    lines: an iterable of text lines (e.g. an open file) in which every
    line containing '>' is a header and the line right after it holds
    that element's scores.

    Returns a dict {element name: [field, ...]}; fields stay as strings
    ('NA' included). A later header with the same name replaces an
    earlier one.
    '''
    scores_by_element = {}
    lines = iter(lines)
    for line in lines:
        if '>' not in line:
            continue
        # The score line is consumed here, so the loop resumes at the
        # line after it. A header at end of input gets one empty field.
        score_line = next(lines, '')
        fields = score_line.rstrip('\r\n').split('\t')
        scores_by_element[normalize_name(line)] = fields
    return scores_by_element


def read_element_names(lines):
    '''Return the normalized element names listed one per line in lines.

    Blank lines are skipped so they do not turn into empty element names.
    Order and duplicates are preserved.
    '''
    return [name for name in map(normalize_name, lines) if name]


def collect_position_scores(names, scores_by_element):
    '''Gather the numeric scores of the requested elements by position.

    names: element names to include; names absent from scores_by_element
    contribute nothing.
    scores_by_element: dict as returned by parse_sharpr_scores.

    Returns a list with one entry per position (as long as the longest
    included element); each entry is the list of float scores seen at
    that position, with 'NA' fields left out.

    Raises ValueError if a field is neither 'NA' nor parseable as float.
    '''
    per_position = []
    for name in names:
        fields = scores_by_element.get(name)
        if fields is None:
            continue
        # Grow up front so every position of this element has a slot,
        # even when its value turns out to be 'NA'.
        shortfall = len(fields) - len(per_position)
        per_position.extend([] for _ in range(shortfall))
        for pos, value in enumerate(fields):
            value = value.strip()
            if value != 'NA':
                per_position[pos].append(float(value))
    return per_position


def summarize_positions(per_position):
    '''Compute mean, mean+stdev and mean-stdev for every position.

    per_position: list of lists of floats, one inner list per position.

    Returns three parallel lists of strings (means, mean plus stdev, mean
    minus stdev). A position without any value yields 'NA' in all three.
    The standard deviation is numpy's default (population, ddof=0).
    '''
    means = []
    mean_plus = []
    mean_minus = []
    for values in per_position:
        if not values:
            means.append('NA')
            mean_plus.append('NA')
            mean_minus.append('NA')
            continue
        mean = np.mean(values)
        stdev = np.std(values)
        means.append(str(mean))
        mean_plus.append(str(mean + stdev))
        mean_minus.append(str(mean - stdev))
    return means, mean_plus, mean_minus


def main():
    '''Run the command-line tool; see the module docstring for usage.'''
    if len(sys.argv) != 4:
        sys.exit(__doc__)
    sharpr_path, list_path, out_path = sys.argv[1:4]

    with open(sharpr_path, 'r') as f:
        scores_by_element = parse_sharpr_scores(f)
    with open(list_path, 'r') as f:
        names = read_element_names(f)

    # Requested elements without scores are still listed in the output
    # header, so tell the user they did not contribute to the statistics.
    missing = [name for name in names if name not in scores_by_element]
    if missing:
        sys.stderr.write(
            'Warning: %d listed element(s) not found in %s: %s\n'
            % (len(missing), sharpr_path, ', '.join(missing))
        )

    per_position = collect_position_scores(names, scores_by_element)
    mean_scores, mean_plus, mean_minus = summarize_positions(per_position)

    with open(out_path, 'w') as o:
        for name in names:
            o.write('#' + name + '\n')
        o.write('mean\t' + '\t'.join(mean_scores) + '\n')
        o.write('mean+stdev\t' + '\t'.join(mean_plus) + '\n')
        o.write('mean-stdev\t' + '\t'.join(mean_minus) + '\n')


if __name__ == '__main__':
    main()

