'''
Calculates summary statistics (mean, standard deviation, coefficient of variation) for all columns after column 2 (after CRE column). Assumes all columns are different replicates.

Usage: python3 calc_replicate_summary_statistics.py <input file> <output file>
'''

import sys
import numpy as np
import math

NUM_STATS = 5  # Mean, Stdev, CoV, SEM, SEM/Mean
OUTPUT_HEADER = 'BC\tCRE\tMean\tStdev\tCoV\tSEM\tSEM/Mean'


def _summarize(values):
	'''
	Return [Mean, Stdev, CoV, SEM, SEM/Mean] as strings for a list of floats.

	Stdev is the population standard deviation (numpy's default, ddof=0) and
	SEM is Stdev / sqrt(n). An empty list yields 'nan' for every statistic,
	which is what numpy produces for an empty array, but without triggering
	its "empty slice" runtime warnings.
	'''
	if not values:
		return ['nan'] * NUM_STATS
	data = np.array(values)
	mean = np.mean(data)
	std = np.std(data)
	sem = std / math.sqrt(len(data))
	return [str(mean), str(std), str(std / mean), str(sem), str(sem / mean)]


def _read_rows(path):
	'''
	Parse the tab-separated input file and return one output row per data line.

	The first line is treated as a header and skipped. Column 1 is the barcode,
	column 2 the CRE, and every later column a replicate value; values equal to
	'NA' are ignored. Blank lines are skipped. Exits with a message naming the
	line number if a non-blank line has fewer than two columns.
	'''
	rows = []
	with open(path, 'r') as f:
		next(f, None)  # skip the header line (tolerates an empty file)
		for line_number, line in enumerate(f, start=2):
			line = line.rstrip('\n')
			if not line.strip():
				continue
			fields = line.split('\t')
			if len(fields) < 2:
				sys.exit('Line {}: expected at least 2 tab-separated columns, found {}'.format(line_number, len(fields)))
			# Strip so that values like 'NA ' or ' 1.5' are still recognised.
			stripped = (value.strip() for value in fields[2:])
			values = [float(value) for value in stripped if value != 'NA']
			rows.append([fields[0], fields[1]] + _summarize(values))
	return rows


def main(argv=None):
	'''
	Run the script: read argv[1], write summary statistics to argv[2].

	argv defaults to sys.argv. Exits with the module usage text unless exactly
	two arguments (input file, output file) are supplied.
	'''
	if argv is None:
		argv = sys.argv
	if len(argv) != 3:
		sys.exit(__doc__)

	# Rows are kept in a list (not a dict keyed by barcode) so that a barcode
	# appearing on several lines keeps a separate, correct row for each line.
	# All input is read before the output is opened, so the two paths may match.
	rows = _read_rows(argv[1])

	with open(argv[2], 'w') as o:
		o.write(OUTPUT_HEADER + '\n')
		for row in rows:
			o.write('\t'.join(row) + '\n')


if __name__ == '__main__':
	main()

