'''
Summarizes all simulated motif conservation probabilities/counts.
Prints all simulated motif conservation probabilities/counts to standard output.
Usage: python3 summarize_simulated_motif_conservation.py <motif conservation results directory> <output file>
'''

import sys, os
import statistics as stats

def _fail(message):
	'''
	Abort the run: write message to standard error, then exit with the usage text.
	Standard output carries the Type/Value table, so diagnostics are kept off it.
	'''
	print(message, file=sys.stderr)
	sys.exit(__doc__)


def _read_result_file(f, filename, results):
	'''
	Fold one open result file into results (modified in place).

	Layout, as consumed by this parser:
	  - first line: '<label>: <motif>'
	  - tab-separated mutation rate rows (species, transition rate, transversion rate)
	    up to the first blank line
	  - tab-separated conservation rows after it: column 3 is the expected conservation
	    rate, column 4 the expected conservation number, column 5 the simulated
	    conserved number and the last column the simulated conserved rate
	Rows whose first field is 'Species' are headers and are skipped.

	The motif, the mutation rates and the expected conservation number of the first
	file seen are the reference; any later file that disagrees aborts the run.
	filename is only used in error messages.
	'''
	motif_line = f.readline()
	if ':' not in motif_line: #Empty or truncated file, report it instead of raising IndexError
		_fail(filename + ' is missing the motif line')
	file_motif = motif_line.split(':')[1].strip()
	if results['motif'] == '': #Get motif of first file
		results['motif'] = file_motif
	elif file_motif != results['motif']: #Not matching motif, exit
		_fail(filename + ' has mismatching motif')

	transition_rates = results['transition_rates']
	transversion_rates = results['transversion_rates']
	expected_counts = results['expected_counts']
	expected_freqs = results['expected_freqs']

	in_rates_section = True
	for line in f:
		if line.strip() == '': #First empty line separates mutation rate info from conservation info
			in_rates_section = False
			continue
		fields = line.rstrip().split('\t')
		species = fields[0]
		if species == 'Species': #Header of either section
			continue
		if in_rates_section:
			transition_rate = fields[1]
			transversion_rate = fields[2]
			if species not in transition_rates: #First file defines the reference rates and the species order
				transition_rates[species] = transition_rate
				transversion_rates[species] = transversion_rate
				results['species_order'].append(species)
			elif transition_rate != transition_rates[species] or transversion_rate != transversion_rates[species]:
				_fail(filename + ' does not have the same transition/transversion rates')
		else:
			expected_freq = fields[2]
			expected_count = fields[3]
			if species not in expected_counts: #First file defines the reference expectation
				expected_counts[species] = expected_count
				expected_freqs[species] = expected_freq
			elif expected_count != expected_counts[species]:
				_fail(filename + ' does not have the same expected conservation count')
			#Convert both values before storing either, so the two lists stay the same length
			random_conserved_num = int(fields[4])
			random_conserved_freq = float(fields[-1])
			results['random_conserved_nums'].setdefault(species, []).append(random_conserved_num)
			results['random_conserved_freqs'].setdefault(species, []).append(random_conserved_freq)


def _read_results(dir_path):
	'''
	Parse every regular file directly inside dir_path and return the collected data as a dict:
	  motif                   motif string shared by all files ('' if no file was read)
	  species_order           species in the order they first appear
	  transition_rates        species -> transition rate (text, as read)
	  transversion_rates      species -> transversion rate (text, as read)
	  expected_counts         species -> expected conservation number (text, as read)
	  expected_freqs          species -> expected conservation rate (text, as read)
	  random_conserved_nums   species -> list of simulated conserved numbers (int)
	  random_conserved_freqs  species -> list of simulated conserved rates (float)
	  num_files               number of files parsed
	'''
	results = {
		'motif': '',
		'species_order': [],
		'transition_rates': {},
		'transversion_rates': {},
		'expected_counts': {},
		'expected_freqs': {},
		'random_conserved_nums': {},
		'random_conserved_freqs': {},
		'num_files': 0,
	}
	#Sorted so the reference file and the order of the printed values do not depend on the filesystem
	for filename in sorted(os.listdir(dir_path)):
		#os.path.join keeps absolute paths intact (stripping '/' from both ends turned them into relative paths)
		path = os.path.join(dir_path, filename)
		if not os.path.isfile(path): #Sub-directories cannot be result files
			continue
		with open(path, 'r') as f:
			_read_result_file(f, filename, results)
		results['num_files'] += 1
	return results


def _stdev_text(values):
	'''
	Return the sample standard deviation of values as text.
	statistics.stdev needs at least two data points; with fewer, 'NA' is returned instead of raising.
	'''
	if len(values) < 2:
		return 'NA'
	return str(stats.stdev(values))


def _write_summary(out_file, results):
	'''
	Write motif conservation info from simulations to out_file: the motif, the mutation
	rates, the expected conservation and the mean/stdev of the simulated values per species.
	'''
	species_order = results['species_order']
	with open(out_file, 'w') as o:
		o.write('Motif: ' + results['motif'] + '\n')
		o.write('Species\tTransition_rate\tTransversion_rate\n')
		for species in species_order:
			o.write('\t'.join([species, results['transition_rates'][species], results['transversion_rates'][species]]) + '\n')
		o.write('\n')
		o.write('Species\tExpected_conservation_rate\tExpected_conservation_number\n')
		for species in species_order:
			o.write('\t'.join([species, results['expected_freqs'][species], results['expected_counts'][species]]) + '\n')
		o.write('\n')
		#Report the number of result files actually read rather than a fixed 1000
		o.write(str(results['num_files']) + ' simulated motif evolutions\n')
		o.write('Species\tMean_conservation_rate\tStdev_conservation_rate\tMean_conservation_num\tStdev_conservation_num\n')
		for species in species_order:
			freqs = results['random_conserved_freqs'][species]
			nums = results['random_conserved_nums'][species]
			o.write('\t'.join([species, str(stats.mean(freqs)), _stdev_text(freqs), str(stats.mean(nums)), _stdev_text(nums)]) + '\n')
		o.write('\n')


def _print_values(results):
	'''
	Print all simulation motif conservation frequencies and numbers to standard output
	as a two-column Type/Value table, one row per simulated value.
	'''
	#Header
	print('Type\tValue')
	#Data
	for species in results['species_order']:
		for value in results['random_conserved_freqs'][species]:
			print(species + '_freq' + '\t' + str(value))
		for value in results['random_conserved_nums'][species]:
			print(species + '_num' + '\t' + str(value))


def main(argv=None):
	'''
	Command-line entry point.
	argv defaults to sys.argv and must be [program, results directory, output file];
	any other length exits with the usage text.
	'''
	if argv is None:
		argv = sys.argv
	if len(argv) != 3:
		sys.exit(__doc__)
	results = _read_results(argv[1])
	_write_summary(argv[2], results)
	_print_values(results)


if __name__ == '__main__':
	main()

#Header
#print('\t'.join([(species + '_freq') for species in species_order]) + '\t' + '\t'.join([(species + '_num') for species in species_order]))
#Values
#for i in range(len(random_conserved_freqs[species_order[0]])):
#	print('\t'.join([str(random_conserved_freqs[species][i]) for species in species_order]) + '\t' + '\t'.join([str(random_conserved_nums[species][i]) for species in species_order]))

