import glob
import os
import re

import numpy

#parse MEGA output
#make a table with the ORF name,
#the distance with cer,
#and the distance within spar
def parse_meg(mega):
	"""Average the pairwise distances found in a MEGA distance listing.

	Only lines that start with a single-digit bracketed label (e.g. ``[2]``)
	followed by whitespace and a digit are treated as data rows; every other
	line is ignored.

	For each data row the first value goes into the first pool.  Rows
	labelled ``[3]`` additionally contribute their second value, and rows
	labelled ``[4]`` their second and third values, to BOTH pools.

	NOTE(review): the first mean therefore pools every parsed distance,
	including those also counted in the second pool -- confirm this is the
	intended definition.

	Args:
		mega: iterable of text lines (for example an open file handle).

	Returns:
		A two-element list ``[mean_of_first_pool, mean_of_second_pool]`` as
		computed by ``numpy.mean``.
	"""
	row_pattern=re.compile(r"^\[\d\]\s+\d")
	# For each row label: which columns beyond the first also feed both pools.
	shared_columns={"[3]":(2,),"[4]":(2,3)}

	first_pool=[]
	second_pool=[]
	for raw in mega:
		row=raw.rstrip()
		if row_pattern.match(row) is None:
			continue

		fields=row.split()
		first_pool.append(float(fields[1]))
		for col in shared_columns.get(fields[0],()):
			# Append order is kept stable so the averaged sequence is unchanged.
			second_pool.append(float(fields[col]))
			first_pool.append(float(fields[col]))

	return [numpy.mean(first_pool), numpy.mean(second_pool)]



def _write_dist_table(dir_mega, out_path):
	"""Write one tab-separated row of mean distances per ``.meg`` file.

	Every ``*.meg`` file directly inside ``dir_mega`` is passed to
	``parse_meg``; the output row holds the feature name (the file name with
	``_mega_pairdist.meg`` removed) followed by the two means it returns.

	Args:
		dir_mega: directory prefix, including the trailing slash.
		out_path: path of the table to (over)write.
	"""
	# glob instead of shelling out to ``ls``: no shell involved, and an empty
	# match yields an empty list rather than [""] (which made open("") fail).
	# sorted() keeps the row order deterministic.
	list_files=sorted(glob.glob(dir_mega+"*.meg"))

	with open(out_path,"w") as out:
		for filei in list_files:
			feat_name=os.path.basename(filei).replace("_mega_pairdist.meg","")
			with open(filei) as mega:
				dist=parse_meg(mega)

			newline="\t".join([feat_name,str(dist[0]),str(dist[1])])
			out.write(newline+"\n")


#ORFS###################################################################
_write_dist_table(
	"../../11_selection/03_Mega_results/02_orf_pairs/",
	"../../11_selection/03_Mega_results/dist_orf.txt",
)


#GENES###################################################################
_write_dist_table(
	"../../11_selection/03_Mega_results/01_genes_pairs/",
	"../../11_selection/03_Mega_results/dist_genes.txt",
)


