#!/usr/bin/env python
# -*- coding: utf-8 -*-
## Script to run to reconstruct an ancestor (this file writes the summary tables of the reconstructions)

import re,os,sys
import n01Genomes


def _main_(argv):
   """Check the command line, then build both summary files.

   argv -- full argument vector: the program name followed by the clade
           name (the name of the directory scanned by chrGene and rea).

   When argv does not hold exactly two items, a usage message is printed
   and the process exits with status 1.
   """
   if len(argv) == 2:
      clade = argv[1]
      chrGene(clade)
      rea(clade)
      return
   usage = ("\n    1 argument is expected:\n "
            "      -> the clade Name (as 'Yeast' or 'Vertebrate')\n")
   print(usage)
   sys.exit(1)
   
def _chrgene_listdir(directory):
   """Return the entries of *directory*, or [] when it cannot be listed.

   Plain files (for instance the summary files this script writes inside the
   scanned directory) and unreadable directories are skipped this way
   instead of aborting the whole scan.
   """
   try:
      return os.listdir(directory)
   except OSError:
      return []


def _chrgene_read_def(defPath):
   """Return (nbChro, nbGene) read from the last line of a .def file.

   The two values are the first and the sixth whitespace-separated fields of
   the last line, converted to int.

   Raises IOError/OSError when the file cannot be read, IndexError when the
   file is empty or the last line has fewer than six fields, and ValueError
   when one of the two fields is not an integer.
   """
   with open(defPath, 'r') as handle:
      rows = handle.readlines()
   fields = rows[-1].split()
   return int(fields[0]), int(fields[5])


def _chrgene_collect(path):
   """Scan the ancestor tree under *path* for .def result files.

   Expected layout (relative to *path*):
      <ancName>/<name1>.<name2>.<outgroups>/Delta<D>DO<DO>/5Ancestor/<ancName>.<name1>.<name2>.def

   Returns (dicoAnc, deltaPairs) where
      dicoAnc = {ancName: [(name1, name2, outgroups, deltas, counts), ..]}
      deltas  = [(D, DO), ..] and counts = [(nbChro, nbGene), ..], aligned
                index by index
      deltaPairs = sorted list of every (D, DO) pair found

   Entries that do not follow the layout are skipped silently; a .def file
   that exists but cannot be parsed is reported on stderr and skipped.
   """
   ancPattern = re.compile(r'([^\.]*)\.([^\.]*)\.(.*)$')
   deltaPattern = re.compile(r'Delta(.*)DO(.*)$')
   dicoAnc = {}
   seenPairs = set()
   # A failure on the root directory itself is left to propagate to the caller.
   for ancName in os.listdir(path):
      ancDir = path + ancName
      for anc in _chrgene_listdir(ancDir):
         parts = ancPattern.match(anc)
         if parts is None:
            continue
         name1, name2, outgroups = parts.groups()
         pairDir = ancDir + '/' + anc
         # One directory maps to exactly one (name1, name2, outgroups) triple,
         # so its results can be gathered locally and stored once.
         deltas = []
         counts = []
         for delta in _chrgene_listdir(pairDir):
            found = deltaPattern.match(delta)
            if found is None:
               continue
            try:
               pair = (int(found.group(1)), int(found.group(2)))
            except ValueError:
               continue
            defPath = (pairDir + '/' + delta + '/5Ancestor/'
                       + ancName + '.' + name1 + '.' + name2 + '.def')
            if not os.path.isfile(defPath):
               continue
            print(defPath)
            try:
               count = _chrgene_read_def(defPath)
            except (IOError, OSError, IndexError, ValueError) as err:
               sys.stderr.write('WARNING: skipping %s (%s: %s)\n'
                                % (defPath, type(err).__name__, err))
               continue
            deltas.append(pair)
            counts.append(count)
            seenPairs.add(pair)
         if deltas:
            dicoAnc.setdefault(ancName, []).append(
               (name1, name2, outgroups, deltas, counts))
   return dicoAnc, sorted(seenPairs)


def chrGene(group):
   """Write the '(#chr,#gen)' summary table of the clade *group*.

   Scans '../../<group>/40Ancestors/' for .def result files, reads the pair
   (first field, sixth field) from the last line of each one, and writes
   '../../<group>/40Ancestors/Summary_<group>_ChrGene.txt'.

   File layout (tab separated):
      - one line numbering the sorted short names returned by
        n01Genomes.ListShortName('../../<group>')
      - a header with one column per (D, DO) pair found, in sorted order
      - one row per (ancestor, name1, name2, outgroups), each value being
        placed in the column of its (D, DO) pair

   Progress information (paths read, delta pairs, row contents) is printed
   on stdout.  Raises OSError when the 40Ancestors directory cannot be listed.
   """
   print('CONTIGS / GENES')
   path = '../../' + group + '/40Ancestors/'
   dicoAnc, deltaPairs = _chrgene_collect(path)
   print(deltaPairs)
   # Computed before the output file is opened so that a failure here does
   # not leave a truncated summary behind.
   shortNames = sorted(n01Genomes.ListShortName('../../' + group))
   column = dict((pair, rank) for rank, pair in enumerate(deltaPairs))
   with open(path + 'Summary_' + group + '_ChrGene.txt', 'w') as fileO:
      for rank, name in enumerate(shortNames):
         fileO.write(str(rank + 1) + ':' + name + '\t')
      fileO.write('\n\nAnc\tG1\tG2\toutgps\t(#chr,#gen)\n(D,DO)\t\t\t\t')
      for pair in deltaPairs:
         fileO.write(str(pair) + '\t')
      fileO.write('\n')
      for ancName in sorted(dicoAnc):
         fileO.write('\n')
         for (name1, name2, outgroups, deltas, counts) in dicoAnc[ancName]:
            fileO.write(ancName + '\t' + name1 + '\t' + name2 + '\t' + outgroups + '\t')
            ordered = sorted(zip(deltas, counts))
            print(ordered)
            previous = 0
            for pair, count in ordered:
               # Pad with tabs up to the column of this (D, DO) pair.
               fileO.write('\t' * (column[pair] - previous))
               previous = column[pair]
               fileO.write(str(count))
            fileO.write('\n')

def _rea_listdir(directory):
   """Return the entries of *directory*, or [] when it cannot be listed.

   Plain files (for instance the summary files this script writes inside the
   scanned directory) and unreadable directories are skipped this way
   instead of aborting the whole scan.
   """
   try:
      return os.listdir(directory)
   except OSError:
      return []


def _rea_read_sum(sumPath):
   """Return (macros, micros) read from a .sum file.

   Only the first tab-separated field of each line is used (0-based line
   indexes below):
      macros = (line 10, line 11, line 12)
      micros = (line 18, line 19, N) where N is the number captured by the
               pattern '(+ N ' in line 20
   All values are kept as strings, exactly as found in the file.

   Raises IOError/OSError when the file cannot be read, IndexError when it
   has fewer than 21 lines, and ValueError when line 20 does not contain the
   '(+ N ' pattern.
   """
   with open(sumPath, 'r') as handle:
      firstFields = [row.split('\t')[0] for row in handle]
   macros = (firstFields[10], firstFields[11], firstFields[12])
   found = re.search(r'\(\+ ([0-9]+) ', firstFields[20])
   if found is None:
      raise ValueError("no '(+ N ' count found on line 21")
   micros = (firstFields[18], firstFields[19], found.group(1))
   return macros, micros


def _rea_collect(path):
   """Scan the ancestor tree under *path* for .sum result files.

   Expected layout (relative to *path*):
      <ancName>/<name1>.<name2>.<outgroups>/Delta<D>DO<DO>/5Ancestor/<ancName>.<name1>.<name2>.sum

   Returns (dicoAnc, deltaPairs) where
      dicoAnc = {ancName: [(name1, name2, outgroups, deltas, macros, micros), ..]}
      deltas  = [(D, DO), ..]; macros and micros are lists of 3-tuples of
                strings, aligned index by index with deltas
      deltaPairs = sorted list of every (D, DO) pair found

   Entries that do not follow the layout are skipped silently; a .sum file
   that exists but cannot be parsed is reported on stderr and skipped.
   """
   ancPattern = re.compile(r'([^\.]*)\.([^\.]*)\.(.*)$')
   deltaPattern = re.compile(r'Delta(.*)DO(.*)$')
   dicoAnc = {}
   seenPairs = set()
   # A failure on the root directory itself is left to propagate to the caller.
   for ancName in os.listdir(path):
      ancDir = path + ancName
      for anc in _rea_listdir(ancDir):
         parts = ancPattern.match(anc)
         if parts is None:
            continue
         name1, name2, outgroups = parts.groups()
         pairDir = ancDir + '/' + anc
         # One directory maps to exactly one (name1, name2, outgroups) triple,
         # so its results can be gathered locally and stored once.
         deltas = []
         macros = []
         micros = []
         for delta in _rea_listdir(pairDir):
            found = deltaPattern.match(delta)
            if found is None:
               continue
            try:
               pair = (int(found.group(1)), int(found.group(2)))
            except ValueError:
               continue
            sumPath = (pairDir + '/' + delta + '/5Ancestor/'
                       + ancName + '.' + name1 + '.' + name2 + '.sum')
            if not os.path.isfile(sumPath):
               continue
            print(sumPath)
            try:
               macro, micro = _rea_read_sum(sumPath)
            except (IOError, OSError, IndexError, ValueError) as err:
               sys.stderr.write('WARNING: skipping %s (%s: %s)\n'
                                % (sumPath, type(err).__name__, err))
               continue
            deltas.append(pair)
            macros.append(macro)
            micros.append(micro)
            seenPairs.add(pair)
         if deltas:
            dicoAnc.setdefault(ancName, []).append(
               (name1, name2, outgroups, deltas, macros, micros))
   return dicoAnc, sorted(seenPairs)


def rea(group):
   """Write the '#rea' summary table of the clade *group*.

   Scans '../../<group>/40Ancestors/' for .sum result files, extracts three
   "macro" and three "micro" values from each one (see _rea_read_sum for the
   exact lines used), and writes
   '../../<group>/40Ancestors/Summary_<group>_Rea.txt'.

   File layout (tab separated):
      - one line numbering the sorted short names returned by
        n01Genomes.ListShortName('../../<group>')
      - a header with one column per (D, DO) pair found, in sorted order
      - one row per (ancestor, name1, name2, outgroups), each cell being
        '[macro1(micro1), macro2(micro2), macro(micro)]' placed in the
        column of its (D, DO) pair

   Progress information (paths read, delta pairs) is printed on stdout.
   Raises OSError when the 40Ancestors directory cannot be listed.
   """
   print('REA')
   path = '../../' + group + '/40Ancestors/'
   dicoAnc, deltaPairs = _rea_collect(path)
   print(deltaPairs)
   # Computed before the output file is opened so that a failure here does
   # not leave a truncated summary behind.
   shortNames = sorted(n01Genomes.ListShortName('../../' + group))
   column = dict((pair, rank) for rank, pair in enumerate(deltaPairs))
   with open(path + 'Summary_' + group + '_Rea.txt', 'w') as fileO:
      for rank, name in enumerate(shortNames):
         fileO.write(str(rank + 1) + ':' + name + '\t')
      fileO.write('\n\nAnc\tG1\tG2\toutgps\t[#rea_G1(micro),#reaG2,#rea?]\n(D,DO)\t\t\t\t')
      for pair in deltaPairs:
         fileO.write(str(pair) + '\t')
      fileO.write('\n')
      for ancName in sorted(dicoAnc):
         fileO.write('\n')
         for (name1, name2, outgroups, deltas, macros, micros) in dicoAnc[ancName]:
            fileO.write(ancName + '\t' + name1 + '\t' + name2 + '\t' + outgroups + '\t')
            previous = 0
            for pair, macro, micro in sorted(zip(deltas, macros, micros)):
               # Pad with tabs up to the column of this (D, DO) pair.
               fileO.write('\t' * (column[pair] - previous))
               previous = column[pair]
               fileO.write('[' + macro[0] + '(' + micro[0] + '), '
                           + macro[1] + '(' + micro[1] + '), '
                           + macro[2] + '(' + micro[2] + ')]')
            fileO.write('\n')

if __name__ == "__main__":
   # Script mode: the clade name is taken from the command line.
   # For a manual run, pass a two-item list whose second item is the clade
   # name instead, e.g. [0, 'Lachancea'], [0, 'Yeast'] or [0, 'Vertebrate'].
   _main_(sys.argv)

