#!/usr/bin/python
# -*- coding: utf-8 -*-
## Removes the duplicated blocks and the blocks without orthologs in the outgroups

import re

def readSynts(pathPart2,name1,name2):
   """Load the micro-synteny blocks of a genome pair and make them non-overlapping.

   The file ``pathPart2 + name1 + '.' + name2 + '.orth.synt'`` is parsed with
   readSynt, then the block lists are filtered in sequence:

   A. delIncluded  -- drop blocks strictly included in another block;
   B. del2Choix    -- keep one block per group sharing identical bounds;
   C. delACheval   -- drop two-gene blocks straddling other blocks;
   D. delChev      -- repeatedly resolve overlaps between consecutive blocks,
      in genome 1 order and then genome 2 order, until still_chev reports
      none in either ordering.

   Progress is printed after each step. Every message is formatted into a
   single string before printing so that the output is the same whether
   ``print`` is the Python 2 statement or the Python 3 function.

   Returns the tuple
   (blocs, blocs1, blocs2, ortho1, ortho2, new2old1, new2old2, nbMiBl)
   where nbMiBl is the number of blocks read before any filtering.
   """
   # The context manager closes the file even if parsing raises.
   with open(pathPart2+name1+'.'+name2+'.orth.synt','r') as fileSynt:
      blocs,blocs1,blocs2,ortho1,ortho2,new2old1,new2old2=readSynt(fileSynt)
   nbMiBl=len(blocs)
   print('\nThere are %d initial micro synteny blocks' % len(blocs))
   blocs,blocs1,blocs2=delIncluded(blocs,blocs1,blocs2,ortho1,ortho2)  # A
   print("%d left after deletion of the 'included' ones" % len(blocs))
   blocs,blocs1,blocs2=del2Choix(blocs,blocs1,blocs2,ortho1,ortho2)    # B
   print("%d left after deletion of the 'duplicated' ones" % len(blocs))
   blocs,blocs1,blocs2=delACheval(blocs,blocs1,blocs2,ortho1,ortho2)   # C
   print("%d left after deletion of the 'à cheval' ones" % len(blocs))

   list1,list2=listBlocsOrdered(blocs,blocs1,blocs2)                   # D
   still_chev1,still_chev2=still_chev(list1,blocs1),still_chev(list2,blocs2)
   while still_chev1 or still_chev2:
      if still_chev1:
         blocs,blocs1,blocs2=delChev(list1,blocs,blocs1,blocs2,ortho1,ortho2,new2old1,0)
         blocs,blocs1,blocs2=delIncluded(blocs,blocs1,blocs2,ortho1,ortho2)
         list1,list2=listBlocsOrdered(blocs,blocs1,blocs2)
         still_chev1,still_chev2=still_chev(list1,blocs1),still_chev(list2,blocs2)
      # still_chev2 was refreshed above if the genome 1 pass ran.
      if still_chev2:
         # Same routine with the roles of the two genomes swapped.
         blocs,blocs2,blocs1=delChev(list2,blocs,blocs2,blocs1,ortho2,ortho1,new2old2,1)
         blocs,blocs1,blocs2=delIncluded(blocs,blocs1,blocs2,ortho1,ortho2)
         list1,list2=listBlocsOrdered(blocs,blocs1,blocs2)
         still_chev1,still_chev2=still_chev(list1,blocs1),still_chev(list2,blocs2)

   # The double space reproduces the original comma-separated print output.
   print('Finally, there are  %d non overlapping blocks' % len(blocs))
   return blocs,blocs1,blocs2,ortho1,ortho2,new2old1,new2old2,nbMiBl

## Read the blocks
def readSynt(fileSynt):
   """Parse synteny block lines into block lists and orthology dictionaries.

   fileSynt is any iterable of text lines. Each non-blank line holds
   whitespace-separated integers: two leading fields (stored as the first
   two entries of the block; the second one is tested as the orientation
   flag) followed by one group of five fields per gene pair:
   old id 1, gene 1, old id 2, gene 2, similarity.

   Blank lines are skipped. A line that does not contain at least one
   complete group of five fields after the two leading ones raises
   ValueError.

   Returns (blocs, blocs1, blocs2, dicoOrthos1, dicoOrthos2, new2old1,
   new2old2) where, for the k-th parsed line:
     blocs[k]  = [field 0, field 1, mean similarity (float), number of pairs]
     blocs1[k] = [gene 1 of the first pair, gene 1 of the last pair]
     blocs2[k] = [gene 2 of the first pair, gene 2 of the last pair] when
                 field 1 is non-zero, the same two values swapped otherwise
   and
     new2oldN[gene]    = old id found the first time the gene was seen
     dicoOrthosN[gene] = list of (similarity, partner gene) tuples
   """
   blocs=[]
   blocs1=[]
   blocs2=[] # bounds of the blocks in each genome
   new2old1={}
   new2old2={} # gene id used in the blocks -> old id
   dicoOrthos1={} # gene -> [(similarity, partner gene), ...]
   dicoOrthos2={}
   for line in fileSynt:
      # split() drops the trailing newline and tolerates repeated blanks.
      fields=line.split()
      if not fields:
         continue
      # Floor division keeps an integer count on both Python 2 and 3.
      nbPairs,extra=divmod(len(fields)-2,5)
      if nbPairs<1 or extra:
         raise ValueError('malformed synteny line: %r' % line)
      values=[int(field) for field in fields]
      simTotal=0
      for k in range(nbPairs):
         old1,gene1,old2,gene2,sim=values[k*5+2:k*5+7]
         simTotal+=sim
         # Both dictionaries of a genome always share the same keys, so
         # setdefault reproduces the "first occurrence wins" behaviour.
         new2old1.setdefault(gene1,old1)
         dicoOrthos1.setdefault(gene1,[]).append((sim,gene2))
         new2old2.setdefault(gene2,old2)
         dicoOrthos2.setdefault(gene2,[]).append((sim,gene1))
      last=(nbPairs-1)*5
      blocs.append([values[0],values[1],simTotal/float(nbPairs),nbPairs])
      blocs1.append([values[3],values[last+3]])
      if values[1]:
         blocs2.append([values[5],values[last+5]])
      else:
         blocs2.append([values[last+5],values[5]])
   return blocs,blocs1,blocs2,dicoOrthos1,dicoOrthos2,new2old1,new2old2


def delBlocdeBlocs(listRemove,blocs,blocs1,blocs2,ortho1,ortho2):
   """Delete the blocks whose indices are in listRemove.

   For every gene pair of a deleted block, the matching
   (similarity, partner) tuple is also removed from ortho1 and ortho2.
   Pairs are enumerated from the block bounds: genome 1 genes run upwards
   from blocs1[i][0]; genome 2 genes run upwards from blocs2[i][0] when the
   orientation flag blocs[i][1] is true and downwards from blocs2[i][1]
   otherwise.

   blocs, blocs1, blocs2, ortho1 and ortho2 are modified in place;
   listRemove is left untouched. Each index is processed once even if it
   appears several times in listRemove, so a repeated index can no longer
   delete a neighbouring block.

   Returns (blocs, blocs1, blocs2).
   Raises IndexError if an expected pair is missing from ortho1/ortho2.
   """
   # Highest index first so earlier pops do not shift the pending ones.
   for i in sorted(set(listRemove),reverse=True):
      sameOrientation=blocs[i][1]
      first1=blocs1[i][0]
      for j in range(blocs1[i][1]-first1+1):
         g1=first1+j
         g2=blocs2[i][0]+j if sameOrientation else blocs2[i][1]-j
         # A list comprehension is subscriptable on Python 2 and 3 alike.
         pair12=[pair for pair in ortho1[g1] if pair[1]==g2][0]
         ortho1[g1].remove(pair12)
         pair21=[pair for pair in ortho2[g2] if pair[1]==g1][0]
         ortho2[g2].remove(pair21)
      blocs.pop(i)
      blocs1.pop(i)
      blocs2.pop(i)
   return blocs,blocs1,blocs2

#################
## A ## remove the blocks that are included in another one (homology)
def incl(bornes,liste):
   """Tell whether the interval bornes lies inside an interval of liste.

   bornes is a [start, end] pair and liste a sequence of such pairs.
   An interval identical to bornes does not count: at least one of the
   two bounds has to be strictly inside.
   """
   return any(
      (bornes[0]>=other[0] and bornes[1]<other[1]) or
      (bornes[0]>other[0] and bornes[1]<=other[1])
      for other in liste)
def delIncluded(blocs,blocs1,blocs2,ortho1,ortho2):
   """Remove every block strictly included in another block.

   A block is removed when its bounds in blocs1 or its bounds in blocs2
   satisfy incl() against the full list of bounds of that genome.
   Returns (blocs, blocs1, blocs2) as produced by delBlocdeBlocs.
   """
   nested=[idx for idx in range(len(blocs))
           if incl(blocs1[idx],blocs1) or incl(blocs2[idx],blocs2)]
   return delBlocdeBlocs(nested,blocs,blocs1,blocs2,ortho1,ortho2)

#################
## B ## when several choices exist (duplication): orientation 1 over 0, then max % similarity (between 2 red and 2 green, keep the green ones)
# (one duplicated gene present in both genomes)
def groupSame(blocs):
   """Group the indices of identical entries of blocs.

   Returns a list of index lists, one per value that occurs more than
   once. Each group is in increasing index order and groups appear in
   the order of their first index. Entries occurring once are left out.
   """
   groups=[]
   alreadyGrouped=set()
   total=len(blocs)
   for first in range(total-1):
      if first in alreadyGrouped:
         continue
      twins=[other for other in range(first+1,total)
             if blocs[first]==blocs[other]]
      if twins:
         alreadyGrouped.update(twins)
         groups.append([first]+twins)
   return groups
def del2Choix(blocs,blocs1,blocs2,ortho1,ortho2):
   """Keep a single block among those sharing identical bounds.

   Groups of blocks with equal bounds are collected with groupSame, first
   on blocs1 and then on blocs2. Within a group the blocks are ranked by
   the list [abs(start in genome 1 - start in genome 2),
   1 - orientation flag, -similarity, index]; the first one is kept and
   the others are removed through delBlocdeBlocs.

   Returns (blocs, blocs1, blocs2).
   """
   losers=[]
   for twins in groupSame(blocs1)+groupSame(blocs2):
      ranked=sorted(
         [abs(blocs1[k][0]-blocs2[k][0]),1-blocs[k][1],-1*blocs[k][2],k]
         for k in twins)
      # Everything after the best ranked entry is discarded, once.
      for entry in ranked[1:]:
         if entry[3] not in losers:
            losers.append(entry[3])
   return delBlocdeBlocs(losers,blocs,blocs1,blocs2,ortho1,ortho2)

#################
## C ## and those without any specific gene
def belongTo(g,liste):
   """Count the intervals of liste that contain gene g.

   liste is a sequence of [start, end] pairs with inclusive bounds; g is
   expected to be an integer like the bounds. A plain comparison is used
   instead of building range(start, end+1) for every interval, which
   allocates a whole list per test on Python 2.
   """
   return sum(1 for bornes in liste if bornes[0]<=g<=bornes[1])
def delACheval(blocs,blocs1,blocs2,ortho1,ortho2):
   """Remove the two-gene blocks that straddle other blocks.

   Only blocks whose gene count blocs[i][3] equals 2 are examined. For
   such a block, each bound is tested with belongTo for being covered by
   exactly two blocks of its genome (the count includes the block itself).
   The block is removed when this holds for
     - both bounds in genome 1, or both bounds in genome 2, or
     - one bound in genome 1 together with the bound of the other gene
       pair in genome 2, taking the orientation flag blocs[i][1] into
       account.

   Returns (blocs, blocs1, blocs2) as produced by delBlocdeBlocs.
   """
   straddling=[]
   for idx,bloc in enumerate(blocs):
      if bloc[3]!=2:
         continue
      start1Twice=belongTo(blocs1[idx][0],blocs1)==2
      end1Twice=belongTo(blocs1[idx][1],blocs1)==2
      start2Twice=belongTo(blocs2[idx][0],blocs2)==2
      end2Twice=belongTo(blocs2[idx][1],blocs2)==2
      if bloc[1]:
         crossed=(start1Twice and end2Twice) or (end1Twice and start2Twice)
      else:
         crossed=(start1Twice and start2Twice) or (end1Twice and end2Twice)
      if (start1Twice and end1Twice) or (start2Twice and end2Twice) or crossed:
         straddling.append(idx)
   return delBlocdeBlocs(straddling,blocs,blocs1,blocs2,ortho1,ortho2)


#############################
# creation of list blocs:
#############################
def listBlocsOrdered(blocs,blocs1,blocs2):
   """Build the ordered lists of 1-based block identifiers of both genomes.

   The first list is simply 1..len(blocs). The second list holds the same
   identifiers ordered by increasing blocs2 start, blocks with equal
   starts keeping their original relative order; an identifier is
   negated when the orientation flag bloc[1] of its block is false.
   blocs1 is not used.

   Returns (list1, list2).
   """
   order1=list(range(1,len(blocs)+1))
   order2=[]
   for idx,bloc in enumerate(blocs):
      signedId=(idx+1) if bloc[1] else -(idx+1)
      # Position of the first block already placed that starts strictly
      # after this one; the end of the list when there is none.
      position=next(
         (pos for pos,other in enumerate(order2)
          if blocs2[idx][0]<blocs2[abs(other)-1][0]),
         len(order2))
      order2.insert(position,signedId)
   return order1,order2

## Are there still some duplicated tandems?
def still_chev(liste,blocs12):
   """Tell whether two consecutive blocks of liste still overlap.

   liste holds signed 1-based block identifiers and blocs12 the
   [start, end] bounds of the blocks. The answer is True as soon as a
   block starts at or before the end of the block preceding it in liste.
   """
   return any(
      blocs12[abs(following)-1][0]<=blocs12[abs(current)-1][1]
      for current,following in zip(liste,liste[1:]))

#################
## D ## We delete the tandem duplicated genes
def delChev(liste1,blocs,blocs1,blocs2,ortho1,ortho2,new2old1,g):
   """Resolve overlaps between consecutive blocks of liste1.

   liste1 holds signed 1-based block identifiers; blocs1/ortho1 describe the
   genome in which liste1 is ordered and blocs2/ortho2 the other genome
   (the caller swaps them to process the second genome).

   For each consecutive pair whose second block starts at or before the end
   of the first one in blocs1, the similarity scores of the overlapping
   genes are summed separately for partners lying inside the first block's
   blocs2 bounds (Sim_i) and inside the second block's (Sim_i1). The block
   with the lower sum gives up the l overlapping genes (the first block on
   a tie): its bounds and gene count are trimmed in place, or it is queued
   for removal when it consists of those l genes only.

   Side effects: blocs, blocs1 and blocs2 entries are modified in place
   while the pairs are scanned, so later pairs see the trimmed bounds; the
   ortho1 lists of the overlapping genes are sorted in place.

   new2old1 and g are accepted but not used in this function.

   Returns (blocs, blocs1, blocs2) as produced by delBlocdeBlocs.
   """
   listRemove=[]
   for i in range(len(liste1)-1):
      # Bounds of the current block (i) and of the next one (i1).
      bloc1_i=blocs1[abs(liste1[i])-1]
      bloc1_i1=blocs1[abs(liste1[i+1])-1]
      bloc2_i=blocs2[abs(liste1[i])-1]
      bloc2_i1=blocs2[abs(liste1[i+1])-1]
      if bloc1_i1[0]<=bloc1_i[1]:
         # Number of genes shared by the two blocks.
         l=bloc1_i[1]-bloc1_i1[0]+1
         Sim_i=0
         Sim_i1=0
         for g1 in range(bloc1_i1[0],bloc1_i[1]+1):
            ortho1[g1].sort()
            for g2 in ortho1[g1]:
               # g2 is a (similarity, partner gene) tuple.
               if g2[1] in range(bloc2_i[0],bloc2_i[1]+1):
                  Sim_i+=g2[0]
               elif g2[1] in range(bloc2_i1[0],bloc2_i1[1]+1):
                  Sim_i1+=g2[0]
         if Sim_i<=Sim_i1:   ## all the genes go to the next block (b)
            n=blocs[abs(liste1[i])-1][3]
            if n!=l:
               # NOTE(review): this update equals (mean*n - Sim_i)/(n-l) only
               # when l == 1 -- confirm the intended formula for l > 1.
               blocs[abs(liste1[i])-1][2]=blocs[abs(liste1[i])-1][2]*n*1./(n-1)-Sim_i*1./(n-l)
               blocs[abs(liste1[i])-1][3]-=l
               # The current block loses its last l genes.
               blocs1[abs(liste1[i])-1][1]-=l
               if blocs[abs(liste1[i])-1][1]:# positive block
                  blocs2[abs(liste1[i])-1][1]-=l
               else:
                  blocs2[abs(liste1[i])-1][0]+=l
            else:
               # The whole block is made of overlapping genes: drop it.
               listRemove.append(abs(liste1[i])-1)
         else:              ## all the genes go to the current block (a)
            n=blocs[abs(liste1[i+1])-1][3]
            if n!=l:
               # NOTE(review): same formula as above, same caveat for l > 1.
               blocs[abs(liste1[i+1])-1][2]=blocs[abs(liste1[i+1])-1][2]*n*1./(n-1)-Sim_i1*1./(n-l)
               blocs[abs(liste1[i+1])-1][3]-=l
               # The next block loses its first l genes.
               blocs1[abs(liste1[i+1])-1][0]+=l
               if blocs[abs(liste1[i+1])-1][1]:# positive block
                  blocs2[abs(liste1[i+1])-1][0]+=l
               else:
                  blocs2[abs(liste1[i+1])-1][1]-=l
            else:
               # The whole block is made of overlapping genes: drop it.
               listRemove.append(abs(liste1[i+1])-1)
   return delBlocdeBlocs(listRemove,blocs,blocs1,blocs2,ortho1,ortho2)



