#!/bin/env python

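#This script phases the deletion events in NA12878 using trio genotype information.
#The first argument is the text file that contains genotype information from CNVnator;
#it is read three consecutive lines at a time, and the first line of each group is the one being phased.
#The second argument is the output file for the phased deletions (opened in append mode).
#The third argument is the output file for deletions that cannot be phased (opened in append mode).
#Deletions whose trio genotypes are inconsistent are printed to standard output, tagged "inCons".
#See the README file for an example run.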

from __future__ import print_function
from __future__ import division
import sys
import os

#Thresholds used to bin a genotype estimate:
#below HOM_DEL_MAX -> 1/1, from HOM_DEL_MAX to HET_MAX inclusive -> 0/1, above HET_MAX -> 0/0.
HOM_DEL_MAX = 0.5
HET_MAX = 1.5

#Verdicts returned by classifyTrio() in addition to the phase strings "0|1" and "1|0".
INCONSISTENT = "inCons"
UNPHASED = "unphased"


def parseGenotype(line):
  """Return the genotype estimate of a line: its second-to-last whitespace field, as a float."""
  return float(line.split()[-2])


def parseCoord(line):
  """Return "chrom<TAB>left<TAB>right" built from the line's second field (chrom:left-right)."""
  flds = line.split()[1].split(":")
  lrc = flds[1].split("-") #left right coordinates
  return flds[0] + "\t" + lrc[0] + "\t" + lrc[1]


def classifyTrio(gtNum1, gtNum2, gtNum3):
  """Decide the phase of a possibly heterozygous deletion from three genotype estimates.

  gtNum1 is the estimate of the sample being phased (first line of a record;
  presumably the child -- confirm against the README); gtNum2 and gtNum3 are
  the estimates from the second and third lines of the record.

  Returns "0|1" or "1|0" when the deletion can be phased, UNPHASED when both
  other samples look heterozygous, INCONSISTENT when the three genotypes
  contradict each other, and None when gtNum1 is outside the heterozygous
  range (or gtNum2 compares false against every threshold, e.g. NaN).
  """
  if not HOM_DEL_MAX <= gtNum1 <= HET_MAX:
    return None #not a possible HET deletion, nothing to phase

  if HET_MAX < gtNum2: #0/0
    if gtNum3 <= HET_MAX: #0/1 or 1/1
      return "0|1"
    return INCONSISTENT
  if HOM_DEL_MAX <= gtNum2 <= HET_MAX: #0/1
    if HET_MAX < gtNum3: #0/0
      return "1|0"
    if gtNum3 < HOM_DEL_MAX: #1/1
      return "0|1"
    return UNPHASED
  if gtNum2 < HOM_DEL_MAX: #1/1
    if HOM_DEL_MAX <= gtNum3: #0/1 or 0/0
      return "1|0"
    return INCONSISTENT
  return None


def phaseDeletions(gtPath, phasedPath, notPhasedPath):
  """Phase every three-line record of gtPath.

  Phased deletions are appended to phasedPath as "chrom<TAB>left<TAB>right<TAB>phase",
  unphasable ones are appended to notPhasedPath as "chrom<TAB>left<TAB>right", and
  inconsistent ones are printed to standard output followed by "inCons".

  Raises ValueError if the input ends in the middle of a record (or a genotype
  field is not numeric) and IndexError if a line has too few fields.
  """
  #A single with-statement closes all three files, even when a line fails to parse.
  with open(gtPath) as gtFle, \
       open(phasedPath, "a") as phsFle, \
       open(notPhasedPath, "a") as notPhsFle:
    for line in gtFle:
      gtNum1 = parseGenotype(line)
      try:
        #The two following lines belong to the same record.
        line2 = next(gtFle)
        line3 = next(gtFle)
      except StopIteration:
        raise ValueError(
          "{0}: input ended in the middle of a three-line record".format(gtPath))
      gtNum2 = parseGenotype(line2)
      gtNum3 = parseGenotype(line3)

      verdict = classifyTrio(gtNum1, gtNum2, gtNum3)
      if verdict is None:
        continue

      coord = parseCoord(line)
      if verdict == INCONSISTENT:
        print(coord, "inCons")
      elif verdict == UNPHASED:
        print(coord, file=notPhsFle)
      else:
        print("{0}\t{1}".format(coord, verdict), file=phsFle)


if __name__ == "__main__":
  phaseDeletions(sys.argv[1], sys.argv[2], sys.argv[3])
