#!/bin/env python

#This script phases deletion events for NA19240 using the trio (mother, father, child) genotypes.
#The first argument is the text file that contains genotype information from CNVnator
#(for NA19240 it is NA19240_GenoTyp.txt).
#The second argument is the output file for the phased deletions.
#The third argument is the output file for the deletions that cannot be phased.
#Both output files are opened in append mode, so existing content is kept.
#The README file has an example run with NA12878 files.

from __future__ import print_function
from __future__ import division
import sys
import os

#The genotype file is read three lines at a time. Within each group of three
#lines, the first line is the mother, the second is the father (NA19239 for
#NA19240) and the third is the child (NA19240). Phased calls are written as
#P|M (paternal | maternal).

#Copy-number thresholds used to turn a CNVnator genotype value into a call:
#  value > HET_HIGH            -> 0/0 (no deletion)
#  HET_LOW <= value <= HET_HIGH -> 0/1 (heterozygous deletion)
#  value < HET_LOW             -> 1/1 (homozygous deletion)
HET_LOW = 0.5
HET_HIGH = 1.5

#Non-phase outcomes returned by classify_het_deletion.
UNPHASED = "unphased"
INCONSISTENT = "inCons"


def parse_genotype(line):
    """Return the genotype value of a line: its second-to-last field as a float."""
    return float(line.split()[-2])


def parse_coord(line):
    """Return "chrom<TAB>left<TAB>right" from the line's second field.

    The second whitespace-separated field is expected to look like
    "chrom:left-right"; an IndexError is raised if it does not.
    """
    flds = line.split()[1].split(":")
    lrc = flds[1].split("-")  # left and right coordinates
    return flds[0] + "\t" + lrc[0] + "\t" + lrc[1]


def classify_het_deletion(gt_mother, gt_father):
    """Phase a deletion that is heterozygous in the child from parental values.

    Returns "0|1" or "1|0" (paternal | maternal) when the parental genotypes
    determine which parent transmitted the deletion, UNPHASED when both
    parents are heterozygous, INCONSISTENT when the parental genotypes cannot
    produce a heterozygous child, and None when the father's value matches no
    range (only possible for NaN).
    """
    if HET_HIGH < gt_father:  # father 0/0
        if gt_mother <= HET_HIGH:  # mother 0/1 or 1/1: deletion is maternal
            return "0|1"
        return INCONSISTENT  # neither parent carries the deletion
    if HET_LOW <= gt_father <= HET_HIGH:  # father 0/1
        if HET_HIGH < gt_mother:  # mother 0/0: deletion is paternal
            return "1|0"
        if gt_mother < HET_LOW:  # mother 1/1: deletion is maternal
            return "0|1"
        return UNPHASED  # both parents 0/1 (or mother is NaN)
    if gt_father < HET_LOW:  # father 1/1
        if HET_LOW <= gt_mother:  # mother 0/1 or 0/0: deletion is paternal
            return "1|0"
        return INCONSISTENT  # both parents 1/1 cannot give a 0/1 child
    return None


def iter_trio_calls(lines):
    """Yield (coord, call) for every trio whose child is a heterozygous deletion.

    lines is any iterable of text lines ordered mother, father, child for each
    region. Blank lines are skipped. The coordinates are taken from the
    mother's line. call is one of the values returned by
    classify_het_deletion; trios for which it returns None are not yielded.

    Raises ValueError if the input ends in the middle of a trio.
    """
    rows = (ln for ln in lines if ln.strip())
    for mother_line in rows:
        gt_mother = parse_genotype(mother_line)
        father_line = next(rows, None)
        child_line = next(rows, None)
        if father_line is None or child_line is None:
            raise ValueError(
                "incomplete trio at end of input: expected mother, father "
                "and child lines for {0!r}".format(mother_line.strip()))
        gt_father = parse_genotype(father_line)
        gt_child = parse_genotype(child_line)

        if not (HET_LOW <= gt_child <= HET_HIGH):
            continue  # only possibly-HET deletions in the child are phased

        call = classify_het_deletion(gt_mother, gt_father)
        if call is not None:
            yield parse_coord(mother_line), call


def main(argv=None):
    """Phase the trios in argv[0]; append phased calls to argv[1] and
    unphaseable regions to argv[2]. Inconsistent trios are printed to stdout.

    argv defaults to sys.argv[1:].
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 3:
        sys.exit("usage: {0} <genotype.txt> <phased.out> <not_phased.out>"
                 .format(sys.argv[0]))
    gt_path, phased_path, unphased_path = args

    # Context managers guarantee that the output files are flushed and closed
    # even if a malformed line raises part-way through the input.
    with open(gt_path) as gt_file:
        with open(phased_path, "a") as phased_file:
            with open(unphased_path, "a") as unphased_file:
                for coord, call in iter_trio_calls(gt_file):
                    if call == INCONSISTENT:
                        print(coord, "inCons")
                    elif call == UNPHASED:
                        print(coord, file=unphased_file)
                    else:
                        print("{0}\t{1}".format(coord, call), file=phased_file)


if __name__ == "__main__":
    main()
