#!/bin/env python

#Script for creating regions useful for vcf-isec commands.
#For each deletion event two regions are created: chr:leftLimit-leftBrkpnt, followed by
#chr:rightBrkpnt-rightLimit. The breakpoints are read from the input in the form: (leftBrkpnt, rightBrkpnt)
#Three arguments are expected.
#The first is the file containing the selected TruSeq fragments that align perfectly with
#deletion event coordinates.
#The second is the file containing the genotype information generated (for the deletions) by cnvnator.
#If the genotype (GT) information (i.e., heterozygosity) is already known without using cnvnator,
#pass 0 as argv[2] (the second argument); the script will then not look for an extra file.
#The third argument is the output text file that receives the flanking regions.

from __future__ import print_function
import sys


def xtract():
  """Turn the current event-info line into flanking-region coordinates.

  Takes no arguments; it reads these module-level names instead:
    enInfo      -- event-info line; field 2 is the chromosome (trailing comma
                   allowed), fields 5 and 6 are the breakpoints written as
                   "(left," and "right)".
    lefL, ritL  -- lengths of the sequence left / right of the "EX" marker.
    sys.argv[2] -- genotype file path, or "0" to skip the genotype lookup
                   and treat every event as "HET".

  Returns the tuple (chrom, lStrt, exCoL, exCoR, rEnd, gtFlg), where chrom has
  a leading "chr" removed and gtFlg is "HET" or "DC" (Don't Care).

  Raises ValueError if the genotype file has no line containing the region,
  or if fewer than two lines follow the matching line.
  """
  flds = enInfo.split()
  chrom = flds[2].strip(",")
  exCoL = int(flds[5].strip("(,"))
  exCoR = int(flds[6].strip(")"))
  # The +1/-1 make the inclusive ranges lStrt..exCoL and exCoR..rEnd span
  # exactly lefL and ritL positions respectively.
  lStrt = exCoL - lefL + 1
  rEnd = exCoR + ritL - 1

  if sys.argv[2] == "0":
    gtFlg = "HET"
  else:
    # Region string as searched for in the genotype file (right end is exCoR-1).
    gtStr = "{0}:{1}-{2}".format(chrom, exCoL, exCoR - 1)
    gtNums = None
    # GT for genotype information; it's being obtained from file created using cnvnator.
    with open(sys.argv[2]) as GTfle:
      for line in GTfle:
        # NOTE(review): substring match, so a region that is a prefix of
        # another (e.g. ...-200 vs ...-2000) could match the wrong line.
        if gtStr in line:
          # The matching line plus the two lines right after it form one
          # record. next() gets a default so that a short file cannot leak
          # StopIteration into the caller's end-of-input handling.
          recs = [line, next(GTfle, None), next(GTfle, None)]
          if None in recs:
            raise ValueError(
                "genotype file {0!r} ends before three lines are available "
                "for {1}".format(sys.argv[2], gtStr))
          gtNums = [float(rec.split()[-2]) for rec in recs]
          break
    if gtNums is None:
      raise ValueError(
          "region {0} not found in genotype file {1!r}".format(
              gtStr, sys.argv[2]))
    gtNum1, gtNum2, gtNum3 = gtNums

    # "HET" requires the first value to lie in [0.5, 1.5], at least one of the
    # other two to be >= 0.5, and at least one of them to be <= 1.5.
    # NOTE(review): the original comment described this as "at least one HET
    # deletion in the parents" (presumably lines 2 and 3 are the parents), but
    # the test as written also accepts one value > 1.5 together with one
    # value < 0.5 -- confirm which is intended.
    if 0.5 <= gtNum1 <= 1.5 and (0.5 <= gtNum2 or 0.5 <= gtNum3) and (gtNum2 <= 1.5 or gtNum3 <= 1.5):
      gtFlg = "HET"
    else:
      gtFlg = "DC" #DC for Don't Care

  # Remove only a leading "chr"; str.strip("chr") would strip any of the
  # characters c/h/r from BOTH ends and mangle names such as "hs37d5".
  if chrom.startswith("chr"):
    chrom = chrom[3:]
  return (chrom, lStrt, exCoL, exCoR, rEnd, gtFlg)

#argv[1] is typically prfSeqs.txt file; argv[3] is the output file, opened in
#append mode so regions are added to whatever it already contains.
#Both files are managed by "with" so they are flushed and closed even if a
#record fails to parse.
#NOTE: lefL, ritL and enInfo must stay module-level names -- xtract() reads
#them as globals -- so this code is deliberately not wrapped in a function.
with open(sys.argv[1]) as fhSeqs, open(sys.argv[3], "a") as regns:
  while True:
    # Each record is five lines: three that are skipped, the sequence line,
    # and the event-info line. Only these reads are guarded, so StopIteration
    # here means end of input and nothing raised later is mistaken for it.
    try:
      for _ in range(3):
        next(fhSeqs)
      seq = next(fhSeqs)
      enInfo = next(fhSeqs)
    except StopIteration:
      break

    # Lengths of the sequence on either side of the "EX" marker.
    (lef, ex, rit) = seq.partition("EX")
    lefL = len(lef)
    ritL = len(rit.rstrip())

    tupPrnt = xtract() #tupPrnt is a tuple: (chrom, lStrt, exCoL, exCoR, rEnd, gtFlg)
    if tupPrnt[5] == "HET":
      # Left flank first, then right flank.
      print("{0}:{1}-{2}".format(tupPrnt[0], tupPrnt[1], tupPrnt[2]), file=regns)
      print("{0}:{1}-{2}".format(tupPrnt[0], tupPrnt[3], tupPrnt[4]), file=regns)

