#!/usr/bin/env python
import os
import sys
import inspect
import re
import argparse
import random
import math
import collections
import fileinput




# Command-line parser. RawDescriptionHelpFormatter prints the description and
# epilog exactly as written, so the strings are spelled out with explicit
# newlines (the description starts with a whitespace-only line).
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=(
        "           \n"
        "Description\n"
        "-----------\n"
        "Summary statistics\n"
    ),
    epilog=(
        "\n"
        "\n"
        "Authors\n"
        "-------\n"
        "    Robert Kofler\n"
    ),
)

class TE:
    """A single TE insertion record together with a truncation marker.

    Constructor arguments are stored unchanged. Two attribute names differ
    from their parameter names: ``chrm`` is kept as ``chr`` and ``order`` as
    ``ord``. ``trunc`` is not a constructor argument; it always starts at 0.
    """

    def __init__(self,sid,chrm,pos,strand,fam,order,sup,com,popfreq):
        # identifier and location
        self.sid, self.chr, self.pos, self.strand = sid, chrm, pos, strand
        # family / order classification
        self.fam, self.ord = fam, order
        # remaining per-record values
        self.sup, self.com, self.popfreq = sup, com, popfreq
        # truncation marker, initially unset
        self.trunc = 0

def read_popte2(file,filterchr):
    """Load the P-element insertions from a tab-separated insertion table.

    Expected layout (columns are tab-separated, 0-based)::

        0   1             2     3   4       5         6   7   8
        1   contig_4615   384   -   INE-1   TIR       F   -   0.400
        1   contig_1105   438   +   HeT-A   non-LTR   F   -   0.001
        1   contig_4614   211   -   Rt1c    non-LTR   F   -   0.040

    Column 0 is stored as ``sid``, 1 as the chromosome, 2 as the integer
    position, 3 as the strand, 4 as the family, 5 as the order, 6 as ``sup``
    and 8 as the float population frequency. Column 7 is not read; ``com``
    is always set to the empty string.

    Only rows whose family is exactly ``"P-element"`` and whose chromosome
    is contained in ``filterchr`` are kept. Blank lines are skipped.

    Args:
        file: path of the table to read.
        filterchr: container of chromosome names to keep.

    Returns:
        A ``collections.defaultdict(list)`` mapping chromosome name to the
        list of ``TE`` objects on that chromosome, in file order.

    Raises:
        IndexError: a non-blank row has fewer than nine columns.
        ValueError: column 2 is not an integer or column 8 is not a float.
    """
    techrh=collections.defaultdict(list)
    # The context manager closes the handle even if a row fails to parse.
    with open(file) as handle:
        for line in handle:
            if not line.strip():
                # Tolerate empty lines, e.g. a trailing newline at the end.
                continue
            a=line.rstrip("\n").split("\t")
            sid=a[0]
            chrm=a[1]
            pos=int(a[2])
            strand=a[3]
            fam=a[4]
            order=a[5]
            sup=a[6]
            com=""
            popfreq=float(a[8])
            # Every row is parsed before filtering, so a malformed row is
            # reported even when it would have been discarded.
            if fam !="P-element":
                continue
            if chrm not in filterchr:
                continue
            techrh[chrm].append(TE(sid,chrm,pos,strand,fam,order,sup,com,popfreq))
    return techrh
    
    


# Command-line options; all of them are required.
# --cond, --rep and --gen are free-text labels that are copied unchanged into
# the first three output columns (in that order: cond, rep, gen).
parser.add_argument("--pt2", type=str, required=True, dest="pt2", help="PoPoolationTE2 TE insertion file (tab-separated)")
parser.add_argument("--sam", type=str, required=True, dest="sam",  help="sam file with mates")
parser.add_argument("--cond", type=str, required=True, dest="cond", help="condition label; copied to output column 1")
parser.add_argument("--gen", type=str, required=True, dest="gen",  help="generation label; copied to output column 3")
parser.add_argument("--rep", type=str, required=True, dest="rep",  help="replicate label; copied to output column 2")
parser.add_argument("--md", type=int, required=True, dest="md",  help="maxdistance to trunc")
args = parser.parse_args()

# Only insertions on these chromosome arms are loaded.
euchr=set(["X","2L","2R","3L","3R","4"])
tes=read_popte2(args.pt2, euchr)
md=int(args.md)

# Pass 1: mark every insertion that lies within md bases of a read in the
# SAM file. The context manager makes sure the SAM handle is closed.
with open(args.sam) as samfile:
    for l in samfile:
        # SAM header lines ("@...") and blank lines carry no alignment.
        if l.startswith("@") or not l.strip():
            continue
        a=l.rstrip("\n").split("\t")
        # Example record; columns used (0-based): 2 = chromosome,
        # 3 = start position, 6 = chromosome of the mate.
        # HWI-D00689_0065:6:2113:15567:55126#CTGAAGCT_AGGCGAAG  113  X  7321617  84  59M1D41M  PPI251  285  0  TCCC...  FFFF...  AS:i:89
        chrm=a[2]
        start=int(a[3])
        # NOTE(review): the read is assumed to cover exactly 100 reference
        # bases; the CIGAR string is not consulted - confirm this is intended.
        end=start+99
        mchr=a[6]
        if mchr !="PPI251":
            raise Exception("invalid mate not to P-element")

        # .get() avoids adding empty lists to the defaultdict for chromosomes
        # that have no insertion.
        for te in tes.get(chrm, ()):
            # Gap between the insertion and the read interval [start, end];
            # 0 when the insertion lies inside the interval.
            distance=max(te.pos-end, start-te.pos, 0)
            if distance <=md:
                te.trunc=1

cond=args.cond
gen=args.gen
rep=args.rep

# Pass 2: one tab-separated row per insertion:
# cond, rep, gen, chromosome, position, family, population frequency, trunc.
for chrm, tmp in tes.items():
    for t in tmp:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}".format(cond,rep,gen,t.chr,t.pos,t.fam,t.popfreq,t.trunc))
    



















