#!/usr/bin/env python
import os
import sys
import re
import argparse
import random
import collections


class PopTE2:
    """One insertion record from a PopTE2 file plus a position tolerance.

    Attributes:
        chrm: contig / chromosome name.
        pos: insertion position.
        strand: strand symbol as found in the file (e.g. "+").
        sup: support field as found in the file (e.g. "F", "FR").
        popfreq: frequency value parsed from the file.
        tol: half-width of the window placed around ``pos``.
    """

    def __init__(self, chrm: str, pos: int, strand: str, sup: str,
                 popfreq: float, tol: int) -> None:
        self.chrm, self.pos, self.strand = chrm, pos, strand
        self.sup, self.popfreq = sup, popfreq
        self.tol = tol

    def start(self) -> int:
        """Return the lower bound of the tolerance window (pos - tol)."""
        lower = self.pos - self.tol
        return lower

    def end(self) -> int:
        """Return the upper bound of the tolerance window (pos + tol)."""
        upper = self.pos + self.tol
        return upper


def loadpt2(file,tol):
    """Load a PopTE2 file and group its insertions by contig.

    Each non-blank line is split on tabs; columns 1, 2, 3, 6 and 8
    (0-based) supply the contig, position, strand, support and frequency
    of one ``PopTE2`` record.

    Args:
        file: path of the tab-separated PopTE2 file.
        tol: tolerance passed to every ``PopTE2`` record created.

    Returns:
        A ``collections.defaultdict(list)`` mapping contig name to the
        list of ``PopTE2`` records on that contig, in file order.

    Raises:
        IndexError: if a non-blank line has fewer than nine columns.
        ValueError: if the position or frequency column is not numeric.
    """
    #   0   1           2       3   4           5   6   7   8
    #   1	contig_170	1621453	+	P-element	TIR	F	-	0.177
    #   1	contig_26	5340773	+	P-element	TIR	FR	-	0.120
    #   1	contig_26	3058047	+	P-element	TIR	F	-	0.163
    toret = collections.defaultdict(list)
    # The context manager closes the handle even if parsing raises.
    with open(file) as handle:
        for line in handle:
            line = line.rstrip("\n")
            # A blank line (e.g. a trailing one) carries no record; skip it
            # instead of failing on the column lookup below.
            if not line.strip():
                continue
            a = line.split("\t")
            chrm, pos, strand, sup, popfreq = a[1], int(a[2]), a[3], a[6], float(a[8])
            toret[chrm].append(PopTE2(chrm, pos, strand, sup, popfreq, tol))
    return toret


            
    

# Command-line interface. The description and epilog are spelled out line by
# line so their exact whitespace is visible; RawDescriptionHelpFormatter
# prints them without re-wrapping.
parser = argparse.ArgumentParser(
    description=(
        "           \n"
        "Description\n"
        "-----------\n"
        "    Check if a P-element insertion is in a piRNA cluster"
    ),
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=(
        "\n"
        "Prerequisites\n"
        "-------------\n"
        "    python version 3+\n"
        "\n"
        "Authors\n"
        "-------\n"
        "    Robert Kofler \n"
    ),
)

# All three options are mandatory; the attribute name on the parsed
# namespace is derived from the long option name (pt2, cli, tol).
parser.add_argument("--pt2", required=True, help="popte2 file")
parser.add_argument("--cli", required=True, help="te insetions")
parser.add_argument(
    "--tol",
    required=True,
    type=int,
    help="tolerance in position estimate, eg 20",
)

args = parser.parse_args()

# PopTE2 records grouped by contig, each carrying the requested tolerance.
pt2l=loadpt2(args.pt2,args.tol)

# For every line of the --cli file, look for the first PopTE2 record on the
# same contig whose tolerance window [pos-tol, pos+tol] contains the line's
# position. Matching lines are printed with the record's position, strand,
# support and frequency appended as four extra tab-separated columns;
# non-matching lines are dropped.
#
# Input columns (tab-separated, 0-based):
# 0     1   2   3           4       5   6       7   8   9   10                  11          12
# dere	R1	20	contig_232	1319408	6	2021	F	fwd	1	39.3993230378	0.615584415584	0.513006495993
# dere	R1	20	contig_26	428194	2	2437	R	rev	1	809.937512162	-0.875418535599	0.0576549918816
with open(args.cli) as cli_handle:  # closed automatically, even on error
    for line in cli_handle:
        line = line.rstrip("\n")
        # Skip blank lines rather than failing on the column lookup.
        if not line.strip():
            continue
        fields = line.split("\t")
        chrm, pos = fields[3], int(fields[4])

        # .get() avoids inserting an empty list into the defaultdict for
        # every contig that has no PopTE2 record.
        candidates = pt2l.get(chrm, ())
        hit = next(
            (rec for rec in candidates if rec.start() <= pos <= rec.end()),
            None,
        )

        if hit is not None:
            # Build a new list so the parsed row itself is not mutated.
            extra = [str(hit.pos), hit.strand, hit.sup, str(hit.popfreq)]
            print("\t".join(fields + extra))
        
    