#!/usr/bin/env python
import os
import sys
import re
import argparse
import random
import math
import collections


class ClusterInsertion:
	"""One input record: a single entry supporting an insertion.

	The constructor arguments are stored unchanged under attributes of the
	same name:

	speci, rep, gen, run -- identifiers copied from the input record
	chrm   -- name of the sequence the record lies on
	pos    -- position on ``chrm``
	pkey   -- one or more space-separated intervals, each written as
	          ``start..end`` with an optional leading ``+`` or ``-``
	sup    -- support label of the record
	strand -- strand label of the record
	cluins -- cluster flag of the record
	"""

	def __init__(self,speci,rep,gen,run,chrm,pos,pkey,sup,strand,cluins):
		# identifiers of the record
		self.speci=speci
		self.rep=rep
		self.gen=gen
		self.run=run
		# location
		self.chrm=chrm
		self.pos=pos
		self.strand=strand
		# support information
		self.pkey=pkey
		self.sup=sup
		self.cluins=cluins

	def support_len(self):
		"""Return the summed length (end-start) of every interval in ``pkey``.

		Raises AssertionError when an interval does not satisfy end>start and
		ValueError when an interval is not of the form ``start..end`` with
		integer bounds.
		"""
		total=0
		# splitting on a blank yields a one-element list when there is no blank
		for interval in self.pkey.split(" "):
			# the leading sign only marks orientation; it is not part of the bounds
			bounds=interval.lstrip("+-")
			lo,hi=bounds.split("..")
			lo,hi=int(lo),int(hi)
			assert(hi>lo)
			total+=hi-lo
		return total

def assert_clu(current,maxdist=50):
	"""Sanity-check that the records in ``current`` form one compact cluster.

	current -- non-empty sequence of records exposing ``chrm`` and ``pos``
	maxdist -- exclusive upper bound on the gap between one record and the
	           next (default 50)

	Every record must share the ``chrm`` of the first record, and each
	position must be less than ``maxdist`` beyond the position before it.
	Only the forward gap is bounded, so a record lying before its
	predecessor passes. Strand agreement is not checked.

	Raises AssertionError when a check fails and IndexError when ``current``
	is empty.
	"""
	first=current[0]
	lastpos=first.pos
	for c in current:
		assert c.chrm==first.chrm, "cluster spans {0} and {1}".format(first.chrm,c.chrm)
		distance=c.pos-lastpos
		lastpos=c.pos
		assert distance<maxdist, "gap of {0} on {1} is not below {2}".format(distance,c.chrm,maxdist)



	

def processcli(current):
	"""Collapse one cluster of records into a single tab-separated output row.

	current -- non-empty list of records belonging to the same cluster; it is
	           validated with assert_clu() first

	The printed columns are: speci, rep, gen and chrm of the first record,
	the mean position truncated to an integer, the number of records, the
	largest support_len() among them, the longest ``sup`` string (the first
	one wins on ties), and strand and cluins of the first record.
	"""
	assert_clu(current)

	longest_support=0
	longest_sup=""
	positions=[]
	for member in current:
		longest_support=max(longest_support,member.support_len())
		if len(member.sup)>len(longest_sup):
			longest_sup=member.sup
		positions.append(member.pos)

	# float division followed by int() truncates the mean position
	mean_pos=int(float(sum(positions))/float(len(positions)))

	head=current[0]
	columns=[
		head.speci,
		head.rep,
		head.gen,
		head.chrm,
		str(mean_pos),
		str(len(current)),
		str(longest_support),
		longest_sup,
		head.strand,
		head.cluins,
	]
	print("\t".join(columns))

	


def loadcli(file):
	"""Load cluster-insertion records from a tab-separated file.

	file -- path of the input file

	Every non-blank line must hold exactly ten columns, read as
	speci, rep, gen, run, chrm, pos, pkey, sup, strand, cli. Example rows:

		dere	R1	G20	run1	contig_232	1319415	+0..849	F	fwd	1
		dere	R1	G20	run1	contig_232	1319415	-0..492	R	fwd	1
		dere	R1	G20	run1	contig_26	428194	+0..2437	F	rev	1

	Rows whose last column is "0" are dropped; blank lines are skipped.

	Returns a defaultdict mapping "speci:rep:gen:chrm" (the run column is not
	part of the key) to the list of ClusterInsertion objects for that key, in
	file order, with ``pos`` converted to int.

	Raises ValueError when a line does not have ten columns or when ``pos`` is
	not an integer.
	"""
	toret=collections.defaultdict(list)
	# the context manager closes the handle even when a malformed line raises
	with open(file) as handle:
		for lineno,line in enumerate(handle,1):
			# also strip "\r" so files with Windows line endings still match cli=="0"
			line=line.rstrip("\r\n")
			if not line.strip():
				continue
			fields=line.split("\t")
			if len(fields)!=10:
				raise ValueError("{0}, line {1}: expected 10 tab-separated columns, found {2}".format(file,lineno,len(fields)))
			speci,rep,gen,run,chrm,pos,pkey,sup,strand,cli=fields
			if cli=="0":
				continue
			key="{0}:{1}:{2}:{3}".format(speci,rep,gen,chrm)
			toret[key].append(ClusterInsertion(speci,rep,gen,run,chrm,int(pos),pkey,sup,strand,cli))

	return toret


            
    

parser = argparse.ArgumentParser(description="""           
Description
-----------
    Check if a P-element insertion is in a piRNA cluster""",formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Prerequisites
-------------
    python version 3+

Authors
-------
    Robert Kofler 
""")


# both options are required, so no default is given
parser.add_argument("--cli", type=str, required=True, dest="bed", help="tab-separated cluster-insertion file (10 columns; rows whose last column is 0 are ignored)")
parser.add_argument("--pos-tol", type=int, required=True, dest="tol", help="position tolerance: a record joins the current cluster when its position is less than this far from the previous record")


args = parser.parse_args()

cli=loadcli(args.bed)
tol=args.tol


# Walk the records of every key in ascending position and cut a new cluster
# whenever the gap to the previous record reaches the tolerance.
# NOTE: processcli() validates each cluster with assert_clu(), which rejects
# gaps of 50 or more, so a tolerance above 50 can trigger an AssertionError.
for key,clis in cli.items():
	current=[]
	for c in sorted(clis,key=lambda i:i.pos):
		if current:
			distance = c.pos - current[-1].pos
			# guaranteed by the sort; guards against a broken ordering
			assert(distance>=0)
			if distance>=tol:
				# gap too large: emit the finished cluster and start a new one
				processcli(current)
				current=[]
		current.append(c)
	# emit the last cluster of this key
	if current:
		processcli(current)

