#!/usr/bin/env python
import os
import sys
import re
import argparse
import random
import collections




def clustersForChr(toparse, threshold, binsize):
    """Find high-scoring clusters of bins along one chromosome.

    Bins are visited on the grid ``0, binsize, 2*binsize, ...`` up to the
    largest key of ``toparse``.  Each bin contributes
    ``toparse[pos] - threshold`` to a running score; bins missing from
    ``toparse`` count as 0 before the threshold is subtracted.  Keys that do
    not lie on the grid are never read.

    A cluster opens at the first bin whose thresholded score is positive and
    is extended while the running score stays positive.  The highest running
    score and the bin where it was reached are remembered (on ties the
    right-most bin wins).  When the running score drops to zero or below, or
    the data ends, the cluster is reported up to that best bin and scanning
    resumes at the bin right after it.

    Args:
        toparse: mapping of bin start position -> bin score.
        threshold: value subtracted from every bin score.
        binsize: distance between consecutive bin start positions.

    Returns:
        A list of ``(clusterstart, lhsp, lhs)`` tuples in scan order, where
        ``clusterstart`` is the first bin of the cluster, ``lhsp`` the bin
        holding the highest running score and ``lhs`` that score.  The list
        is empty when ``toparse`` is empty.

    Raises:
        ValueError: if ``binsize`` is not positive.
    """
    if binsize <= 0:
        raise ValueError("binsize must be a positive number")
    if not toparse:
        return []

    lastpos = max(toparse)
    toret = []  # (clusterstart, lhsp, lhs) per cluster

    active = False
    runningscore = 0  # cumulative thresholded score of the open cluster
    lhs = 0           # last high score
    lhsp = 0          # last high score position
    clusterstart = 0
    pos = 0

    while True:
        if pos > lastpos:
            if not active:
                break
            # Data ended inside an open cluster: close it at its best bin and
            # rescan the tail behind that bin, exactly as for a cluster that
            # ended because its running score fell to zero.
            toret.append((clusterstart, lhsp, lhs))
            active = False
            pos = lhsp + binsize
            continue

        # .get() keeps plain dicts usable and does not insert gap positions.
        localscore = toparse.get(pos, 0) - threshold

        if active:
            runningscore += localscore
            if runningscore >= lhs:
                lhs = runningscore
                lhsp = pos
            elif runningscore <= 0:
                toret.append((clusterstart, lhsp, lhs))
                active = False
                # Resume right behind the best bin (pos is advanced below).
                pos = lhsp
        elif localscore > 0:
            active = True
            clusterstart = pos
            lhsp = pos
            lhs = localscore
            runningscore = localscore

        pos += binsize

    return toret
    
    


# Command-line interface.  The description and the --bin/--threshold help
# texts describe what the script actually does with each value.
parser = argparse.ArgumentParser(
    description="""
Description
-----------
    Scans per-bin scores along each chromosome and reports clusters, i.e.
    runs of bins whose cumulative (score - threshold) stays positive.
    One tab-separated line is printed per cluster:
    sample id, chromosome, cluster start, cluster end, cluster score""",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog="""
Prerequisites
-------------
    python version 3+

Authors
-------
    Robert Kofler 
""")


parser.add_argument("--bin", type=str, required=True, dest="bin",
                    help="tab-separated input file with one bin per line "
                         "(column 1: chromosome, column 2: bin start, column 5: bin score)")
parser.add_argument("--threshold", type=float, required=True, dest="threshold",
                    help="score threshold subtracted from every bin score")
parser.add_argument("--sid", type=str, required=True, dest="sampleid",
                    help="sample id")
parser.add_argument("--max-bin-score", type=float, required=False, default=1000000000.0,
                    dest="maxbinscore", help="max bin score")
parser.add_argument("--binsize", type=int, required=True, dest="binsize",
                    help="binsize")
# option is to introduce maxscore

args = parser.parse_args()

# Short module-level aliases used by the rest of the script.
binsize = args.binsize
threshold = args.threshold
mbs = args.maxbinscore
# Example rows kept from the original source.
# NOTE(review): these show three columns, whereas the loop below reads
# columns 1, 2 and 5 of a five-column file -- presumably an older input
# format; confirm before relying on them.
#   650399182	100	114
#   650396740	0	48
#   650396740	100	74
#   650396740	200	99

# chromosome -> {bin start position -> bin score}; positions that were never
# read from the file evaluate to 0.
ph = collections.defaultdict(lambda: collections.defaultdict(int))

# The context manager closes the input file once it has been read.
with open(args.bin) as binfile:
    for l in binfile:
        # Skip blank lines (e.g. a trailing newline at the end of the file)
        # instead of failing on the missing columns.
        if not l.strip():
            continue
        a = l.rstrip("\n").split("\t")
        # 0     1   2   3   4
        # U_22	700	799	3	1.16983327536
        # U_22	800	899	2	0.779888850241

        chrm, pos, count = a[0], int(a[1]), float(a[4])

        # maxbinscore: cap the score of a single bin
        if count > mbs:
            count = mbs

        ph[chrm][pos] = count

for chrm, tmp in ph.items():
    clusters = clustersForChr(tmp, threshold, binsize)
    for c in clusters:
        # c = (cluster start, position of the best bin, cluster score); the
        # printed end is the best bin's start position plus one bin size.
        print("{0}\t{1}\t{2}\t{3}\t{4}".format(args.sampleid, chrm, c[0], c[1] + binsize, c[2]))