import sys
import random
import argparse
import collections
import fileinput
import math
import re
import os
import inspect



 # realpath() will make your script run, even if you symlink it :)
# Directory that contains this script, as reported by the running frame.
# Computed once; both search-path entries below are derived from it.
_script_dir = os.path.dirname(inspect.getfile(inspect.currentframe()))

cmd_folder = os.path.realpath(os.path.abspath(_script_dir))
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)

# Also expose the sibling "Modules" directory so modules kept there can be imported.
cmd_subfolder = os.path.realpath(
    os.path.abspath(os.path.join(_script_dir, "../Modules"))
)
if cmd_subfolder not in sys.path:
    sys.path.insert(0, cmd_subfolder)

from truncationobject import *


def get_maxfrequency(truncations):
    """Return the highest ``popfreq`` seen for every truncation key.

    Args:
        truncations: iterable of records exposing ``key`` and ``popfreq``
            attributes.

    Returns:
        A ``collections.defaultdict`` mapping each ``key`` to the largest
        ``popfreq`` among the records sharing that key. The running maximum
        starts at 0, so a key whose frequencies are all <= 0 maps to 0, and
        looking up an unseen key also yields 0.
    """
    peak_by_key = collections.defaultdict(int)
    for record in truncations:
        # max() keeps the stored value on ties, exactly like a strict ">" test.
        peak_by_key[record.key] = max(peak_by_key[record.key], record.popfreq)
    return peak_by_key


# Command-line interface: three required string options and one optional switch.
# The description and epilog are shown verbatim (RawDescriptionHelpFormatter).
parser = argparse.ArgumentParser(description="""           
Description
-----------
Summary statistics
""",formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""

Authors
-------
    Robert Kofler
""")
# --data and --base are both handed to TruncationReader.readall() further down.
parser.add_argument("--data", type=str, required=True, dest="data", help="Data")
parser.add_argument("--base", type=str, required=True, dest="base", help="Base population")
# --label is echoed as the first column of every output line.
parser.add_argument("--label", type=str, required=True, dest="label", help="Labels")
# The help text used to repeat the one of --base; describe what the switch does.
parser.add_argument(
    "--ignore-base",
    required=False,
    default=False,
    action="store_true",
    dest="ignorebase",
    help="Ignore truncations whose key also occurs in the base population",
)
args = parser.parse_args()

ignorebase = args.ignorebase
label = args.label

# Only positions 1..MAX_POSITION (inclusive) are reported.
# NOTE(review): 2907 is presumably the length of the reference sequence — confirm.
MAX_POSITION = 2907

# levels: replicate - key - generations

# Both inputs are loaded through the project's TruncationReader. The records
# are used below via their ``key`` ("start-end" string) and ``popfreq``.
datal = TruncationReader.readall(args.data)
basel = TruncationReader.readall(args.base)

# Keys present in the base population; skipped below when --ignore-base is set.
basekeys = set(t.key for t in basel)

# key -> highest population frequency observed for that key in the data.
freqdict = get_maxfrequency(datal)

# Per-position profiles:
#   covdict[pos]   number of distinct keys spanning pos
#   weighdict[pos] sum of the maximum frequencies of the keys spanning pos
covdict = collections.defaultdict(float)
weighdict = collections.defaultdict(float)

for key, freq in freqdict.items():
    if ignorebase and key in basekeys:
        continue
    start, end = [int(i) for i in key.split("-")]
    for pos in range(start, end + 1):  # the end coordinate is inclusive
        covdict[pos] += 1.0
        weighdict[pos] += freq

# Totals over all positions, used to normalise each profile to parts per million.
covarea = sum(covdict.values(), 0.0)
weigharea = sum(weighdict.values(), 0.0)

# One tab-separated line per covered position:
#   label, position, relative coverage, relative weighted coverage, ratio of the two
for i in range(1, MAX_POSITION + 1):
    # .get() avoids inserting empty entries into the defaultdict while scanning.
    cov = covdict.get(i, 0.0)
    if cov < 1:
        continue
    weigh = weighdict[i]
    rcov = (1000000.0 * cov) / covarea
    # A key whose maximum frequency is 0 still adds coverage, so the weighted
    # value (or even the weighted total) can be 0. Guard both divisions
    # instead of aborting half-way through the output with ZeroDivisionError.
    rweigh = (1000000 * weigh) / weigharea if weigharea else 0.0
    enr = rcov / rweigh if rweigh else float("inf")
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("{0}\t{1}\t{2}\t{3}\t{4}".format(label, i, rcov, rweigh, enr))
     







