#!/usr/bin/env python
import os
import sys
import re
import argparse
import random
import collections


def load_hierarchy(file):
    """Load the mapping from TE insertion id to TE family.

    The hierarchy file is tab-separated with one insertion per line; only the
    first column (insertion id) and the third column (family) are used.
    Example content (columns are separated by tabs in the real file):

        insert    fb           family  suborder  order  class
        DME9736   FBgn0026065  Idefix  LTR       LTR    RNA
        DMIS176   FBgn0000004  17.6    LTR       LTR    RNA
        DMTN1731  FBgn0000007  1731    LTR       LTR    RNA

    Args:
        file: path of the hierarchy file.

    Returns:
        dict mapping insertion id -> family name.

    Raises:
        IndexError: if a non-blank data line has fewer than three columns.
    """
    hier = {}
    # The context manager closes the handle even if a malformed line raises.
    with open(file) as handle:
        for line in handle:
            line = line.rstrip("\n")
            # Blank lines (e.g. a trailing empty line) carry no record.
            if not line.strip():
                continue
            # Historical header form: any line starting with "id" is skipped.
            if line.startswith("id"):
                continue
            cols = line.split("\t")
            # Header form shown in the example above ("insert<TAB>fb<TAB>...").
            if cols[0] == "insert":
                continue
            hier[cols[0]] = cols[2]
    return hier
    


# Command-line interface: the read-count table and the TE hierarchy are both
# mandatory. RawDescriptionHelpFormatter keeps the manual layout of the
# description and epilog in the --help output.
parser = argparse.ArgumentParser(
    description="""
Description
-----------
    This script sums read counts per reference sequence and reports the total
    count, the count for sequences absent from the TE hierarchy (chrcount),
    the count for TE insertions (tecount) and the count per TE family""",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog="""
Prerequisites
-------------
    python version 3+

Authors
-------
    Robert Kofler
""",
)

parser.add_argument(
    "--input",
    type=str,
    required=True,
    help="tab-separated file with a reference id and a read count per line",
)
parser.add_argument(
    "--hier",
    type=str,
    required=True,
    help="tab-separated TE hierarchy file (column 1: insert id, column 3: family)",
)

args = parser.parse_args()
# Maps TE insertion id -> TE family.
hier = load_hierarchy(args.hier)

# Accumulate read counts from the input table.
#   totcount: all counted reads (the "*" row is excluded)
#   tecount:  reads whose reference id is listed in the TE hierarchy
#   chrcount: reads whose reference id is not listed in the TE hierarchy
#   tehash:   reads per TE family
totcount = 0
tecount = 0
chrcount = 0
tehash = collections.defaultdict(int)
# The context manager closes the input file once all lines are consumed.
with open(args.input) as handle:
    for line in handle:
        # Expected line format, tab-separated (reference id, read count):
        #     *               500
        #     1360            1
        #     1360_dsim_m113  3
        #     1360_dsim_m134  2
        line = line.rstrip("\n")
        # Blank lines (e.g. a trailing empty line) carry no record.
        if not line.strip():
            continue
        fields = line.split("\t")
        refid, count = fields[0], int(fields[1])
        # Rows with the id "*" are left out of every total.
        if refid == "*":
            continue
        totcount += count
        if refid in hier:
            tehash[hier[refid]] += count
            tecount += count
        else:
            chrcount += count


# Report the overall totals, then one line per TE family ordered by
# decreasing read count. print() is called with a single pre-formatted string
# so the output is identical under Python 2 and Python 3.
print("totcount\t{0}".format(totcount))
print("chrcount\t{0}".format(chrcount))
print("tecount\t{0}".format(tecount))
# Negated count as key gives descending order; families with equal counts
# keep the order in which tehash yields them (sorted() is stable).
family_counts = sorted(tehash.items(), key=lambda item: -item[1])
for family, family_count in family_counts:
    print("{0}\t{1}".format(family, family_count))
    
    
    