#!/usr/bin/env python
import os
import sys
import inspect
import re
import argparse
import random
import math
import collections
import fileinput




# Command-line interface. RawDescriptionHelpFormatter prints the description
# and epilog exactly as written below.
parser = argparse.ArgumentParser(description="""           
Description
-----------
Summary statistics
""",formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""

Authors
-------
    Robert Kofler
""")
parser.add_argument("--sam", type=argparse.FileType("r"), default=None, dest="sam", required=True, help="A sam file")
parser.add_argument("--min-mq", type=int, required=False, dest="minmq", default=0, help="min mapping quality")
parser.add_argument("--max-mm", type=int, required=False, dest="maxmm", default=10, help="max mismatches")
# --rep, --gen and --cond are free-text labels for the sample.
parser.add_argument("--rep", type=str, required=True, dest="rep", help="replicate")
parser.add_argument("--gen", type=str, required=True, dest="gen", help="generation")
# Fix: the help text used to be a copy of the --gen help ("generation").
parser.add_argument("--cond", type=str, required=True, dest="cond", help="condition")
args = parser.parse_args()

# Alignment filters taken from the command line.
minmq = args.minmq  # minimum mapping quality
maxmm = args.maxmm  # maximum mismatches

# Read-length bounds.
# mistart/miend: intended length window for reads on "*_miRNA" references.
mistart = 21
miend = 23
# pistart/piend: length window for reads on "*_te" references
# (presumably the piRNA size range -- confirm).
pistart = 23
piend = 28

# Lookup table keyed by TE family identifier (the reference name without its
# "_te" suffix). The value is printed as the fifth output column; presumably
# it is the length of the family's sequence in bp -- confirm against the
# reference used for mapping.
telengs={"M14653":1286,"DME9736":7411,"DMIS176":7439,"DMTN1731":4648,"DMIS297":6995,"DM23420":6126,"412":7567,"DMAURA":4263,"DMBARI1":1728,"BS":5142,"DMU89994":6411,"DMCOPIA":5143,"DMW1DOC":4725,"F":4708,"FB":4347,"DMTNFB":1106,"DMREPG":4346,"DMGYPF1A":7469,"DMHFL1":2959,"DMTHB1":1653,"DM06920":6083,"DMIFACA":5371,"DMLINEJA":5020,"DMTRDNA":1435,"DMRTMGD1":7480,"DMMDG3":5519,"DMDM11":5461,"PPI251":2907,"DMPOGOR11":2121,"DMRER1DM":5356,"DMRER2DM":3607,"DM33463":1736,"SPRINGER":7546,"TARTC":0,"AY561850":0,"DM14101":0,"TIRANT":8526,"DMBLPP":5034,"OPUS":7521,"DM_ROO":9092,"BLOOD":7410,"DMZAM":8435,"DME010298":8507,"ROXELEMENT":4740,"AF222049":5249,"CIRC":7450,"DME278684":5108,"RT1B":5171,"QUASIMODO":7387,"Beagle":7062,"Tinker":6112,"TABOR":7345,"STALKER":7256,"INE1":611,"GTWIN":7411,"GYPSY2":6841,"ACCORD":7404,"1360":3409,"GYPSY3":6973,"INVADER":4032,"INVADER2":5124,"INVADER3":5484,"G2":3102,"DMCR1A":4470,"TC1":1666,"DOC2":4789,"DOC3":4740,"IVK":5402,"RT1C":5443,"GYPSY4":6852,"INVADER4":3105,"BAGGINS":5453,"G3":4605,"MARINER2":912,"TRANSIB1":2167,"TRANSIB3":2883,"TRANSIB2":2844,"GYPSY5":7369,"GYPSY6":7826,"INVADER5":4038,"DIVER2":4917,"TRANSIB4":2656,"S2":1735,"DM88":4558,"JUAN":4236,"FROGGER":2483,"ROVER":7318,"DMTOM1_LTR":410,"G5_DM":4856,"G4_DM":3856,"ROOA_LTR":7621,"JOCKEY2":3428,"G6_DM":2042,"LOOPER1_DM":1881,"AF418572":804,"QBERT":7650,"McCLINTOCK":6450,"STALKER4":7359,"HOPPER2":1593,"STALKER2":7672,"STALKER3":372,"AF541951":1064,"DME487856":8556,"BS3":1790,"BS4":754,"DOC4":2791,"DOC5":4682,"FW2":3961,"FW3":3132,"HELITRON1_DM":564,"R1-2":3216,"TC1-2":1644,"G5A":2841,"G7":1192,"GYPSY7":5486,"GYPSY8":4955,"GYPSY9":5349,"GYPSY10":6006,"GYPSY11":4428,"GYPSY12":10218,"INVADER6":4885,"HEL":1317,"TC3":1743,"Beagle2":7220,"Q":759,"OSV":1543,"DME542581":10463}

# Per-family read tallies (all reads / antisense reads only). A family that
# was never incremented reads back as 0.
tecount = collections.defaultdict(int)
astecount = collections.defaultdict(int)

# Totals over all families and over miRNA references; the "as" variants
# count antisense reads only.
tesum = astesum = 0
mirnacount = asmirnacount = 0

# Captures the number that follows the "NM:i:" tag of a SAM record; the
# script uses it as the mismatch count.
reo = re.compile(r"NM:i:(\d+)")


# Tally the alignments of the SAM file per TE family and for miRNAs.
#
# Tab-separated fields of a SAM record, with the 0-based index used below:
#   0   1     2          3    4   5      6  7  8  9    10    11...
#   r1  16    M14653_te  172  70  23M    *  0  0  SEQ  QUAL  PG:Z:novoalign AS:i:0 UQ:i:0 NM:i:0 MD:Z:23
#   r2  0     M14653_te  240  70  27M    *  0  0  SEQ  QUAL  PG:Z:novoalign AS:i:0 UQ:i:0 NM:i:0 MD:Z:27
#       flag  reference       mapq
for line in args.sam:
    line = line.rstrip("\n")
    # Header lines ("@HD", "@SQ", ...) and blank lines are not alignments;
    # without this guard they crash the integer conversion of the flag.
    if not line or line.startswith("@"):
        continue
    a = line.split("\t")

    flag = int(a[1])
    if flag & 0x004:  # bit 0x4 set: read is unmapped
        continue
    mq = int(a[4])
    if mq < minmq:
        continue

    # A record without an NM tag cannot be checked against maxmm; skip it.
    mo = reo.search(line)
    if mo is None:
        continue
    mm = int(mo.group(1))
    if mm > maxmm:
        continue

    antisense = bool(flag & 0x10)  # bit 0x10 set: read maps to the reverse strand
    ref = a[2]
    readlen = len(a[9])

    if ref.endswith("_te"):
        # Only reads within [pistart, piend] nt count towards a TE family.
        if readlen < pistart or readlen > piend:
            continue
        teseq = ref[:-3]  # family id = reference name without "_te"
        tecount[teseq] += 1
        tesum += 1
        if antisense:
            astesum += 1
            astecount[teseq] += 1
    elif ref.endswith("_miRNA"):
        # Fix: the upper bound is miend. The original tested against mistart
        # twice, so only reads of exactly mistart nt were ever counted.
        if readlen < mistart or readlen > miend:
            continue
        mirnacount += 1
        if antisense:
            asmirnacount += 1





# TE families to report, in the order their rows are printed. Every entry is
# looked up in telengs when the rows are written, so it must be a key there.
teprintlist=["1360","412","ACCORD","AF222049","AF418572","AF541951","BAGGINS","BLOOD","BS","BS3","BS4","Beagle","Beagle2","CIRC","DIVER2","DM06920","DM23420","DM33463","DM88","DMAURA","DMBARI1","DMBLPP","DMCOPIA","DMCR1A","DMDM11","DME010298","DME278684","DME487856","DME542581","DME9736","DMGYPF1A","DMHFL1","DMIFACA","DMIS176","DMIS297","DMLINEJA","DMMDG3","DMREPG","DMRER1DM","DMRER2DM","DMRTMGD1","DMTHB1","DMTN1731","DMTNFB","DMTOM1_LTR","DMTRDNA","DMU89994","DMW1DOC","DMZAM","DM_ROO","DOC2","DOC3","DOC4","DOC5","F","FB","FROGGER","FW2","FW3","G2","G3","G4_DM","G5A","G5_DM","G6_DM","G7","GTWIN","GYPSY10","GYPSY11","GYPSY12","GYPSY2","GYPSY3","GYPSY4","GYPSY5","GYPSY6","GYPSY7","GYPSY8","GYPSY9","HEL","HOPPER2","INE1","INVADER","INVADER2","INVADER3","INVADER4","INVADER5","INVADER6","IVK","JOCKEY2","JUAN","LOOPER1_DM","M14653","MARINER2","McCLINTOCK","OPUS","OSV","PPI251","Q","QBERT","QUASIMODO","R1-2","ROOA_LTR","ROVER","ROXELEMENT","RT1B","RT1C","S2","SPRINGER","STALKER","STALKER2","STALKER3","STALKER4","TABOR","TC1","TC1-2","TC3","TIRANT","TRANSIB1","TRANSIB2","TRANSIB3","TRANSIB4","Tinker"]

# Sample labels copied verbatim into every output row.
cond = args.cond
gen = args.gen
rep = args.rep

# The per-family counts are normalised to one million miRNA reads. With no
# miRNA read there is nothing to normalise by, so stop with a clear message
# (and a non-zero exit status) instead of a bare ZeroDivisionError.
if mirnacount == 0:
    sys.exit("error: no miRNA reads passed the filters; cannot normalize TE counts")

# One tab-separated row per family:
# condition, replicate, generation, family, telengs value, raw count,
# count per million miRNA reads.
for fam in teprintlist:
    tel = telengs[fam]
    tec = tecount[fam]
    normc = (1000000.0 * tec) / float(mirnacount)
    topr = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}".format(cond, rep, gen, fam, tel, tec, normc)
    # Parenthesised form prints the same text on Python 2 and also parses on Python 3.
    print(topr)
 
