#!/usr/bin/env python
import os
import sys
import inspect
import re
import argparse
import random
import math
import collections
import fileinput
# Matches a run of digits immediately followed by one of the letters
# M, I, S, D, H or N, capturing the number and the letter as two groups.
# NOTE(review): presumably meant for the operations of a SAM CIGAR string --
# confirm; the pattern is not referenced in the visible part of this file.
prog = re.compile(r"(\d+)([MISDHN])")


def _fraction(hits, misses):
    """Return hits / (hits + misses) as a float, or "na" if both are zero."""
    total = hits + misses
    if total > 0:
        return float(hits) / float(total)
    return "na"


def _stat_row(label, hits, misses):
    """Format one tab-separated row: label, both counts and their fraction."""
    return "{0}\t{1}\t{2}\t{3}".format(label, hits, misses, _fraction(hits, misses))


def print_stat(file, family, minl=23, maxl=28):
    """Print strand and A-content statistics for reads mapped to "*_te" references.

    Each alignment line is split on tabs; column 1 is read as the integer
    flag, column 2 as the reference name and column 9 as the read sequence.
    Only mapped reads (flag bit 0x4 unset) whose sequence length lies in
    [minl, maxl] and whose reference name ends in "_te" are counted.

    Reads with flag bit 0x10 set are tallied as antisense and tested for an
    "A" as the last stored base; all other reads are tallied as sense and
    tested for an "A" at index 9 (the 10th base).
    NOTE(review): this looks like the piRNA 1U/10A ping-pong signature
    (reverse-strand reads are stored reverse-complemented in SAM) -- confirm.

    Every tally is kept twice: once over all "*_te" references and once for
    the reference whose name, minus the "_te" suffix, equals *family* (these
    rows carry the fixed labels "P-element" / "P ...").

    Args:
        file: iterable of SAM text lines (e.g. an open file).
        family: reference name without the "_te" suffix to report separately.
        minl: shortest read length that is counted (inclusive).
        maxl: longest read length that is counted (inclusive).

    Raises:
        ValueError, IndexError: on non-header, non-blank lines that do not
            have an integer flag column or have fewer than ten columns.

    The report is written to standard output; nothing is returned.
    """
    # counts[scope][strand] == [reads with the tested A, reads without it]
    counts = {
        "all": {"sense": [0, 0], "antisense": [0, 0]},
        "family": {"sense": [0, 0], "antisense": [0, 0]},
    }

    for line in file:
        # Header lines start with "@" and carry no flag column; blank lines
        # carry no columns at all. Both used to crash the parser below.
        if line.startswith("@") or not line.strip():
            continue

        fields = line.rstrip("\n").split("\t")
        flag = int(fields[1])
        if flag & 0x4:
            # discard unmapped
            continue

        ref = fields[2]
        seq = fields[9]
        if not minl <= len(seq) <= maxl:
            continue
        if not ref.endswith("_te"):
            continue

        # Slices instead of plain indexing so that a caller-supplied minl
        # below 10 (or 1) yields "no A" rather than an IndexError.
        if flag & 0x10:
            strand = "antisense"
            has_a = seq[-1:] == "A"
        else:
            strand = "sense"
            has_a = seq[9:10] == "A"

        slot = 0 if has_a else 1
        counts["all"][strand][slot] += 1
        if ref[:-3] == family:
            counts["family"][strand][slot] += 1

    everything = counts["all"]
    selected = counts["family"]

    # Single-argument print(...) prints the same text under Python 2 and 3.
    print("percent sense")
    print(_stat_row("all", sum(everything["sense"]), sum(everything["antisense"])))
    print(_stat_row("P-element", sum(selected["sense"]), sum(selected["antisense"])))

    print("percent A")
    print(_stat_row("antisense-A", everything["antisense"][0], everything["antisense"][1]))
    print(_stat_row("sense-A", everything["sense"][0], everything["sense"][1]))

    print(_stat_row("P antisense-A", selected["antisense"][0], selected["antisense"][1]))
    print(_stat_row("P sense-A", selected["sense"][0], selected["sense"][1]))



     

# Command-line interface: a single mandatory --sam option naming the SAM file.
# RawDescriptionHelpFormatter keeps the line breaks of the description and
# epilog in the --help output.
parser = argparse.ArgumentParser(description="""           
Description
-----------
Summary statistics
""",formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""

""")
# required=True already forces the option, so no default is needed.
parser.add_argument('--sam', type=argparse.FileType('r'), dest="sam", required=True, help="A sam file")


# Parse arguments and run the analysis only when executed as a script, so
# that importing this module does not consume sys.argv or read any file.
if __name__ == "__main__":
    args = parser.parse_args()
    try:
        # "PPI251" is the reference (minus its "_te" suffix) reported separately.
        print_stat(args.sam, "PPI251")
    finally:
        # argparse.FileType opens the file but never closes it.
        args.sam.close()



