#!/usr/bin/env python
import os
from site import ENABLE_USER_SITE
import sys
import inspect
import re
import argparse
import random
import math
import collections
import fileinput
# Precompiled pattern: one or more digits followed by exactly one of the
# letters M, I, S, D, H, N, captured as two groups (count, letter).
# NOTE(review): this looks like it is meant for SAM CIGAR length/operation
# pairs, but nothing in the visible code uses it -- confirm before removing.
prog = re.compile(r"(\d+)([MISDHN])")


def load_fai(faifile):
	"""
	Read a tab-separated index file and map each sequence id to its length.

	Only the first two columns of every line are used: column 1 is the
	sequence id, column 2 its length (parsed as int). Example line:
	PPI251	2907	132096	2907	2908

	faifile: path of the index file to read
	returns: dict {sequence id (str): length (int)}
	raises: ValueError if the second column is not an integer,
	        IndexError if a non-blank line has fewer than two columns
	"""
	toret={}
	# the context manager closes the handle even when parsing fails
	with open(faifile) as fh:
		for l in fh:
			l=l.rstrip("\n")
			if not l.strip():
				# tolerate blank lines (e.g. a trailing empty line)
				continue
			a=l.split("\t")
			toret[a[0]]=int(a[1])
	return toret


def isSense(flag):
	"""
	Decide from the SAM flag of a paired-end read whether it reports a
	sense transcript.

	In Read 1, sequencing reads map to the antisense strand.
	In Read 2, sequencing reads map to the sense strand.
	https://support.illumina.com/content/dam/illumina-support/documents/documentation/chemistry_documentation/samplepreps_truseq/truseq-stranded-mrna-workflow/truseq-stranded-mrna-workflow-reference-1000000040498-00.pdf

	Decision table (0x10 = read is reverse complemented):
	  first in pair  (0x40), reverse -> sense     (True)
	  first in pair  (0x40), forward -> antisense (False)
	  second in pair (0x80), reverse -> antisense (False)
	  second in pair (0x80), forward -> sense     (True)

	flag: integer SAM flag of the read
	returns: True for a sense transcript, False for an antisense transcript
	raises: AssertionError if the paired bit (0x1) is not set,
	        Exception if neither 0x40 nor 0x80 is set
	"""
	assert flag & 0x1 > 0 # read must be paired end; otherwise the thing does not work

	# strand bit; it was previously tested as "flag & 0x0" for the second
	# mate, which is always zero and made every second mate look sense
	is_reverse=(flag & 0x10) > 0

	if flag & 0x40 > 0:
		# first in pair: reverse complement -> sense, forward -> antisense
		return is_reverse
	elif flag & 0x80 > 0:
		# second in pair: reverse complement -> antisense, forward -> sense
		return not is_reverse
	else:
		raise Exception("weird read; neither first nor second")




# Help-text fragments, spelled out line by line so that the significant
# whitespace (including the trailing blanks of the first description line)
# stays visible; RawDescriptionHelpFormatter prints them as written.
description_text = (
	"           \n"
	"Description\n"
	"-----------\n"
	"Summary statistics\n"
)
epilog_text = (
	"\n"
	"miRNA: 21-23nt\n"
	"piRNA: 23-28nt\n"
	"\n"
	"\n"
	"Authors\n"
	"-------\n"
	"    Robert Kofler\n"
)

parser = argparse.ArgumentParser(
	description=description_text,
	formatter_class=argparse.RawDescriptionHelpFormatter,
	epilog=epilog_text,
)
# input SAM file, opened for reading by argparse itself
parser.add_argument(
	'--sam',
	dest="sam",
	type=argparse.FileType('r'),
	default=None,
	required=True,
	help="A sam file",
)
# reads with a mapping quality below this threshold are not classified
parser.add_argument(
	"--min-mq",
	dest="minmq",
	type=int,
	default=20,
	required=False,
	help="min mapping quality",
)
# label written to the first column of the output line
parser.add_argument(
	"--sample-id",
	dest="sid",
	type=str,
	required=True,
	help="the sample id",
)

args = parser.parse_args()
minmq, sid = args.minmq, args.sid



# Tallies collected in a single pass over the SAM input.
reads=0        # alignment records seen (header and blank lines are not counted)
mappedreads=0  # records without the "unmapped" flag bit (0x4)
mappedmq=0     # mapped records with mapping quality >= minmq
sgene,asgene=0,0 # sense and anti-sense counts of gene
ste,aste=0,0 # sense and anti-sense counts of every other reference
spele,aspele=0,0 # sense and anti-sense counts of the Pelement

for line in args.sam:
	# SAM header lines start with '@' and carry no alignment; blank lines
	# carry nothing either. Both used to crash the int() conversions below.
	if line.startswith("@") or not line.strip():
		continue
	a=line.rstrip("\n").split("\t")
	reads+=1
	# discard unmapped
	flag=int(a[1])
	if flag & 0x004 > 0:
		continue
	mappedreads+=1
	# discard low mapping quality
	mq=int(a[4])
	if mq< minmq:
		continue
	mappedmq+=1
	# the reference name decides which pair of counters the read goes to
	refchr=a[2]

	# get the active transcript
	issens=isSense(flag)
	if refchr=="PPI251":
		# the P-element
		if issens:
			spele+=1
		else:
			aspele+=1
	elif refchr.endswith("_mRNA"):
		# a gene transcript
		if issens:
			sgene+=1
		else:
			asgene+=1
	else:
		# any other reference sequence
		if issens:
			ste+=1
		else:
			aste+=1


# one tab-separated line: sample id, the three read totals, then the
# sense/antisense pairs for genes, other references and the P-element
topr=[args.sid,reads,mappedreads,mappedmq,sgene,asgene,ste,aste,spele,aspele]
print("\t".join(str(i) for i in topr))