import sys
import random
from optparse import OptionParser, OptionGroup
import collections
import math
import re


def filter_euchromosomes(samentries,euchromosomes):
	"""
	Return a new list holding the entries of samentries whose ``chr``
	attribute is contained in euchromosomes; the input order is kept.
	"""
	return [entry for entry in samentries if entry.chr in euchromosomes]


class SamChimeraReader:
	"""
	Iterator over a SAM file that yields lists of consecutive entries
	sharing the same read id.

	Lines that are empty or start with "@" are skipped. Every other line
	is split on tabs and wrapped in a SamEntry. Entries are grouped only
	when they are adjacent in the file; the same read id appearing again
	later starts a new group.
	"""
	def __init__(self,file):
		"""
		file -- path of the SAM file to read; it is opened immediately
		"""
		self.__file=file
		self.__fh=open(file)
		# one-entry look-ahead: the first entry of the following group
		self.__buffer=None
		# set once the end of the file has been reached
		self.__nextstop=False

	def __iter__(self):
		return self

	def next(self):
		"""
		Return the next group as a list of SamEntry objects.

		Raises StopIteration when no entries are left, including when the
		file contains no alignment lines at all.
		"""
		if self.__nextstop:
			raise StopIteration
		active=self.__getnext()
		if active is None:
			# file without a single alignment line: end cleanly instead of
			# failing on None.readid
			self.__finish()
			raise StopIteration
		activeid=active.readid
		toret=[active]
		while True:
			sam=self.__getnext()
			if sam is None:
				# end of file: hand out the current group, stop on the next call
				self.__finish()
				break
			if sam.readid==activeid:
				toret.append(sam)
			else:
				# first entry of the following group, keep it for the next call
				self.__bufferthis(sam)
				break
		return toret

	# Python 3 spelling of the iterator protocol; next() stays for Python 2
	__next__=next

	def __finish(self):
		"""Mark the reader as exhausted and release the file handle."""
		self.__nextstop=True
		self.__fh.close()

	def __nextfromfile(self):
		"""
		Read lines until one is neither empty nor a "@" header line and
		return it as a SamEntry; return None at end of file.
		"""
		while True:
			line=self.__fh.readline()
			if line=="":
				return None
			# also drop "\r" so files with CRLF line ends parse cleanly
			line=line.rstrip("\r\n")
			if line!="" and not line.startswith("@"):
				break
		return SamEntry(line.split("\t"))

	def __getnext(self):
		"""Return the buffered entry if there is one, otherwise read the next one from the file."""
		if self.__buffer is None:
			return self.__nextfromfile()
		toret=self.__buffer
		self.__buffer=None
		return toret

	def __bufferthis(self,sync):
		"""Store an entry so that the next __getnext() call returns it."""
		self.__buffer=sync


class SamEntry:
	"""
	One alignment line of a SAM file, kept as the list of its tab-separated
	fields with properties for the fields used by this script.
	"""
	def __init__(self,e):
		"""
		e -- list of the tab-separated fields of a single SAM line, e.g. the
		fields of (read sequence shortened):
		gnl|ti|580397033	0	contig_12389	722	14	13S534M482S	*	0	0	TGGCGGTTTG...	*	AS:i:486	XS:i:461	XF:i:3	XE:i:3	NM:i:12

		The CIGAR string (field 6) is split into (length, operation) tuples
		which are stored in self.cigsplit. Only the operations H, S, I, D, M
		and N are picked up by the pattern.
		"""
		cig=e[5]
		pe=[]
		for fi in re.finditer(r"(\d+)([HSIDMN])", cig):
			pe.append((int(fi.group(1)),fi.group(2)))
		self.cigsplit=pe
		self.e=e

	@property
	def cigar(self):
		"""The raw CIGAR string (field 6)."""
		return self.e[5]

	@property
	def read(self):
		"""The read sequence (field 10)."""
		return self.e[9]

	@property
	def chr(self):
		"""The name of the reference sequence (field 3)."""
		return self.e[2]

	@property
	def flag(self):
		"""The bitwise flag (field 2) as int."""
		return int(self.e[1])

	@property
	def readid(self):
		"""The read name (field 1)."""
		return self.e[0]

	@property
	def start(self):
		"""The alignment position (field 4) as int."""
		return int(self.e[3])

	@property
	def strand(self):
		"""
		"R" if the reverse-strand bit of the flag is set, otherwise "F".
		"""
		# 0x10 is the SAM "read reverse strand" bit. The mask used to be
		# 0x016 (= 22 = 0x2|0x4|0x10), which also reported properly paired
		# (0x2) and unmapped (0x4) forward reads as reverse.
		if self.flag & 0x10:
			return "R"
		return "F"

	@property
	def end(self):
		"""
		start plus the summed lengths of all M and D operations, i.e. the
		position directly after the aligned stretch of the reference.

		I, S and H do not advance the position. Raises Exception for any
		other parsed operation; with the pattern used in __init__ that is
		only N.
		"""
		end=self.start
		for num,op in self.cigsplit:
			if op=="M" or op=="D":
				end+=num
			elif op=="I" or op=="S" or op=="H":
				# present in the read only, nothing to add on the reference
				pass
			else:
				raise Exception("unknown cigar"+op)
		return end

	@property
	def matchscore(self):
		"""Length of the aligned stretch of the reference (end - start)."""
		return self.end-self.start

	def format(self):
		"""Return the entry as a tab-separated SAM line (without newline)."""
		return "\t".join(self.e)

	def format_gtf(self):
		"""Not implemented; returns None."""
		pass
		


# Command line: --sam <file> --min-leng <int> [--printall]
parser = OptionParser()
parser.add_option("--sam",dest="sam",help="the input file as sam")
parser.add_option("--min-leng",dest="minleng",help="the minimum length")
parser.add_option("--printall",action="store_true",dest="printall",help="should all hits be printed")
(options, args) = parser.parse_args()

# Both options are needed below; report a usage error instead of failing
# later with a TypeError on None.
if options.sam is None:
	parser.error("--sam is required")
if options.minleng is None:
	parser.error("--min-leng is required")

euchromosomes=set(["X","2L","2R","3L","3R","4"])
try:
	minleng=int(options.minleng)
except ValueError:
	parser.error("--min-leng must be an integer")
printall=bool(options.printall)


for g in SamChimeraReader(options.sam):
	# g holds all consecutive entries of one read id
	filtered=filter_euchromosomes(g,euchromosomes)
	if not filtered:
		continue
	# longest reference span first
	es=sorted(filtered,key=lambda s: -s.matchscore)

	for th in es:
		# es is sorted, so once one hit is too short all remaining ones are too
		if th.matchscore<minleng:
			break
		# output columns: chromosome, start, end
		# parenthesised single argument works as statement (Python 2) and function (Python 3)
		print("\t".join([th.chr,str(th.start),str(th.end)]))
		# without --printall only the best hit of each read is reported
		if not printall:
			break