#!/usr/bin/env python
import os
import sys
import re
import argparse
import random
import collections



class FastaReader:
	"""
	A light-weight fasta reader;
	returns a tuple (header, sequence)
	
	"""
	def __init__(self,fh):
		self.__filehandle=fh
		self.__prevheader=None

	def __iter__(self):
		return self
	
	def close(self):
		self.__filehandle.close()
	
	def __next__(self):
		return self.next()
	
	def next(self):
		line=""
		header=self.__prevheader
		seq=""
		while(1):
			line=self.__filehandle.readline()
			if line=="":					# file is empty
				if(header is not None):
					self.__prevheader=None		# return last sequence
					return (header,seq)
				else:
					raise StopIteration		# file empty and no last sequence -> STOP
			line=line.rstrip("\n")				# somethin is in the file
			if(line.startswith(">")):			# we have a header
				line=line.lstrip(">")
				if(header is None):			# if it is the first header just set the name of the header
					header=line
				else:
					self.__prevheader=line	# if it is any other header, set the previous to the current and return the sequence
					return(header,seq)
			else:
				seq+=line				# normal line, add to sequence
	
	@classmethod
	def readAll(cls,genomeFile):
		fr=FastaReader(open(genomeFile))
		toret={}
		for header,seq in fr:
			header=header.split(" ")[0]
			toret[header]=seq
		return toret
    


            
    

parser = argparse.ArgumentParser(description="""           
Description
-----------
    Extract flanking sequences for primer design""",formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Prerequisites
-------------
    python version 3+

Authors
-------
    Robert Kofler 
""")

parser.add_argument("--genome", type=str, required=True, dest="refgenome", default=None, help="the reference genome")
parser.add_argument("--p-sites", type=str, required=True, dest="psites", default=None, help="P-element insertion sites")
parser.add_argument("--margin", type=int, required=False, dest="margin", default=50, help="security margin on both sides of insertion site; takes inaccuracy of estimate of insertion site into account")
parser.add_argument("--fragment-size", type=int, required=False, dest="fragsize", default=500, help="total size of fragment needed for primer design")

args = parser.parse_args()
complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'} 
pseq="CATGATGAAATAACATAAGGTGGTCCCGTCGAAAGCCGAAGCTTACCGAAGTATACACTTAAATTCAGTGCACGTTTGCTTGTTGAGAGGAAAGGTTGTGTGCGGACGAATTTTTTTTTGAAAACATTAACCCTTACGTGGAATAAAAAAAAATGAAATATTGCAAATTTTGCTGCAAAGCTGTGACTGGAGTAAAATTAATTCACGTGCCGAAGTGTGCTATTAAGAGAAAATTGTGGGAGCAGAGCCTTGGGTGCAGCCTTGGTGAAAACTCCCAAATTTGTGATACCCACTTTAATGATTCGCAGTGGAAGGCTGCACCTGCAAAAGGTCAGACATTTAAAAGGAGGCGACTCAACGCAGATGCCGTACCTAGTAAAGTGATAGAGCCTGAACCAGAAAAGATAAAAGAAGGCTATACCAGTGGGAGTACACAAACAGAGTAAGTTTGAATAGTAAAAAAAATCATTTATGTAAACAATAACGTGACTGTGCGTTAGGTCCTGTTCATTGTTTAATGAAAATAAGAGCTTGAGGGAAAAAATTCGTACTTTGGAGTACGAAATGCGTCGTTTAGAGCAGCAGCTGAGGGAGTCTCAACAGTTGGAGGAGTCTCTACGCAAAATCTTCACGGACACGCAGATACGGATACTGAAGAATGGTGGACAAAGAGCTACGTTCAATTCCGACGACATTTCTACAGCTATTTGTCTCCACACCGCAGGCCCTCGAGCGTATAACCATCTGTACAAAAAAGGATTTCCTTTGCCCAGTCGTACGACTTTGTACAGATGGTTATCAGATGTGGACATAAAAAGAGGATGTTTGGATGTGGTCATAGACCTAATGGACAGTGATGGAGTTGATGACGCCGACAAGCTTTGCGTACTCGCTTTCGACGAGATGAAGGTCGCTGCTGCCTTCGAGTATGACAGCTCTGCTGATATTGTTTACGAGCCAAGCGACTATGTCCAACTGGCTATTGTTCGTGGTCTAAAAAAATCGTGGAAGCAGCCAGTTTTTTTCGATTTTAATACCCGAATGGACCCGGATACTCTTAACAATATATTAAGGAAACTGCATAGGAAAGGATATTTAGTAGTTGCTATTGTATCCGATTTAGGTACCGGAAACCAAAAGCTATGGACAGAGCTCGGTATATCAGAATGTAAGTTTCGTATATTACAAAAATCAGATAATCCTTGAAATTCCATTTTTTAGCAAAAACCTGGTTTAGCCATCCTGCAGATGACCATTTAAAGATTTTCGTTTTTTCGGATACGCCACATTTAATTAAGTTAGTCCGTAACCACTATGTGGATTCCGGATTAACAATAAATGGGAAAAAATTAACAAAAAAAACAATTCAGGAGGCACTTCATCTTTGCAACAAGTCCGATCTGTCTATCCTCTTTAAAATTAATGAAAATCACATTAATGTTCGATCGCTCGCAAAACAGAAGGTTAAATTGGCTACCCAGCTGTTTTCGAATACCACCGCTAGCTCGATCAGACGCTGCTATTCATTGGGGTATGACATTGAAAATGCCACCGAAACTGCGGACTTCTTCAAATTGATGAATGATTGGTTCGACATTTTTAATTCTAAATTGTCCACATCCAATTGCATTGAGTGCTCGCAACCTTATGGCAAGCAGTTGGATATACAGAATGATATTTTGAATCGAATGTCGGAAATTATGCGAACAGGAATTCTGGATAAACCCAAAAGGCTCCCATTTCAAAAAGGTATCATTGTGAATAATGCTTCGCTTGATGGCTTGTATAAATATTTGCAAGAAAACTTCAGTATGCAATACATATTAACAAGCCGTCTCAACCAAGACATTGTGGAGCATTTTTTTGGCAGCATGCGATCGAGAGGTGGACAATTCGACCATCCCACTCCACTGCAGTTTAAGTATAGGTTAAGAAAATATATAATAGGTATGACAAATTTAAAAGAATGCGTAAACAAAAATGTAATTCCATGATTTATAATTGTTTAATGTTTAGCTATATGTTTCAGGAAAGTTTCAGTTGAGAATGTAGGTAGTTATGTGCTGTCTATTGTGTTTTGTCTTTTATCTGTTTCTTTTCATTTTATTATTTAATCATTATCCTTTTGCTTATCCAGCCAGGAATACAGAAATGTTAAGAAATTCGGGAAATATCGAAGAGGACAACTCTGAAAGCTGGCTTAATTTAGATTTCAGTTCTAAAGAAAACGAAAATAAAAGTAAAGATGATGAGCCTGTCGATGATGAGCCTGTCGATGAGATGTTAAGCAATATAGATTTCACCGAAATGGATGAGTTGACGGAGGATGCGATGGAATATATCGCGGGCTATGTCATTAAAAAATTGAGAATCAGTGACAAAGTAAAAGAAAATTTGACATTTACATACGTCGACGAGGTGTCTCACGGCGGACTTATTAAGCCGTCCGAAAAATTTCAAGAGAAGTTAAAAGAGCTAGAATGTATTTTTTTGCATTATACAAATAATAATAATTTTGAAATTACAAATAATGTAAAGGAAAAATTAATATTAGCAGCGCGAAACGTCGATGTTGATAAACAAGTAAAATCTTTTTATTTTAAAATTAGAATATATTTTAGAATTAAGTACTTCAACAAAAAAATTGAAATTAAAAATCAAAAACAAAAGTTAATTGGAAACTCCAAATTATTAAAAATAAAACTTTAAAAATAATTTCGTCTAATTAATATTATGAGTTAATTCAAACCCCACGGACATGCTAAGGGTTAATCAACAATCATATCGCTGTCTCACTCAGACTCAATACGACACTCAGAATACTATTCCTTTCACTCGCACTTATTGCAAGCATACGTTAAGTGGATGTCTCTTGCCGACGGGACCACCTTATGTTATTTCATCATG"
rcpseq="".join([complement[x] for x in  reversed(list(pseq))])
genome=FastaReader.readAll(args.refgenome)
fragsize,margin=args.fragsize,args.margin
fraghalf=int(fragsize/2.0)
assert(fraghalf>margin) # will not work otherwise
targsize=fraghalf-margin

for l in open(args.psites):
	"""
	0	1			2		3		  4	5
	1	contig_508 8147216 -158..2907 F rev 1
	2	contig_164 412854 -1176..2789 R rev 1
	3	contig_232 3074038 -0..1892 R fwd 1
	"""
	a=re.split("\s+",l.rstrip("\n"))
	id,chrm,pos,pele,support,strand=a[0],a[1],int(a[2]),a[3],a[4],a[5]
	strand="+"
	activeseq=pseq
	if(a[5]=="rev"):
		strand="-"
		activeseq=rcpseq
	namebase=">{0}_".format(id)
	info="#{0}:{1}-{2}".format(chrm,pos,strand)
	if chrm not in genome:
		raise Exception("Contig not found in reference genome "+chrm)
	contigseq=genome[chrm]
	leftseq=contigseq[pos-fraghalf:pos-margin]+"|"+contigseq[pos-margin:pos]+"|"+activeseq[:margin]+"|"+activeseq[margin:fraghalf]
	rightseq=activeseq[-1-fraghalf:-1-margin]+"|"+activeseq[-1-margin:]+"|"+contigseq[pos:pos+margin]+"|"+contigseq[pos+margin:pos+fraghalf]
	absseq=contigseq[pos-fraghalf:pos-margin]+"|"+contigseq[pos-margin:pos]+"|"+contigseq[pos:pos+margin]+"|"+contigseq[pos+margin:pos+fraghalf]
	print(namebase+"L "+info)
	print(leftseq)
	print(namebase+"R "+ info)
	print(rightseq)
	print(namebase+"x "+info)
	print(absseq)
### debug code
"""
fraghalf=10
margin=5
pos=15
# sam is 1-based
#          123456789012345
contigseq="000001234567890abcdefghijzzzzz"
activeseq="AAAAACCCCC"
leftseq=contigseq[pos-fraghalf:pos-margin]+"|"+contigseq[pos-margin:pos]+"|"+activeseq[:margin]+"|"+activeseq[margin:fraghalf]
rightseq=activeseq[-fraghalf:-margin]+"|"+activeseq[-margin:]+"|"+contigseq[pos:pos+margin]+"|"+contigseq[pos+margin:pos+fraghalf]
absseq=contigseq[pos-fraghalf:pos-margin]+"|"+contigseq[pos-margin:pos]+"|"+contigseq[pos:pos+margin]+"|"+contigseq[pos+margin:pos+fraghalf]
print(leftseq)  # 12345|67890|AAAAA|CCCCC
print(rightseq) # AAAAA|CCCCC|abcde|fghij
print(absseq)   # 12345|67890|abcde|fghij


complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'} 
rcpseq="".join([complement[x] for x in  reversed(list(activeseq))])
print(rcpseq) # GGGGGTTTTT
"""

   