package xww.science

import xww.XwwException
import collection.mutable.ArrayBuffer

/**
 * Created by IntelliJ IDEA.
 * User: xiongwe
 * Date: 3/26/12
 * Time: 6:50 PM
 * To change this template use File | Settings | File Templates.
 */
/**
 * FASTA record whose sequence lines are exposed through an iterator rather than a
 * materialised collection.
 *
 * @param annotation raw header line; it is parsed with Fasta.splitAnnotationLine, which throws
 *                   an XwwException when the line is not a valid header
 * @param Seqs       iterator over the sequence lines belonging to this record
 */
class FastaItemMemEfficient(annotation: String, val Seqs: Iterator[String]) {
  // Parse the header once; the pair is (id, full header line).
  private val parsedHeader = Fasta.splitAnnotationLine(annotation)

  /** Record id as returned by Fasta.splitAnnotationLine. */
  val id = parsedHeader._1

  /** The header line as returned by Fasta.splitAnnotationLine. */
  val annotationLine = parsedHeader._2
}
/**
 * FASTA record whose sequence is exposed as an iterator of located chunks.
 *
 * @param annotation raw header line; it is parsed with Fasta.splitAnnotationLine, which throws
 *                   an XwwException when the line is not a valid header
 * @param Seqs       iterator over the sequence chunks of this record
 */
class FastaItemBuffer(annotation: String, val Seqs: Iterator[StrWithLocation]) {
  // Parse the header once; the pair is (id, full header line).
  private val parsedHeader = Fasta.splitAnnotationLine(annotation)

  /** Record id as returned by Fasta.splitAnnotationLine. */
  val id = parsedHeader._1

  /** The header line as returned by Fasta.splitAnnotationLine. */
  val annotationLine = parsedHeader._2
}
/**
 * FASTA record holding all of its sequence lines in memory.
 *
 * @param annotation raw header line; it is parsed with Fasta.splitAnnotationLine, which throws
 *                   an XwwException when the line is not a valid header
 * @param Seqs       the sequence lines of this record, in input order
 */
class FastaItemRegular(annotation: String, val Seqs: List[String]) {
  // Parse the header once; the pair is (id, full header line).
  private val parsedHeader = Fasta.splitAnnotationLine(annotation)

  /** Record id as returned by Fasta.splitAnnotationLine. */
  val id = parsedHeader._1

  /** The header line as returned by Fasta.splitAnnotationLine. */
  val annotationLine = parsedHeader._2

  /** Total number of characters over all sequence lines; computed on first access. */
  lazy val length = Seqs.map(_.length).sum
}
/**
 * FASTA record whose whole sequence is held as one string.
 *
 * @param annotation raw header line; it is parsed with Fasta.splitAnnotationLine, which throws
 *                   an XwwException when the line is not a valid header
 * @param Seq        the complete sequence text of this record
 */
class FastaItemSingleLineSeq(annotation: String, val Seq: String) {
  // Parse the header once; the pair is (id, full header line).
  private val parsedHeader = Fasta.splitAnnotationLine(annotation)

  /** Record id as returned by Fasta.splitAnnotationLine. */
  val id = parsedHeader._1

  /** The header line as returned by Fasta.splitAnnotationLine. */
  val annotationLine = parsedHeader._2

  /** Number of characters in the sequence, fixed at construction time. */
  val length = Seq.length
}
/**
 * A piece of sequence text paired with an integer location.
 *
 * When produced by Fasta.getFastaItemBufferedSize, `location` is the 1-based position of the
 * piece's first character within its record's whitespace-free sequence.
 *
 * @param seq      the sequence text
 * @param location position associated with `seq`
 */
class StrWithLocation(
  val seq: String,
  val location: Int
)

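/**
 For all Fasta item types, [annotationLine] is the full header line and includes the leading '>'.
 NOTE(review): [id] was documented here as not including '>', but the header regex in this
 object captures the '>' as part of the id, so the two currently disagree. Confirm which
 behaviour is intended.
 */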
object Fasta {

  // Header pattern: a line starting with '>' immediately followed by at least one
  // non-whitespace character. The capture group spans the '>' and that first token.
  private val splitorForAnnotation = """^(>\S+).*""".r
  // Matches lines that are empty or consist solely of whitespace.
  private val emptyLine = "^\\s*$".r
  // Lazily drops blank lines and lines whose first character is '#'.
  // The blank check runs first, so the '#' test is never applied to an empty string.
  private def removeEmptyLines(lines:Iterator[String]) =
    lines.filterNot(line => emptyLine.findPrefixOf(line).isDefined || line.startsWith("#"))
  /**
   * Tells whether a line is a FASTA record header, i.e. whether the header pattern matches
   * at the start of the line.
   *
   * @param line the line to test
   * @return true when `line` begins with a header
   */
  def isFastaAnnotation(line:String): Boolean =
    splitorForAnnotation.findPrefixOf(line) match {
      case Some(_) => true
      case None    => false
    }
  /**
   * Splits a FASTA header line into its id and the full header text.
   *
   * The id is the capture group of the header pattern, i.e. the leading '>' together with the
   * first run of non-whitespace characters. The second element is the input, unchanged.
   *
   * @param annotationLine candidate header line
   * @return the pair (id, annotationLine)
   * @throws XwwException if the line cannot be parsed as a header; the underlying failure is
   *                      attached as the cause
   */
  def splitAnnotationLine(annotationLine: String):(String,String) = {
    try {
      val splitorForAnnotation(id) = annotationLine
      (id, annotationLine)
    } catch {
      // Only ordinary exceptions (such as the MatchError raised by a failed extraction) are
      // translated. JVM errors like OutOfMemoryError are not parse failures and must
      // propagate untouched instead of being reported as an invalid header.
      case inner: Exception =>
        throw new XwwException(annotationLine + " is not a valid Fasta annotationLine text.", inner)
    }
  }
  /**
   * Streams FASTA records with minimal memory use: the sequence lines of each record are
   * handed out through an iterator that reads directly from the shared input.
   *
   * Blank lines and lines starting with '#' are removed first, and any lines before the first
   * header are skipped. Every record's `Seqs` pulls from the same underlying source, so it is
   * only meaningful until the outer iterator is advanced; sequence lines left unread at that
   * point are skipped.
   *
   * @param lines input lines
   * @return iterator over the records found in `lines`
   */
  def getFastaItemsMemEfficient(lines:Iterator[String]): Iterator[FastaItemMemEfficient] = new Iterator[FastaItemMemEfficient]{
    private val bufferedLines = removeEmptyLines(lines).buffered
    private var annotationFound = false

    // Advances to the next header line and leaves it in place as the buffered head.
    def hasNext = annotationFound || {
      while(!bufferedLines.isEmpty && !annotationFound){
        annotationFound = isFastaAnnotation(bufferedLines.head)
        if (!annotationFound) bufferedLines.next()
      }
      annotationFound
    }

    def next() = {
      // Delegating to the empty iterator raises the standard NoSuchElementException.
      if (!hasNext) Iterator.empty.next()
      annotationFound = false
      val head = bufferedLines.next()
      // Peek-based view over this record's sequence lines. Iterator.span is deliberately not
      // used: the source iterator may not be touched again after span, and depending on the
      // library version its leading half removes the next header from the source, which
      // would silently drop the following record.
      val recordLines = new Iterator[String] {
        def hasNext: Boolean = bufferedLines.hasNext && !isFastaAnnotation(bufferedLines.head)
        def next(): String = if (hasNext) bufferedLines.next() else Iterator.empty.next()
      }
      new FastaItemMemEfficient(head, recordLines)
    }
  }
  /**
   * Streams FASTA records for regular use: each record carries its sequence lines as a
   * List[String].
   *
   * Blank lines and lines starting with '#' are removed first, and lines that precede the
   * first header are skipped.
   */
  def getRegularFastaItemsRegular(lines:Iterator[String]) = new Iterator[FastaItemRegular]{
    val bufferedLines = removeEmptyLines(lines).buffered
    var annotationFound = false

    // Moves forward until a header line sits at the buffered head, or the input ends.
    def hasNext = {
      if (!annotationFound) {
        while (bufferedLines.hasNext && !annotationFound) {
          if (isFastaAnnotation(bufferedLines.head)) annotationFound = true
          else bufferedLines.next()
        }
      }
      annotationFound
    }

    def next() = {
      // Delegating to the empty iterator raises the standard NoSuchElementException.
      if (!hasNext) Iterator.empty.next()
      annotationFound = false
      val header = bufferedLines.next()
      // Gather every line up to, but excluding, the next header.
      val body = new ArrayBuffer[String]()
      while (bufferedLines.hasNext && !isFastaAnnotation(bufferedLines.head)) {
        body += bufferedLines.next()
      }
      new FastaItemRegular(header, body.toList)
    }
  }
  /**
   * Streams FASTA records for convenient but memory-hungry use: the sequence lines of each
   * record are concatenated into one string.
   *
   * Blank lines and lines starting with '#' are removed first, and lines that precede the
   * first header are skipped.
   */
  def getRegularFastaItemsSingleLine(lines:Iterator[String]) = new Iterator[FastaItemSingleLineSeq]{
    val bufferedLines = removeEmptyLines(lines).buffered
    var annotationFound = false

    // Moves forward until a header line sits at the buffered head, or the input ends.
    def hasNext = {
      if (!annotationFound) {
        while (bufferedLines.hasNext && !annotationFound) {
          if (isFastaAnnotation(bufferedLines.head)) annotationFound = true
          else bufferedLines.next()
        }
      }
      annotationFound
    }

    def next() = {
      // Delegating to the empty iterator raises the standard NoSuchElementException.
      if (!hasNext) Iterator.empty.next()
      annotationFound = false
      val header = bufferedLines.next()
      // Concatenate every line up to, but excluding, the next header.
      val joined = new StringBuilder()
      while (bufferedLines.hasNext && !isFastaAnnotation(bufferedLines.head)) {
        joined.append(bufferedLines.next())
      }
      new FastaItemSingleLineSeq(header, joined.toString)
    }
  }

  /**
   * Streams FASTA records from a character source, exposing each record's sequence as
   * fixed-size chunks.
   *
   * A header is a line whose first character is '>'. Whitespace inside the sequence is
   * dropped. The first chunk of a record holds up to `bufferSize + overlap` characters; every
   * later chunk starts with the last `overlap` characters of its predecessor followed by up
   * to `bufferSize` new ones. A chunk's `location` is the 1-based position of its first
   * character within the record's whitespace-free sequence.
   *
   * All records and chunk iterators read from the same underlying source, so a record's
   * `Seqs` is only meaningful until the outer iterator is advanced.
   *
   * @param charIter   characters of the FASTA input
   * @param bufferSize number of new sequence characters per chunk; must be positive
   * @param overlap    number of trailing characters repeated at the start of the next chunk
   * @return iterator over the records found in `charIter`
   * @throws IllegalArgumentException if `bufferSize` or `bufferSize + overlap` is not positive
   */
  def getFastaItemBufferedSize(charIter:Iterator[Char], bufferSize:Int, overlap:Int = 0):Iterator[FastaItemBuffer] = new Iterator[FastaItemBuffer]{
    // With a non-positive size a chunk could never take in a new character, so the chunk
    // iterator would report hasNext forever without making progress. Fail fast instead.
    require(bufferSize > 0, "bufferSize must be positive")
    require(bufferSize + overlap > 0, "bufferSize + overlap must be positive")

    private val sourceChar = charIter.buffered
    private var fastaHeaderFound = false
    private var isFirstCharInLine = true

    private def isLineTerminator(ch:Char) = ch == '\n' || ch == '\r'
    private def isWhiteSpace(ch:Char) = isLineTerminator(ch) || ch == ' ' || ch == '\t'
    private def isSeqChar(ch:Char) = !isWhiteSpace(ch) && ch != '>'
    private def isHeaderChar(ch:Char) = ch == '>'

    // Reads the rest of the current line and then swallows every line terminator that
    // follows, so the source is left at the first character of the next non-empty line.
    // The source is only ever peeked and advanced directly: deriving a takeWhile iterator
    // from it and then continuing to use the source is unsupported by the Iterator contract.
    private def getCurrentLine() = {
      val sb  = new StringBuilder()
      while (sourceChar.hasNext && !isLineTerminator(sourceChar.head)) sb.append(sourceChar.next())
      while (sourceChar.hasNext && isLineTerminator(sourceChar.head)) sourceChar.next()
      isFirstCharInLine = true
      sb.toString()
    }

    // Scans forward to the next line that begins with '>', leaving the '>' unconsumed.
    def hasNext = fastaHeaderFound || {
      while (sourceChar.hasNext && !fastaHeaderFound){
        if (isFirstCharInLine){
          isFirstCharInLine = false
          if (sourceChar.head == '>') fastaHeaderFound = true
        } else if ( isLineTerminator(sourceChar.next()) ) isFirstCharInLine = true
      }
      fastaHeaderFound
    }

    def next() = {
      // Delegating to the empty iterator raises the standard NoSuchElementException.
      if (!hasNext) Iterator.empty.next()
      fastaHeaderFound = false
      new FastaItemBuffer(getCurrentLine(), new Iterator[StrWithLocation]{
        private var seqStillEffective = false
        // Skips whitespace; true once a sequence character is at the head, false when the
        // source ends or the next '>' is reached.
        def hasNext = seqStillEffective || {
          while( sourceChar.hasNext && !seqStillEffective && !isHeaderChar(sourceChar.head) ){
            if ( isSeqChar(sourceChar.head)) seqStillEffective = true
            else sourceChar.next()
          }
          seqStillEffective
        }
        // Tail of the previous chunk, replayed at the start of the next one.
        private var lastBufferedChars = "".toCharArray
        // 1-based position, in the whitespace-free sequence, of the next unread character.
        private var location = 1
        def next() = {
          // Without this guard an exhausted iterator would hand back an empty or
          // overlap-only chunk instead of signalling the end like the other iterators here.
          if (!hasNext) Iterator.empty.next()
          val size = bufferSize + overlap
          // The replayed overlap belongs to positions already counted, so step back over it.
          location -= lastBufferedChars.length
          val sb = new StringBuilder(size)
          sb.appendAll(lastBufferedChars)
          while ( sourceChar.hasNext && sourceChar.head != '>' && sb.length < size) {
            val ch = sourceChar.next()
            if (!isWhiteSpace(ch)) sb.append(ch)
          }
          lastBufferedChars = sb.takeRight(overlap).toArray
          val res = new StrWithLocation(sb.toString(), location)
          seqStillEffective = false
          location += res.seq.length
          res
        }
      })
    }
  }

}