// Copyright (c) 2005 Broad Institute/Massachusetts Institute of Technology

#include "paths/FindSubsumedReads.h"
#include "paths/OrientedKmerPathId.h"
#include "STLExtensions.h"
#include "TaskTimer.h"
#include "Histogram.h"

longlong FindSubsumedReads( SubsumptionList& theSubList,
                            const vecKmerPath& pathsFw,
                            const vecKmerPath& pathsRc,
                            const KmerPathDatabase& pathsDb,
                            const int minNumKmers,
                            const longlong chunkSize,
			    ostream* out,
                            const int partition )
{
  bool equalLengthGaplessReads = true;

  int pathId=0;
  // Find the first nonempty Fw read:
  while( pathId < pathsFw.size() && pathsFw[pathId].NSegments()==0 )
    ++pathId;
  if( pathId == pathsFw.size() )
    FatalErr("No nonempty reads!  Surely not what you want.");

  int pathMinLength = pathsFw[pathId].MinLength();
  int pathMaxLength = pathsFw[pathId].MaxLength();

  if ( pathMinLength != pathMaxLength )
    equalLengthGaplessReads = false;

  // Check remaining Fw reads:
  for ( ++pathId; pathId < pathsFw.size() && equalLengthGaplessReads; ++pathId )
    if ( pathsFw[pathId].NSegments() != 0 &&
	 ( pathsFw[pathId].MinLength() != pathMinLength ||
	   pathsFw[pathId].MaxLength() != pathMaxLength ) )
      equalLengthGaplessReads = false;
  // Check all Rc reads:
  for ( pathId=0 ; pathId < pathsRc.size() && equalLengthGaplessReads; ++pathId )
    if ( pathsRc[pathId].NSegments() != 0 &&
	 ( pathsRc[pathId].MinLength() != pathMinLength ||
	   pathsRc[pathId].MaxLength() != pathMaxLength ) )
      equalLengthGaplessReads = false;
  
  if ( equalLengthGaplessReads )
  {
    if ( out )
      *out << "All reads are of equal length and have no gaps. "
           << " No reads can be subsumed." << endl;

    return 0;
  }

  const longlong numPasses = ( pathsDb.size() + chunkSize - 1 ) / chunkSize;

  histogram<int> histKmerCounts;

  histKmerCounts.ShowUnderflow();
  // histKmerCounts.AddLinearProgressionOfBins( 0, 10, 10 );
  histKmerCounts.AddLinearProgressionOfBins( 100, 100, 10 );

  histogram<int> histBelowTen( histKmerCounts );
  histogram<int> histTenToFifty( histKmerCounts );
  histogram<int> histAboveFifty( histKmerCounts );

  if( out )
    *out << Date() << ": finding subsumed reads in " << numPasses << " passes." << endl;

  longlong numSubsumed = 0;
  longlong numSubsumedLastPass = 0;

  longlong numRecords = 0;
  longlong numRecordsLastPass = 0;

  longlong start = 0;
  longlong stop = pathsDb.size() - 1;

  longlong chunkNum = 1;
  longlong chunkStart = start;
  longlong chunkStop  = min( start+chunkSize-1, stop );

  longlong cachedStart = -1;
  longlong cachedUpper = 0;

  TaskTimer subsumeTimer;

  subsumeTimer.Start();

  for ( longlong current = start; current <= stop; ++current )
  {
    if ( current == chunkStop )
    {
      subsumeTimer.Stop();

      if( out )
	*out << "Pass " << chunkNum++ << ": " 
	     << numSubsumed - numSubsumedLastPass << "\t"
	     << numRecords - numRecordsLastPass << "\t"
	     << subsumeTimer << endl;

      numSubsumedLastPass = numSubsumed;
      numRecordsLastPass = numRecords;
      chunkStop = min( current + chunkSize - 1, stop );

      subsumeTimer.Reset();
      subsumeTimer.Start();
    }

    // Look for entries in the paths database that correspond to
    // initial segments of paths.
    if ( pathsDb[current].PathPos() != 0 )
      continue;
      
    // Paths greater than or equal to partition are not checked for subsumptions.
    if ( partition != 0 && pathsDb[current].ReadId() >= partition )
      continue;

    OrientedKmerPathId thisOkpi( pathsDb[current].PathId() );
    const KmerPath* p_thisPath = thisOkpi.GetPathPtr( pathsFw, pathsRc );
    const int thisPathNumSegs = p_thisPath->NSegments();
    
    if ( p_thisPath->KmerCount() < minNumKmers )
      continue;
    
    // Find the range of entries that could intersect this segment.

    longlong from = current - pathsDb[current].Lookback();
    
    if ( cachedStart != pathsDb[current].Start() )
    {
      cachedStart = pathsDb[current].Start();
      cachedUpper = distance( pathsDb.Begin(),
                              upper_bound( pathsDb.Begin(), pathsDb.End(),
                                           pathsDb[current] ) );
    }
    longlong to = cachedUpper;


    vec<BriefSubsumptionRecord> subRecords;

    bool isSubsumed = false;

    for ( longlong other = from; other < to; ++other )
    {
      ForceAssertLe( pathsDb[other].Start(), pathsDb[current].Start() );
      
      // Skip segments which do not even intersect the current segment
      if ( pathsDb[other].Stop() < pathsDb[current].Start() )
        continue;

      // Eliminate segments that extend the current segment to the
      // right when the current path has more than one segment
      if ( thisPathNumSegs > 1 && pathsDb[other].Stop() > pathsDb[current].Stop() )
        continue;

      OrientedKmerPathId otherOkpi( pathsDb[other].PathId() );
      
      // A path can't subsume itself.
      if ( otherOkpi.GetId() == thisOkpi.GetId() )
        continue;
      
      // Now we make sure they agree perfectly until one path or the other ends.
      int thisSeg = 0;
        
      const KmerPath* p_otherPath = otherOkpi.GetPathPtr( pathsFw, pathsRc );
      int otherSeg = pathsDb[other].PathPos();

      // Skip segments that can't extend to the left.
      if ( otherSeg == 0 && pathsDb[other].Start() == pathsDb[current].Start() )
        continue;

      // Eliminate too-short paths.
      if ( p_otherPath->KmerCount() < minNumKmers )
        continue;
        
      // Eliminate paths that obviously don't match to the right.
      if ( otherSeg < p_otherPath->NSegments()-1 &&
           pathsDb[other].Stop() < pathsDb[current].Stop() )
        continue;

      // Eliminate paths that can't subsume because they're too short.
      if ( otherSeg + p_otherPath->NSegments() < thisPathNumSegs )
        continue;

      KmerPathLoc firstMatchingKmer( p_otherPath, otherSeg );
      firstMatchingKmer.SetKmer( pathsDb[current].Start() );
        
      KmerPathLoc thisRightScan( p_thisPath->Begin() );
      KmerPathLoc otherRightScan( firstMatchingKmer );
        
      if ( IsPerfectMatchToRight( thisRightScan, otherRightScan ) &&
           thisRightScan.atEnd() && ! otherRightScan.atEnd() )
      {
        isSubsumed = true;

        subRecords.push_back( BriefSubsumptionRecord( otherOkpi, DistMin( p_otherPath->Begin(),
                                                                          firstMatchingKmer ) ) );
//         cout << otherOkpi << " (" << p_otherPath->KmerCount() << " kmers)"
//              << " subsumes " 
//              << thisOkpi << " (" << p_thisPath->KmerCount() << " kmers)" 
//              << endl;
//         cout << *p_otherPath << endl;
//         for ( ; otherSeg > 0; --otherSeg )
//           cout << KmerPathInterval::Blank();
//         cout << *p_thisPath << endl;
//         cout << endl;
      }
    }

    if ( isSubsumed )
    {
      ++numSubsumed;
      numRecords += subRecords.size();
      theSubList.SetBriefRecordsFor( thisOkpi, subRecords );
    }

    if ( subRecords.size() < 10 )
      histBelowTen.AddDatum( p_thisPath->KmerCount() );
    else if ( subRecords.size() <= 50 )
      histTenToFifty.AddDatum( p_thisPath->KmerCount() );
    else
      histAboveFifty.AddDatum( p_thisPath->KmerCount() );
  }

  if( out ) {
    *out << "Kmer count bins     :\t"; histKmerCounts.PrintBinsAsLine( *out );
    *out << "Paths w/ <10 supers :\t"; histBelowTen.PrintDataAsLine( *out );
    *out << "Paths w/ 10-50      :\t"; histTenToFifty.PrintDataAsLine( *out );
    *out << "Paths w/ >50        :\t"; histAboveFifty.PrintDataAsLine( *out );
  }
  return numSubsumed;
}
