/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2005) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef FORCE_DEBUG
     #define NDEBUG
#endif

#include "CoreTools.h"
#include "Feudal.h"
#include "FeudalTemplate.h"
#include "math/HoInterval.h"
#include "system/SystemTemplate.h"
#include "paths/AlignAndMerge.h"
#include "paths/CompletedInsert.h"
#include "paths/HyperKmerPath.h"
#include "paths/KmerPath.h"
#include "paths/KmerPathBreadthSearcher.h"
#include "paths/PileReadsOnPath.h"
#include "random/Bernoulli.h"

void SloppyReadPlacement::Reverse( const KmerPath& p )
{    rc_ = !rc_;
     KmerPathLocAlt let = e_.LeftEndTo( ), ret = e_.RightEndTo( );
     if ( e_.LeftEndMapped( ) )
     {    let.Reverse(p);
          e_.SetRightEndTo(let);    }
     if ( e_.RightEndMapped( ) )
     {    ret.Reverse(p);
          e_.SetLeftEndTo(ret);    }
     Bool temp = e_.LeftEndMapped( );
     e_.SetLeftEndMapped( e_.RightEndMapped( ) );
     e_.SetRightEndMapped(temp);    }

void CompletedInsert::Reverse( )
{    for ( int i = 0; i < locs_.size( ); i++ )
     {    for ( int j = 0; j < locs_[i].size( ); j++ )
               locs_[i][j].Reverse( paths_[i] );    }
     for ( int i = 0; i < paths_.size( ); i++ )
          paths_[i].Reverse( );
     swap( id1_, id2_ );    }

// Write this insert to the open file descriptor fd: five int-sized header
// fields (id1, id2, min length, max length, min perfect match), then the
// paths, then the read placements.  CompletedInsert::BinaryRead consumes the
// same layout in the same order.

void CompletedInsert::BinaryWrite( int fd ) const
{
     // Fixed-size header.

     WriteBytes( fd, &id1_, sizeof(int) );
     WriteBytes( fd, &id2_, sizeof(int) );
     WriteBytes( fd, &min_length_, sizeof(int) );
     WriteBytes( fd, &max_length_, sizeof(int) );
     WriteBytes( fd, &min_perfect_match_, sizeof(int) );

     // Variable-size payload.

     paths_.BinaryWrite(fd);
     locs_.BinaryWrite(fd);
}

// Load this insert from the open file descriptor fd.  Fields are read in the
// order CompletedInsert::BinaryWrite emits them: five int-sized header fields,
// then the paths, then the read placements.

void CompletedInsert::BinaryRead( int fd )
{
     // Fixed-size header.

     ReadBytes( fd, &id1_, sizeof(int) );
     ReadBytes( fd, &id2_, sizeof(int) );
     ReadBytes( fd, &min_length_, sizeof(int) );
     ReadBytes( fd, &max_length_, sizeof(int) );
     ReadBytes( fd, &min_perfect_match_, sizeof(int) );

     // Variable-size payload.

     paths_.BinaryRead(fd);
     locs_.BinaryRead(fd);
}

// Write a vector of CompletedInserts to filename: an int element count
// followed by each insert's own binary form.

void BinaryWrite( const String& filename, const vec<CompletedInsert>& inserts )
{    const int fd = OpenForWrite(filename);
     const int count = inserts.size( );
     WriteBytes( fd, &count, sizeof(int) );
     for ( int idx = 0; idx < count; idx++ )
          inserts[idx].BinaryWrite(fd);
     close(fd);    }

// Read a vector of CompletedInserts from filename.  The file starts with an
// int element count; inserts is resized to that count and each element is
// then loaded in turn.

void BinaryRead( const String& filename, vec<CompletedInsert>& inserts )
{    const int fd = OpenForRead(filename);
     int count;
     ReadBytes( fd, &count, sizeof(int) );
     inserts.resize(count);
     for ( int idx = 0; idx < count; idx++ )
          inserts[idx].BinaryRead(fd);
     close(fd);    }

// Instantiate the mastervec machinery for serfvec<SloppyReadPlacement> elements.
// NOTE(review): the macro is presumably supplied by FeudalTemplate.h -- confirm.
INSTANTIATE_MASTERVEC( serfvec<SloppyReadPlacement>, SloppyReadPlacement )

// Decide whether two placements of the same read (same id, same orientation),
// loc1 on path p1 and loc2 on path p2, are consistent with each other.  The
// placements are anchored at their left ends if both are mapped, otherwise at
// their right ends if both are mapped; the paths are then merged at the anchor
// intervals, and the placements are compatible iff at least one merger exists.
// If no common mapped end exists there is nothing to test and True is returned.

Bool IsCompatible( const KmerPath& p1, const KmerPath& p2,
     const SloppyReadPlacement& loc1, const SloppyReadPlacement& loc2 )
{    ForceAssertEq( loc1.ReadId( ), loc2.ReadId( ) );
     ForceAssertEq( loc1.Rc( ), loc2.Rc( ) );

     // Choose a shared anchor, preferring the left ends.

     KmerPathLocAlt anchor1, anchor2;
     if ( loc1.LeftEndMapped( ) && loc2.LeftEndMapped( ) )
     {    anchor1 = loc1.LeftLoc( );
          anchor2 = loc2.LeftLoc( );    }
     else
     {    if ( !loc1.RightEndMapped( ) || !loc2.RightEndMapped( ) ) return True;
          anchor1 = loc1.RightLoc( );
          anchor2 = loc2.RightLoc( );    }

     // Compatible exactly when the two paths can be merged at the anchors.

     vec<MergedKmerPath> merges;
     MergePaths( p1, p2, anchor1.Interval( ), anchor2.Interval( ), merges );
     return merges.nonempty( );    }

// RemoveImprobablePaths.  For each insert, score every closure path by calling
// SurprisingTosses on the vector of orientation flags (Rc) of the reads placed
// on it, with max_seq passed through.  Let M be the largest score among the
// insert's paths; every path whose score is below M/surprise_factor is removed
// from the insert, together with its placements.
//
// Returns the total number of paths removed over all inserts.
//
// Inserts with no paths are skipped outright: there is nothing to compare, and
// Max would otherwise be invoked on an empty vector.

int RemoveImprobablePaths( vec<CompletedInsert>& inserts, int surprise_factor,
     int max_seq )
{    int count = 0;
     for ( int i = 0; i < inserts.isize( ); i++ )
     {    CompletedInsert& I = inserts[i];
          if ( I.NPaths( ) == 0 ) continue;

          // Score each path.  The scratch vector is static so that its storage
          // is reused from one path to the next.

          vec<double> surprise( I.NPaths( ) );
          for ( int j = 0; j < I.NPaths( ); j++ )
          {    static vec<Bool> rc;
               rc.resize_and_set( I.Locs(j).size( ), False );
               for ( int k = 0; k < I.Locs(j).size( ); k++ )
                    if ( I.Locs(j)[k].Rc( ) ) rc[k] = True;
               surprise[j] = SurprisingTosses( rc, max_seq );    }

          // Flag the paths scoring well below the best one.

          double M = Max(surprise);
          vec<Bool> remove( I.NPaths( ), False );
          for ( int j = 0; j < I.NPaths( ); j++ )
               if ( surprise[j] < M/double(surprise_factor) ) remove[j] = True;

          // Rebuild the insert's paths and placements from the survivors.

          const int nremove = Sum(remove);
          if ( nremove > 0 )
          {    count += nremove;
               vecKmerPath paths;
               vecvec<SloppyReadPlacement> locs;
               for ( int j = 0; j < I.NPaths( ); j++ )
               {    if ( !remove[j] ) 
                    {    paths.push_back( I.Path(j) );
                         locs.push_back( I.Locs(j) );    }    }
               I.PathsMutable( ).clear( );
               I.LocsMutable( ).clear( );
               for ( int j = 0; j < paths.size( ); j++ )
               {    I.PathsMutable( ).push_back( paths[j] );
                    I.LocsMutable( ).push_back( locs[j] );    }    }    }
     return count;    }

// Report whether the read placements in locs jointly cover path p.  Each
// placement contributes the half-open interval running from its left end
// position (or 0 if the left end is unmapped) to its right end position (or
// nk - 1 if the right end is unmapped), where nk is the kmer count of p.  The
// path counts as covered when the intervals together cover at least nk - 1.

Bool Covered( const KmerPath& p, const serfvec<SloppyReadPlacement>& locs )
{    const int nk = p.KmerCount( );

     // Static scratch buffer, emptied on every call.

     static vec<ho_interval> spans;
     spans.clear( );

     for ( int i = 0; i < locs.size( ); i++ )
     {    const SloppyReadPlacement& placement = locs[i];
          int begin = 0, end = nk - 1;
          if ( placement.LeftEndMapped( ) ) 
               begin = placement.LeftLoc( ).PosOnPath( );
          if ( placement.RightEndMapped( ) ) 
               end = placement.RightLoc( ).PosOnPath( );
          spans.push_back( ho_interval( begin, end ) );    }

     return TotalCovered(spans) >= nk - 1;    }

// Second-order compatibility conditions
// 
//                       ---S--->....................<-------
// 
//                                  ----R-->
// ---------------------P---------------------
// 
// Suppose reads R and S are both placed on insert path P.
// If the positions are as shown, and S's insert is closed,
// then R should be placed on one of its paths that aligns 
// to P.

// SecondOrderIncompatible.  Delete read placements that violate the
// second-order compatibility condition.
//
// inserts:     the completed inserts; placements are erased in place.
// pairs:       read pairings, consulted via Partner( ) to find a read's mate.
// pairs_index: indexed by read id; yields an index into pairs.  Its size is
//              used as the number of reads.
// modified:    output; resized to inserts.size( ), and modified[i] is set True
//              for each insert i that has a placement marked for deletion.
//
// Returns the total number of placements deleted.
//
// Note: several scratch vectors are function-local statics that are reset on
// use, so their storage persists between calls.

int SecondOrderIncompatible( vec<CompletedInsert>& inserts, 
     const vec<read_pairing>& pairs, const vec<int>& pairs_index,
     vec<Bool>& modified )
{    modified.resize_and_set( inserts.size( ), False );
     int total_dels = 0;

     // Index the inserts by their end reads.

     int nreads = pairs_index.size( );
     static vec<int> to_inserts;
     to_inserts.resize_and_set( nreads, -1 );
     for ( int i = 0; i < inserts.isize( ); i++ )
     {    CompletedInsert& I = inserts[i];
          if ( I.Id1( ) < 0 ) continue;
          to_inserts[ I.Id1( ) ] = to_inserts[ I.Id2( ) ] = i;    }

     // Go through each of the inserts.

     for ( int i = 0; i < inserts.isize( ); i++ )
     {    CompletedInsert& I = inserts[i];
          if ( I.Id1( ) < 0 ) continue;
          for ( int j = 0; j < I.NPaths( ); j++ )
          {    const KmerPath& p = I.Path(j);
               serfvec<SloppyReadPlacement>& locs = I.LocsMutable(j);

               // locs_del[u] records whether placement u is to be erased from
               // this path.

               static vec<Bool> locs_del;
               locs_del.resize_and_set( locs.size( ), False );

               // placed = sorted, duplicate-free list of the read ids placed
               // on this path.

               static vec<int> placed;
               placed.clear( );
               for ( int u = 0; u < locs.size( ); u++ )
                    placed.push_back( locs[u].ReadId( ) );
               UniqueSort(placed);

               // incompatible holds pairs (u, v) of placement indices on this
               // path; each incompatibility is entered in both directions.

               static vec< pair<int,int> > incompatible;
               incompatible.clear( );
               for ( int u1 = 0; u1 < locs.size( ); u1++ )
               {    const SloppyReadPlacement& l1 = locs[u1];
                    int id1 = l1.ReadId( );

                    // Now we have a placement of read id1.  Ignore the case where
                    // the read is unpaired or its pair has no closure or its 
                    // partner is also placed.

                    // NOTE(review): there is no explicit test on pi here; reads
                    // that pass the to_inserts test are end reads of some
                    // insert, which presumably guarantees a valid pairing 
                    // index -- confirm.

                    int pi = pairs_index[id1];
                    if ( to_inserts[id1] < 0 ) continue;
                    if ( BinMember( placed, pairs[pi].Partner(id1) ) ) continue;

                    // Go through the other read placements.

                    for ( int u2 = 0; u2 < locs.size( ); u2++ )
                    {    if ( u1 == u2 ) continue;
                         const SloppyReadPlacement& l2 = locs[u2];
                         int id2 = l2.ReadId( );

                         // Now we have read placements defined by l1 and l2.
                         // Make sure they have the same orientation.

                         if ( l1.Rc( ) != l2.Rc( ) ) continue;

                         // In the forward case, check that l1 starts to the left
                         // of l2, and in the reverse case, check that l1 stops to
                         // the right of l2, so that in either case, l2 has to be on
                         // l1's insert.
                         //
                         // We should actually check that l2 can't somehow fit into
                         // a hole in l1's insert paths.

                         if ( !l1.LeftEndMapped( ) && !l1.RightEndMapped( ) ) 
                              continue;
                         if ( !l2.LeftEndMapped( ) && !l2.RightEndMapped( ) ) 
                              continue;
                         if ( l1.Fw( ) )
                         {    KmerPathLocAlt loc1 = ( l1.LeftEndMapped( ) 
                                   ? l1.LeftLoc( ) : l1.RightLoc( ) );
                              KmerPathLocAlt loc2 = ( l2.RightEndMapped( ) 
                                   ? l2.RightLoc( ) : l2.LeftLoc( ) );
                              if ( loc1 > loc2 ) continue;    }
                         else
                         {    KmerPathLocAlt loc1 = ( l1.RightEndMapped( ) 
                                   ? l1.RightLoc( ) : l1.LeftLoc( ) );
                              KmerPathLocAlt loc2 = ( l2.LeftEndMapped( ) 
                                   ? l2.LeftLoc( ) : l2.RightLoc( ) );
                              if ( loc1 < loc2 ) continue;    }

                         // Now check that id2 is placed on an insert path for id1.

                         const CompletedInsert& I1 = inserts[ to_inserts[id1] ];
                         Bool found = False;
                         for ( int j1 = 0; j1 < I1.NPaths( ); j1++ )
                         {    const serfvec<SloppyReadPlacement>& 
                                   locs1 = I1.Locs(j1);
                              for ( int w = 0; w < locs1.size( ); w++ )
                              {    if ( locs1[w].ReadId( ) == id2 )
                                   {    found = True;
                                        break;    }    }
                              if (found) break;    }
                         if (found) continue;

                         // Record incompatibility.

                         incompatible.push_back( make_pair( u1, u2 ) );
                         incompatible.push_back( make_pair( u2, u1 ) );
                         /*
                         cout << "\n";
                         PRINT2( I.Id1( ), I.Id2( ) );
                         PRINT4( i, j, id1, id2 );    
                         */
                              }    }

               // Process incompatibles.

               // After sorting, entries sharing the same first index are
               // contiguous; [u, v) delimits one such run.  For the placement
               // u1 heading the run, drop every placement it is incompatible
               // with from a copy of locs; if the path is then no longer
               // covered, mark u1 itself for deletion.

               UniqueSort(incompatible);
               for ( int u = 0; u < incompatible.isize( ); u++ )
               {    int v;
                    for ( v = u + 1; v < incompatible.isize( ); v++ )
                    {    if ( incompatible[v].first != incompatible[u].first )
                              break;    }
                    int u1 = incompatible[u].first;
                    static vec<Bool> del;
                    del.resize_and_set( locs.size( ), False );
                    for ( int w = u; w < v; w++ )
                         del[ incompatible[w].second ] = True;
                    static serfvec<SloppyReadPlacement> dlocs;
                    dlocs = locs;
                    EraseIf( dlocs, del );
                    if ( !Covered( p, dlocs ) )
                    {    locs_del[u1] = True;
                         modified[i] = True;
                         /*
                         cout << "i = " << i << ", j = " << j
                              << ", remove " << locs[u1].ReadId( )
                              << " [" << u1 << "]\n";    
                         */
                              }

                    // Resume the outer loop at the start of the next run.

                    u = v - 1;    }

               // Delete placements.

               EraseIf( locs, locs_del );    
               total_dels += Sum(locs_del);    }    }

     return total_dels;    }

// RemoveIncompatibleReads.  For each placement of a read on a closure path of
// some insert, look up the insert for which that read is an end read (if any),
// and test the placement against the placements of the same read on that
// insert's own paths, using IsCompatible.  Placements that fail are erased.
//
// inserts:  the completed inserts; placements are erased in place.
// nreads:   size of the read-id-to-insert lookup table; end-read ids of the
//           indexed inserts are asserted to be less than this.
// modified: output; resized to inserts.size( ), and modified[i] is set True
//           for each insert i that loses a placement.
// mpm:      inserts whose MinPerfectMatch( ) exceeds mpm are not entered into
//           the lookup table, so reads are not tested against them.
//
// Returns the number of placements removed.

int RemoveIncompatibleReads( vec<CompletedInsert>& inserts, int nreads,
     vec<Bool>& modified, int mpm )
{    modified.resize_and_set( inserts.size( ), False );

     // Build a reversed copy of every insert; it is consulted on pass 2 below.

     vec<CompletedInsert> insertsr = inserts;
     for ( int i = 0; i < insertsr.isize( ); i++ )
          insertsr[i].Reverse( );

     // to_ci maps an end-read id to the index of its insert, or -1.

     vec<int> to_ci( nreads, -1 );
     for ( int i = 0; i < inserts.isize( ); i++ )
     {    if ( inserts[i].MinPerfectMatch( ) > mpm ) continue;
          if ( inserts[i].Id1( ) < 0 ) continue;
          ForceAssertLt( inserts[i].Id1( ), nreads );
          ForceAssertLt( inserts[i].Id2( ), nreads );
          to_ci[ inserts[i].Id1( ) ] = to_ci[ inserts[i].Id2( ) ] = i;    }
     int removes = 0;
     for ( int i1 = 0; i1 < inserts.isize( ); i1++ )
     {    CompletedInsert& I1 = inserts[i1];

          // id1 and id2 are referenced only by the commented-out diagnostic
          // below, apart from the test on id1.

          int id1 = I1.Id1( ), id2 = I1.Id2( );
          if ( id1 < 0 ) continue;
          for ( int j1 = 0; j1 < I1.NPaths( ); j1++ )
          {    const KmerPath& p1 = I1.Path(j1);
               vec<Bool> remove( I1.Locs(j1).size( ), False );
               for ( int k1 = 0; k1 < I1.Locs(j1).size( ); k1++ )
               {    const SloppyReadPlacement& l1 = I1.Locs(j1)[k1];
                    int id = l1.ReadId( );
                    int i2 = to_ci[id];
                    if ( i2 < 0 ) continue;
                    Bool good = False;

                    // Pass 1 examines insert i2 as stored, pass 2 its reversed
                    // copy.  Meeting a placement of the read with the opposite
                    // orientation abandons the current pass (wrong_or) without
                    // removing anything.  A pass that finishes without finding
                    // a compatible placement marks l1 for removal and stops.
                    //
                    // NOTE(review): good is not reset between the passes, so a
                    // True value from pass 1 carries into pass 2 unless an
                    // IsCompatible call there overwrites it -- confirm this is
                    // intended.

                    for ( int pass = 1; pass <= 2; pass++ )
                    {    const CompletedInsert& I2
                              = ( pass == 1 ? inserts[i2] : insertsr[i2] );
                         for ( int j2 = 0; j2 < I2.NPaths( ); j2++ )     
                         {    const KmerPath& p2 = I2.Path(j2);
                              for ( int k2 = 0; k2 < I2.Locs(j2).size( ); k2++ )
                              {    const SloppyReadPlacement& l2 = I2.Locs(j2)[k2];
                                   if ( l2.ReadId( ) != l1.ReadId( ) ) continue;
                                   if ( l2.Rc( ) != l1.Rc( ) ) goto wrong_or;
                                   good = IsCompatible( p1, p2, l1, l2 );
                                   if (good) break;    }
                              if (good) break;    }
                         if ( !good )
                         {    remove[k1] = True;
                              ++removes;
                              modified[i1] = True;
                              // cout << "remove placement of read " << id
                              //      << " from closure path " << j1 << " for "
                              //      << id1 << "/" << id2 << "\n";    
                              break;    }
                         wrong_or: continue;    }    }
               EraseIf( I1.LocsMutable(j1), remove );    }    }
     return removes;    }

// Rewalk.  For each insert in flagged by walk_these[in], recompute its closure
// paths using only the reads currently placed on it, and replace the insert by
// one built from the new closures and freshly piled read placements.
//
// inserts:      the completed inserts; flagged entries are overwritten.
// walk_these:   walk_these[in] selects whether insert in is rewalked.
// all_paths, all_paths_rc: read paths, forward and reverse, indexed by read id.
// ngv:          handed to the KmerPathBreadthSearcher.
// mpm:          handed to the searcher via SetMinPerfectMatch.
// remove_empty: if set, inserts left with no paths are dropped from the vector
//               at the end (all inserts are checked, not only rewalked ones).
// run_dir, K, kbb: not referenced in this function body.
//
// Returns the sum, over rewalked inserts, of (old path count - new closure 
// count).
//
// The search is asserted not to have exploded its heap.

int Rewalk( vec<CompletedInsert>& inserts, const vec<Bool>& walk_these,
     const vecKmerPath& all_paths, const vecKmerPath& all_paths_rc, 
     const String& run_dir, int K, const KmerBaseBroker& kbb, 
     const NegativeGapValidator& ngv, int mpm, Bool remove_empty )
{    int paths_removed = 0;
     for ( int in = 0; in < inserts.isize( ); in++ )
     {    if ( !walk_these[in] ) continue;
          CompletedInsert& I = inserts[in];

          // use = sorted, duplicate-free ids of the reads placed on any path
          // of this insert.

          static vec<int> use;
          use.clear( );
          for ( int j = 0; j < I.NPaths( ); j++ )
          {    for ( int k = 0; k < I.Locs(j).size( ); k++ )
               {    int id = I.Locs(j)[k].ReadId( );
                    use.push_back(id);    }    }
          UniqueSort(use);
          int n = use.size( );

          // Build a segment database restricted to those reads.  Forward paths
          // are tagged with the read id, reverse paths with -id-1.

          static vec<tagged_rpint> segs;
          segs.clear( );
          int numsegs = 0;
          for ( int i = 0; i < n; i++ )
          {    numsegs += all_paths[ use[i] ].NSegments( );
               numsegs += all_paths_rc[ use[i] ].NSegments( );    }
          segs.reserve( numsegs );
          for ( int i = 0; i < n; i++ )
               all_paths[ use[i] ].AppendToDatabase( segs, use[i] );
          for ( int i = 0; i < n; i++ )
               all_paths_rc[ use[i] ].AppendToDatabase( segs, -use[i]-1 );
          Prepare(segs);

          // Search for closures between the insert's two end reads, within its
          // recorded length bounds.

          unsigned int heap_size_limit = 1000000000; // essentially infinite
          SetBasedPathWalkHeap< less<PathWalk> > heap;
          KmerPathBreadthSearcher searcher( 
               all_paths, all_paths_rc, segs, &ngv, &heap, heap_size_limit );
          searcher.SetMinPerfectMatch(mpm);
          KmerPathBreadthSearcher::Result search_result;
          searcher.FindClosures( I.Id1( ), I.Id2( ), I.MinLength( ), I.MaxLength( ),
               search_result );
          ForceAssert( !search_result.heapExploded( ) );
          const set<KmerPath>& closures = search_result.closures;
          paths_removed += I.NPaths( ) - closures.size( );

          // Pile the reads on each closure and convert each piled read into a
          // SloppyReadPlacement.

          ReadsOnPathPiler piler( all_paths, all_paths_rc, segs );
          vecKmerPath paths;
          vecvec<SloppyReadPlacement> locs;
          for ( set<KmerPath>::iterator closureIter = closures.begin();
	       closureIter != closures.end(); ++closureIter )
          {    paths.push_back( *closureIter );
	       vec<ReadOnPath> path_pile;
	       list<KmerPath> subpath_storage;
	       piler.PileReadsOnPath( *closureIter, path_pile, subpath_storage );
               serfvec<SloppyReadPlacement> theselocs;
               const KmerPath& p = *closureIter;
               for ( int i = 0; i < path_pile.isize( ); i++ )
               {    const ReadOnPath& r = path_pile[i];
                    const PathEmbedding& into_read = r.intoRead;
                    const PathEmbedding& into_path = r.intoPath;
                    KmerPathLocAlt left_end_to, right_end_to;

                    // An end of the read counts as mapped when the embedding
                    // into the read reaches that end of the read.

                    Bool left_end_mapped = into_read.SubStartLoc( ).atBegin( );
                    Bool right_end_mapped = into_read.SubStopLoc( ).atEnd( );
                    if (left_end_mapped)
                         left_end_to = KmerPathLocAlt( into_path.SubStartLoc( ) );
                    if (right_end_mapped)
                         right_end_to = KmerPathLocAlt( into_path.SubStopLoc( ) );
                    SloppyPathEmbedding e( left_end_mapped, right_end_mapped,
                         left_end_to, right_end_to );

                    // Undo the -id-1 encoding; a negative readId means the
                    // reverse path was used.

                    int read_id;
                    if ( r.readId >= 0 ) read_id = r.readId;
                    else read_id = -r.readId - 1;
                    SloppyReadPlacement s( read_id, r.readId < 0, e );
                    theselocs.push_back(s);    }
               locs.push_back(theselocs);    }
          CompletedInsert ci( I.Id1( ), I.Id2( ), I.MinLength( ), I.MaxLength( ), 
               I.MinPerfectMatch( ), paths, locs );
          inserts[in] = ci;    }

     // Optionally compact the vector, keeping only inserts that still have
     // paths (relative order is preserved).

     if (remove_empty)
     {    int count = 0;
          for ( int i = 0; i < inserts.isize( ); i++ )
          {    if ( inserts[i].NPaths( ) > 0 )
               {    if ( count != i ) inserts[count] = inserts[i];
                    ++count;    }    }
          inserts.resize(count);    }
     return paths_removed;    }

// Clean.  Iteratively clean up a set of completed inserts:
//
// (1) Repeat { RemoveIncompatibleReads (with mpm); RemoveHolyPaths } until a
//     round removes no paths.
// (2) Run RemoveIncompatibleReads once more, without passing mpm (so its 
//     default for that argument applies).
// (3) If second_order is set, repeat { SecondOrderIncompatible; 
//     RemoveHolyPaths } until no reads are removed by the second-order 
//     conditions.  Whenever paths are removed in this stage, go back to (1).
//
// With verbose set, progress counts are written to cout, ending with the 
// number of distinct reads still placed.  The remaining arguments are passed
// through to the helpers named above.

void Clean( vec<CompletedInsert>& inserts, const vecKmerPath& all_paths, 
     const vecKmerPath& all_paths_rc, const String& run_dir, int K, 
     const KmerBaseBroker& kbb, const NegativeGapValidator& ngv, 
     const vec<read_pairing>& pairs, const vec<int>& pairs_index,
     Bool verbose, int mpm, Bool remove_empty, Bool second_order )
{    vec<Bool> modified;
     int nreads = pairs_index.size( ), pass = 1;

     // Re-entry point used when the second-order stage removes paths.  The
     // pass counter is not reset on re-entry.

     restart:
     while(1)
     {    if (verbose) cout << "\n" << Date( ) << ": PASS " << pass++ << endl;
          int reads_removed 
               = RemoveIncompatibleReads( inserts, nreads, modified, mpm );
          if (verbose)
               cout << reads_removed << " incompatible reads removed" << endl;

          // Only inserts flagged in modified by the preceding step are 
          // checked for uncovered paths.

          int paths_removed = RemoveHolyPaths( inserts, modified, all_paths, 
               all_paths_rc, run_dir, K, kbb, ngv, mpm, remove_empty );
          if (verbose)
               cout << paths_removed << " incompatible paths removed" << endl;
          if ( paths_removed == 0 ) break;    }
     int reads_removed = RemoveIncompatibleReads( inserts, nreads, modified );
     if (verbose)
          cout << reads_removed << " incompatible reads removed" << endl;    
     if (second_order)
     {    while(1)
          {    int second = SecondOrderIncompatible( 
                    inserts, pairs, pairs_index, modified );
               if (verbose)
                    cout << second 
                         << " reads removed by second-order conditions" << endl;
               if ( second == 0 ) break;
               int paths_removed = RemoveHolyPaths( inserts, modified, all_paths, 
                    all_paths_rc, run_dir, K, kbb, ngv, mpm, remove_empty );
               if (verbose)
                    cout << paths_removed << " incompatible paths removed" << endl;
               if ( paths_removed > 0 ) goto restart;    }    }
     if (verbose)
     {    vec<int> all;
          AllReads( inserts, all );
          cout << all.size( ) << " total distinct reads placed\n";    }    }

void AllReads( const vec<CompletedInsert>& inserts, vec<int>& all )
{    all.clear( );
     for ( int i = 0; i < inserts.isize( ); i++ )
     {    for ( int j = 0; j < inserts[i].NPaths( ); j++ )
          {    for ( int k = 0; k < inserts[i].Locs(j).size( ); k++ )
               {    int id = inserts[i].Locs(j)[k].ReadId( );
                    all.push_back(id);    }    }    }
     UniqueSort(all);    }

// RemoveHolyPaths.  For each insert flagged by check_these (and having a
// non-negative Id1), remove every closure path that is not Covered by its read
// placements, reporting each removal on cout.  If remove_empty is set, inserts
// left without any path are then dropped from the vector.
//
// Returns the number of paths removed.  The arguments all_paths, all_paths_rc,
// run_dir, K, kbb, ngv and mpm are not referenced in this function body.

int RemoveHolyPaths( vec<CompletedInsert>& inserts, 
          const vec<Bool>& check_these, const vecKmerPath& all_paths, 
          const vecKmerPath& all_paths_rc, const String& run_dir, int K, 
          const KmerBaseBroker& kbb, const NegativeGapValidator& ngv, int mpm,
          Bool remove_empty )
{    int total_removed = 0;
     for ( int in = 0; in < inserts.isize( ); in++ )
     {    if ( !check_these[in] ) continue;
          CompletedInsert& I = inserts[in];
          if ( I.Id1( ) < 0 ) continue;

          // Flag the paths that their placements fail to cover.

          vec<Bool> holy( I.NPaths( ), False );
          for ( int j = 0; j < I.NPaths( ); j++ )
          {    const KmerPath& p = I.Path(j);
               const int nk = p.KmerCount( );
               if ( Covered( p, I.Locs(j) ) ) continue;
               cout << "removing path with " << nk << " kmers\n";
               holy[j] = True;    }
          const int nholy = Sum(holy);
          total_removed += nholy;
          if ( nholy == 0 ) continue;

          // Rebuild the insert's paths and placements from the survivors.

          vecKmerPath kept_paths;
          vecvec<SloppyReadPlacement> kept_locs;
          for ( int j = 0; j < I.NPaths( ); j++ )
          {    if ( holy[j] ) continue;
               kept_paths.push_back( I.Path(j) );
               kept_locs.push_back( I.Locs(j) );    }
          I.PathsMutable( ).clear( );
          I.LocsMutable( ).clear( );
          for ( int j = 0; j < kept_paths.size( ); j++ )
          {    I.PathsMutable( ).push_back( kept_paths[j] );
               I.LocsMutable( ).push_back( kept_locs[j] );    }    }

     // Optionally compact the vector, preserving the order of the inserts
     // that still have paths.

     if (remove_empty)
     {    int nkept = 0;
          for ( int i = 0; i < inserts.isize( ); i++ )
          {    if ( inserts[i].NPaths( ) <= 0 ) continue;
               if ( nkept != i ) inserts[nkept] = inserts[i];
               ++nkept;    }
          inserts.resize(nkept);    }
     return total_removed;    }

// Explicit instantiation of the ReadPointer function template for
// SloppyReadPlacement.
// NOTE(review): the template is presumably defined in 
// system/SystemTemplate.h -- confirm.
template void ReadPointer( int filedes, SloppyReadPlacement*& ptr );

// RemoveChimeras.  If the following conditions are satisfied, we call a read X
// chimeric:
// * X is not internal to a completed insert.
// * The left and right kmers of X are non-unique.
// * (a) Let L be the left kmer of X, and amongst all reads containing L (other
//       than X), let L' be the rightmost kmer of X which is in some such read; 
//       then amongst all reads containing L' (other than X), let L'' be the 
//       rightmost kmer of X which is in some such read.
//   (b) Let R be the right kmer of X, and amongst all reads containing R (other
//       than X), let R' be the leftmost kmer of X which is in some such read; 
//       then amongst all reads containing R' (other than X), let R'' be the 
//       leftmost kmer of X which is in some such read.
//   (c) We assume that L'' is strictly left of R'', however there is a read 
//       containing L'' and extending right as far as R'', and there is a read 
//       containing R'', and extending left as far as L''.
// We identify chimeras which are end reads of completed inserts, and delete the
// corresponding CompletedInserts.

// Arguments:
//   inserts:  completed inserts; those having a chimeric end read are erased.
//   paths, paths_rc: read paths, forward and reverse, indexed by read id.
//   pathsDB:  path database; an entry with negative PathId( ) refers to the 
//             reverse path of read -PathId( )-1.
//
// Note on the search loops below: the scan for jL runs downward and ends at -1
// when nothing is found, whereas the scan for jR runs upward and ends at
// p.NSegments( ) when nothing is found.  Both "not found" outcomes are 
// rejected before any segment of p is accessed through jL or jR.

void RemoveChimeras( vec<CompletedInsert>& inserts, const vecKmerPath& paths,
     const vecKmerPath& paths_rc, const vec<tagged_rpint>& pathsDB )
{
     // Find all end reads which are not placed fully on another insert (enders).

     vec<int> end_reads, embedded_reads, enders, chimeras;
     for ( int i = 0; i < inserts.isize( ); i++ )
     {    const CompletedInsert& I = inserts[i];
          end_reads.push_back( I.Id1( ), I.Id2( ) );
          for ( int j = 0; j < I.NPaths( ); j++ )
          {    for ( int k = 0; k < I.Locs(j).size( ); k++ )
               {    const SloppyReadPlacement& l = I.Locs(j)[k];
                    {    if ( l.BothEndsMapped( ) && l.ReadId( ) != I.Id1( )
                              && l.ReadId( ) != I.Id2( ) )
                         {    embedded_reads.push_back( 
                                   l.ReadId( ) );    }    }    }    }    }
     Sort(end_reads), Sort(embedded_reads);
     for ( int i = 0; i < end_reads.isize( ); i++ )
     {    if ( !BinMember( embedded_reads, end_reads[i] ) )
               enders.push_back( end_reads[i] );    }

     // Check to see which enders are to be called chimeras.

     for ( int i = 0; i < enders.isize( ); i++ )
     {    int id = enders[i];
          const KmerPath& p = paths[id];
          longlong L = p.Segment(0).Start( ), R = p.LastSegment( ).Stop( );
          int jL = -1, jR = -1;

          // Two rounds: the first replaces (L,R) by (L',R'), the second 
          // replaces (L',R') by (L'',R'').

          for ( int pass = 1; pass <= 2; pass++ )
          {    vec<longlong> Lext, Rext;
               Contains( pathsDB, L, Lext ), Contains( pathsDB, R, Rext );

               // Lright collects, for every other read containing L, the part
               // of that read from L rightward; Rleft collects, for every
               // other read containing R, the part from R leftward.

               vec<tagged_rpint> Lright, Rleft;
               for ( int j = 0; j < Lext.isize( ); j++ )
               {    const tagged_rpint& t = pathsDB[ Lext[j] ];
                    Bool rcL = ( t.PathId( ) < 0 );
                    int idL = ( !rcL ? t.PathId( ) : -t.PathId( )-1 );
                    if ( idL == id ) continue;
                    int seg = t.PathPos( );
                    const KmerPath& q = ( !rcL ? paths[idL] : paths_rc[idL] );
                    KmerPathInterval v( L, q.Segment(seg).Stop( ) );
                    v.AppendToDatabase( Lright, 0, 0 );
                    for ( int u = seg + 1; u < q.NSegments( ); u++ )
                         q.Segment(u).AppendToDatabase( Lright, 0, 0 );    }
               for ( int j = 0; j < Rext.isize( ); j++ )
               {    const tagged_rpint& t = pathsDB[ Rext[j] ];
                    Bool rcR = ( t.PathId( ) < 0 );
                    int idR = ( !rcR ? t.PathId( ) : -t.PathId( )-1 );
                    if ( idR == id ) continue;
                    int seg = t.PathPos( );
                    const KmerPath& q = ( !rcR ? paths[idR] : paths_rc[idR] );
                    KmerPathInterval v( q.Segment(seg).Start( ), R );
                    v.AppendToDatabase( Rleft, 0, 0 );
                    for ( int u = 0; u < seg; u++ )
                         q.Segment(u).AppendToDatabase( Rleft, 0, 0 );    }
               Prepare(Lright), Prepare(Rleft);
               vec<longlong> e;

               // Rightmost kmer of p found in Lright; jL ends at -1 if none.

               for ( jL = p.NSegments( ) - 1; jL >= 0; jL-- )
               {    if ( p.isGap(jL) ) continue;
                    Contains( Lright, p.Segment(jL), e );
                    if ( e.nonempty( ) )
                    {    L = p.Segment(jL).Start( );
                         for ( int u = 0; u < e.isize( ); u++ )
                         {    const tagged_rpint& t = Lright[ e[u] ];
                              L = Max( L, Min( t.Stop( ), 
                                   p.Segment(jL).Stop( ) ) );    }
                         break;    }    }    

               // Leftmost kmer of p found in Rleft; jR ends at p.NSegments( )
               // if none.

               for ( jR = 0; jR < p.NSegments( ); jR++ )
               {    if ( p.isGap(jR) ) continue;
                    Contains( Rleft, p.Segment(jR), e );
                    if ( e.nonempty( ) )
                    {    R = p.Segment(jR).Stop( );
                         for ( int u = 0; u < e.isize( ); u++ )
                         {    const tagged_rpint& t = Rleft[ e[u] ];
                              R = Min( R, Max( t.Start( ), 
                                   p.Segment(jR).Start( ) ) );    }
                         break;    }    }    }

          // Now L'' is defined by the updated values of L and jL,
          // and R'' is defined by the updated values of R and jR.

          // Reject the read if either search came up empty, or if L'' is not
          // strictly left of R''.  The "not found" value of jR is
          // p.NSegments( ), not a negative number, so it must be tested 
          // explicitly; otherwise p.Segment(jR) below would be indexed out of
          // range.

          if ( jL < 0 || jR >= p.NSegments( ) ) continue;
          if ( jL > jR || ( jL == jR && L >= R ) ) continue;

          // Check to see that there is a read containing L'' and extending right 
          // as far as R'', and there is a read containing R'', and extending left 
          // as far as L''.

          vec<longlong> Lext, Rext;
          Contains( pathsDB, L, Lext ), Contains( pathsDB, R, Rext );
          int best_farright = 0, best_farleft = 0;
          for ( int j = 0; j < Lext.isize( ); j++ )
          {    const tagged_rpint& t = pathsDB[ Lext[j] ];
               Bool rcL = ( t.PathId( ) < 0 );
               int idL = ( !rcL ? t.PathId( ) : -t.PathId( )-1 );
               if ( idL == id ) continue;
               int seg = t.PathPos( );
               const KmerPath& q = ( !rcL ? paths[idL] : paths_rc[idL] );
               int farright = q.Segment(seg).Stop( ) - L;
               for ( int u = seg + 1; u < q.NSegments( ); u++ )
                    farright += q.MinLength(u);
               best_farright = Max( best_farright, farright );    }
          for ( int j = 0; j < Rext.isize( ); j++ )
          {    const tagged_rpint& t = pathsDB[ Rext[j] ];
               Bool rcR = ( t.PathId( ) < 0 );
               int idR = ( !rcR ? t.PathId( ) : -t.PathId( )-1 );
               if ( idR == id ) continue;
               int seg = t.PathPos( );
               const KmerPath& q = ( !rcR ? paths[idR] : paths_rc[idR] );
               int farleft = R - q.Segment(seg).Start( );
               for ( int u = 0; u < seg; u++ )
                    farleft += q.MinLength(u);
               best_farleft = Max( best_farleft, farleft );    }

          // sep = distance along p from L'' to R'', counting intervening
          // segments at their minimum length.

          int sep;
          if ( jL == jR ) sep = R - L;
          else
          {    sep = p.Segment(jL).Stop( ) - L + R - p.Segment(jR).Start( );
               for ( int u = jL + 1; u <= jR - 1; u++ )
                    sep += p.MinLength(u);    }
          if ( best_farleft + best_farright < sep + 10 ) continue;
          chimeras.push_back(id);
          // cout << "\n";
          // PRINT3( id, best_farleft, best_farright );    
          // PRINT4( L, jL, R, jR );
          // PRINT(sep);
          // PRINT(p);
               }

     // Delete the inserts having a chimeric end read.  chimeras was filled in
     // the order of enders, which derives from the sorted end_reads, so it is
     // in nondecreasing order for the BinMember lookups.

     vec<Bool> remove( inserts.size( ), False );
     for ( int i = 0; i < inserts.isize( ); i++ )
     {    const CompletedInsert& I = inserts[i];
          if ( BinMember( chimeras, I.Id1( ) ) || BinMember( chimeras, I.Id2( ) ) )
               remove[i] = True;    }
     EraseIf( inserts, remove );    }


// Construct a writer bound to the given piler and start a fresh output file.
CompletedInsertWriter::CompletedInsertWriter( const String& filename,
                                              const ReadsOnPathPiler& piler )
  : m_piler( piler )
{
  // Open() sets m_count and m_fd.
  Open( filename );
}

// Finalize and close the output file when the writer goes away.
CompletedInsertWriter::~CompletedInsertWriter()
{
  Close();
}

// Start a new output file: reset the running count, call Remove( ) on
// filename (presumably deleting any existing file -- confirm against its
// definition), open it for writing, and write the current (zero) count as the
// first sizeof(int) bytes.  Close( ) later rewrites those bytes.
void CompletedInsertWriter::Open( const String& filename )
{
  // Reset the count before anything is written.
  m_count = 0;

  // Replace whatever was at filename with a freshly opened descriptor.
  Remove(filename);
  m_fd = OpenForWrite(filename);

  // Placeholder header; the real count is filled in by Close( ).
  WriteBytes( m_fd, &m_count, sizeof(int) );
}

// Finish the file: seek back to offset 0, write the current m_count over the
// count slot that Open( ) wrote at the start of the file, close the
// descriptor, and set m_fd to 0 so that later calls do nothing.
void CompletedInsertWriter::Close()
{
  // m_fd == 0 is this class's marker for "nothing open".
  if ( m_fd == 0 ) return;

  lseek( m_fd, 0, SEEK_SET );
  WriteBytes( m_fd, &m_count, sizeof(int) );
  close(m_fd);

  // Mark as closed so the destructor (or a repeated call) is harmless.
  m_fd = 0;
}

// Serialize this object to the descriptor fd.  Layout: sizeof(int) bytes each
// from id1_, id2_, min_length_, max_length_, min_perfect_match_ (in that
// order), followed by whatever paths_.BinaryWrite emits.  BinaryRead consumes
// the same layout.
void CompletedInsertBasesOnly::BinaryWrite( int fd ) const
{
     // The two ids.
     WriteBytes( fd, &id1_, sizeof(int) );
     WriteBytes( fd, &id2_, sizeof(int) );

     // Length bounds and minimum perfect match.
     WriteBytes( fd, &min_length_, sizeof(int) );
     WriteBytes( fd, &max_length_, sizeof(int) );
     WriteBytes( fd, &min_perfect_match_, sizeof(int) );

     // The paths themselves.
     paths_.BinaryWrite(fd);
}

// Deserialize this object from the descriptor fd, reading fields in the same
// order BinaryWrite emits them: id1_, id2_, min_length_, max_length_,
// min_perfect_match_ (sizeof(int) bytes each), then paths_.
void CompletedInsertBasesOnly::BinaryRead( int fd )
{
     // The two ids.
     ReadBytes( fd, &id1_, sizeof(int) );
     ReadBytes( fd, &id2_, sizeof(int) );

     // Length bounds and minimum perfect match.
     ReadBytes( fd, &min_length_, sizeof(int) );
     ReadBytes( fd, &max_length_, sizeof(int) );
     ReadBytes( fd, &min_perfect_match_, sizeof(int) );

     // The paths themselves.
     paths_.BinaryRead(fd);
}

// Load a vector of CompletedInsertBasesOnly from filename.  The file starts
// with an int record count, followed by that many records in the layout read
// by CompletedInsertBasesOnly::BinaryRead.  inserts is resized to the count
// and each element is filled in place.
void BinaryRead( const String& filename, vec<CompletedInsertBasesOnly>& inserts )
{
     int fd = OpenForRead(filename);

     // Leading int: number of records that follow.
     int count;
     ReadBytes( fd, &count, sizeof(int) );

     inserts.resize(count);
     for ( int rec = 0; rec < count; ++rec )
     {
          inserts[rec].BinaryRead(fd);
     }

     close(fd);
}

void BuildLocs( const KmerPath& p, serfvec<SloppyReadPlacement>& theselocs,
     const ReadsOnPathPiler& m_piler )
{    vec<ReadOnPath> path_pile;
     list<KmerPath> subpath_storage;
     m_piler.PileReadsOnPath( p, path_pile, subpath_storage );
     theselocs.clear( );
     for ( int i = 0; i < path_pile.isize( ); i++ )
     {    const ReadOnPath& r = path_pile[i];
          const PathEmbedding& into_read = r.intoRead;
          const PathEmbedding& into_path = r.intoPath;
          KmerPathLocAlt left_end_to, right_end_to;
          Bool left_end_mapped = into_read.SubStartLoc( ).atBegin( );
          Bool right_end_mapped = into_read.SubStopLoc( ).atEnd( );
          if (left_end_mapped)
               left_end_to = KmerPathLocAlt( into_path.SubStartLoc( ) );
          if (right_end_mapped)
               right_end_to = KmerPathLocAlt( into_path.SubStopLoc( ) );
          SloppyPathEmbedding e( left_end_mapped, right_end_mapped,
               left_end_to, right_end_to );
          int read_id;
          if ( r.readId >= 0 ) read_id = r.readId;
          else read_id = -r.readId - 1;
          SloppyReadPlacement s( read_id, r.readId < 0, e );
          theselocs.push_back(s);    }    }

// Build a CompletedInsert from a pair and its closures.
//
// The nominal length is the pair's gap plus the kmer counts of every left and
// right edge of the pair (looked up in h).  min_length_ / max_length_ are that
// length minus / plus dmult * p.Dev( ), rounded outward (floor / ceil).
// min_perfect_match_ is set to 1.  Each closure becomes one entry of paths_,
// formed by appending the edge objects of h named by the closure, in order.
// locs_ is not touched here.
CompletedInsert::CompletedInsert( 

     const int id1, const int id2,             // edge ids
     const pp_pair& p,                         // the pair
     const vec<pp_closure>& closures,          // closures of the pair
     const HyperKmerPath& h,                   // the HyperKmerPath
     const double dmult                        // how much stretch was allowed
          
          )

{
     id1_ = id1;
     id2_ = id2;

     // Accumulate the nominal length: gap first, then left edges, then right.
     double len = p.Gap( );
     for ( int l = 0; l < p.LeftSize( ); l++ )
     {
          len += h.EdgeObject( p.Left(l) ).KmerCount( );
     }
     for ( int r = 0; r < p.RightSize( ); r++ )
     {
          len += h.EdgeObject( p.Right(r) ).KmerCount( );
     }

     // Allowed range, widened by dmult deviations and rounded outward.
     min_length_ = int( floor( len - dmult * p.Dev( ) ) );
     max_length_ = int( ceil( len + dmult * p.Dev( ) ) );
     min_perfect_match_ = 1;

     // One path per closure, assembled in a scratch KmerPath that is kept
     // static so the same object is reused across closures and calls.
     for ( int c = 0; c < closures.isize( ); c++ )
     {
          static KmerPath scratch;
          scratch.Clear( );
          for ( int e = 0; e < closures[c].isize( ); e++ )
          {
               scratch.Append( h.EdgeObject( closures[c][e] ) );
          }
          paths_.push_back_reserve(scratch);
     }
}

void CompletedInsert::FillLocs( const ReadsOnPathPiler& m_piler )
{    locs_.clear( );
     for ( int i = 0; i < paths_.size( ); i++ )
     {    serfvec<SloppyReadPlacement> theselocs;
          BuildLocs( paths_[i], theselocs, m_piler );
          locs_.push_back(theselocs);    }    }
