/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2007) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

// Program: RemoveSuspiciousReads
//
// Get rid of reads containing a kmer which does not appear anywhere else, 
// or reads that cannot be constructed from other reads.
//
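// Files modified (names shown for the default PATHS_BASE=reads; the path
// files exist for K and for each kmer size listed in PATH_KS):
//   reads.fastb
//   reads.pairto
//   reads.pairtob
//   reads.pairto_index
//   reads.paths.k<K>
//   reads.paths_rc.k<K>
//   reads.pathsdb.k<K>
//   reads.true.fastb (when SIM=True)
//   reads.ref.locs* (when SIM=True)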
//
// Files created (just as a backup; "before_removing_suspicious" is the
// default value of BACKUP_BASE):
//   reads.before_removing_suspicious.fastb
//   reads.before_removing_suspicious.pairto
//   reads.before_removing_suspicious.pairtob
//   reads.before_removing_suspicious.pairto_index
//   reads.before_removing_suspicious.paths.k<K>
//   reads.before_removing_suspicious.paths_rc.k<K>
//   reads.before_removing_suspicious.pathsdb.k<K>
//   reads.before_removing_suspicious.fastb.nonunique.k<k> (when present)
//   reads.before_removing_suspicious.true.fastb (when SIM=True)
//   reads.before_removing_suspicious.ref.locs* (when SIM=True)
//
// Part of <Read filtering>.

#include "MainTools.h"

#include "Basevector.h"
#include "KmerRecord.h"
#include "ParseSet.h"
#include "ReadLocation.h"
#include "ReadPairing.h"
#include "KmerShape.h"
#include "kmer_freq/KmerShortMap.h"
#include "paths/KmerPath.h"
#include "paths/NearUnipathErrors.h"
#include "paths/ReadsToPathsCoreX.h"
#include "paths/Unipath.h"
#include "paths/UnipathCoverageCore.h"

/*
   Local function: FindWeakReads

   Identify reads that contain a <weak kmer>, according to the given <strong kmers table>.
*/
void FindWeakReads( const KmerShapeId& kmerShapeId,
		    const vecbasevector& reads,
		    const String& tableName,
		    vec<read_id_t>& reads_to_delete ) 
{
  KmerShortMap strongTable( kmerShapeId, tableName );

  cout << "RemoveSuspiciousReads: Finding weak reads among " <<
    reads.size() << " reads..." << endl;
  for ( int i = 0; i < reads.size(); ++i ) {
    if ( !(i % 100000) )
      cout << Date() << " i=" << i << endl;
    if ( ! strongTable.IsStrong( reads[i] ) )
      reads_to_delete.push_back( i );
  }
  
  UniqueSort( reads_to_delete );
}


void FindUnbuildableReads( const vec<tagged_rpint>& paths_db,
			   const vecKmerPath& paths,
			   const vecKmerPath& paths_rc,

			   const vec<nbases_t>& PATH_KS,
			   const vec< vec<tagged_rpint> >& extra_paths_db,
			   const vec< vecKmerPath >& extra_paths,
			   const vec< vecKmerPath >& extra_paths_rc,			   
			   
			   vec<read_id_t>& readsToDelete ) 
{
  vec<read_id_t> unbuildableReads;
  cout << "finding unbuildable reads among " << paths.size() << " total reads of which " <<
    readsToDelete.size() << " were deleted..." << endl;
  HashSimple readsToDeleteHash(65536);
  for ( int i=0; i < readsToDelete.isize(); i++ )
    readsToDeleteHash.Insert( readsToDelete[i] );

  vec<int> numFoundBuildable( PATH_KS.size(), 0 );
  for ( read_id_t i = 0; i < paths.size(); ++i ) {
    if ( !(i % 40000) ) {
      cout << Date() << "  i=" << i << " ";
      for ( int z = 0; z < PATH_KS.isize(); z++ )
	cout << " buildable with K=" << PATH_KS[z] << " : " << numFoundBuildable[z] << " ";
      cout << endl;
    }
    
    if ( readsToDeleteHash.Has( i ) )
      continue;

    Bool buildFound = False;
    for ( int z = PATH_KS.isize() - 1; !buildFound && z >= 0; z-- ) {
      if ( SubContig( extra_paths[z][i], extra_paths[z], extra_paths_rc[z], extra_paths_db[z],
		      &readsToDeleteHash, i ) ) {
	buildFound = True;
	numFoundBuildable[z]++;
      }
    }
    
    if ( !buildFound  &&  ! SubContig( paths[i], paths, paths_rc, paths_db, &readsToDeleteHash, i ) )
      unbuildableReads.push_back( i );
  }

  readsToDelete.append( unbuildableReads );
  UniqueSort( readsToDelete );
}


int main( int argc, char *argv[] )
{
  RunTime( );

  BeginCommandArguments;
  CommandArgument_String(PRE);
  CommandArgument_String(DATA);
  CommandArgument_String(RUN);
  CommandArgument_UnsignedInt(K);
  CommandArgument_IntSet_OrDefault_Doc(PATH_KS, "", "additional kmer sizes for which read paths have been created");
  CommandArgument_String(LOW_FREQ_K);
  CommandArgument_String_OrDefault(PATHS_BASE,"reads");
  CommandArgument_String_OrDefault(BACKUP_BASE,
				   "before_removing_suspicious");
  // if RESTORE_BACKUP=True, restore original path files and exit.
  CommandArgument_Bool_OrDefault(RESTORE_BACKUP, False);
  CommandArgument_Bool_OrDefault(SIM, False);
  CommandArgument_Bool_OrDefault(REMOVE_UNBUILDABLE, True);
  CommandArgument_Bool_OrDefault(RECOVER_GAPS, False);
  CommandArgument_Bool_OrDefault(RECOVER_GAPS_UNEDITED, False);
  CommandArgument_Bool_OrDefault(VERBOSE, False);
  CommandArgument_Bool_OrDefault(FIX_NEAR_UNIPATH_ERRORS, False);
  CommandArgument_String_OrDefault(STRONG_IN, "")
  CommandArgument_Int_OrDefault(PLOIDY, 1 );

  EndCommandArguments;

  vec<KmerShapeId> low_freq_k;
  ParseKmerShapeIdSet( LOW_FREQ_K, low_freq_k, false /* don't abort if bad */ );
  UniqueSort( low_freq_k );

  UniqueSort( PATH_KS );

  String run_dir = PRE + "/" + DATA + "/" + RUN;
  String paths_base = run_dir + "/" + PATHS_BASE;
  String backup_base = paths_base + "." + BACKUP_BASE;

  if ( ! IsDirectory( run_dir ) ) {
    cout << "The directory " << run_dir << " does not exist." << endl;
    TracebackThisProcess();
  }

  // Load protected read file.

  vec<Bool> Protected;
  if ( IsRegularFile(run_dir + "/reads.protected.orig") )
    BinaryRead3(run_dir + "/reads.protected.orig" , Protected );

  // Restore backups.
  
  if( RESTORE_BACKUP ) {
    Mv(backup_base + ".fastb", paths_base + ".fastb");
    Mv(backup_base + ".pairto", paths_base + ".pairto");
    Mv(backup_base + ".pairtob", paths_base + ".pairtob");
    Mv(backup_base + ".pairto_index", paths_base + ".pairto_index");

    for ( int i=-1; i < PATH_KS.isize(); i++ ) {
      String k = ToString( i<0 ? K : PATH_KS[i] );
      Mv(backup_base + ".paths.k" + k,   
         paths_base + ".paths.k" + k );
      Mv(backup_base + ".paths_rc.k" + k,
         paths_base + ".paths_rc.k" + k );
      Mv(backup_base + ".pathsdb.k" + k, 
         paths_base + ".pathsdb.k" + k );
    }
    if ( SIM ) {
      Mv(backup_base + ".true.fastb", paths_base + ".true.fastb");
      Mv(backup_base + ".ref.locs", paths_base + ".ref.locs");
      Mv(backup_base + ".ref.locs_index", paths_base + ".ref.locs_index");
      Mv(backup_base + ".ref.locs_indexr", paths_base + ".ref.locs_indexr");
    }
    for ( int i = 0; i < low_freq_k.isize(); ++i ) {
      String kstr = ToString( low_freq_k[i] );
      if ( IsRegularFile( backup_base + ".fastb.nonunique.k" + kstr ) )
        Mv( backup_base + ".fastb.nonunique.k" + kstr, 
             paths_base + ".fastb.nonunique.k" + kstr );
    }
    Remove( paths_base + ".id_map" );
    cout << "Restored original paths files.  Exiting." << endl;
    exit(0);
  }
  if( IsRegularFile( backup_base + ".paths.k" + ToString(K) ) ) {
    FatalErr( "The backup files, eg " << BACKUP_BASE + ".paths.k" + ToString(K)
	      << ",\nalready exist.  To operate on the original data again,"
	      << "\nrun with RESTORE_BACKUP=True once, then rerun this command."
	      << endl );
  }	      

  // Back up the old paths files, just in case:

  Mv(paths_base + ".fastb",   backup_base + ".fastb");
  Mv(paths_base + ".pairto",   backup_base + ".pairto");
  Mv(paths_base + ".pairtob", backup_base + ".pairtob");
  Mv(paths_base + ".pairto_index", backup_base + ".pairto_index");

  for ( int i = -1; i < PATH_KS.isize(); i++) {
    String k = ToString( i<0 ? K : PATH_KS[i] );
    Mv(paths_base + ".paths.k" + k,
       backup_base + ".paths.k" + k);
    Mv(paths_base + ".paths_rc.k" + k,
       backup_base + ".paths_rc.k" + k);
    Mv(paths_base + ".pathsdb.k" + k, 
       backup_base + ".pathsdb.k" + k);
  }
  
  if ( SIM ) {
    Mv(paths_base + ".true.fastb", backup_base + ".true.fastb");
    Mv(paths_base + ".ref.locs", backup_base + ".ref.locs");
    Mv(paths_base + ".ref.locs_index", backup_base + ".ref.locs_index");
    Mv(paths_base + ".ref.locs_indexr", backup_base + ".ref.locs_indexr");
  }
  for ( int i = 0; i < low_freq_k.isize(); ++i ) {
    String kstr = ToString( low_freq_k[i] );
    if ( IsRegularFile( paths_base + ".fastb.nonunique.k" + kstr ) )
      Mv( paths_base + ".fastb.nonunique.k" + kstr, 
           backup_base + ".fastb.nonunique.k" + kstr );
  }

  // Compute paths_orig.

  vecKmerPath paths_orig;
  if ( RECOVER_GAPS_UNEDITED ) {
    vecbasevector reads_orig( paths_base + ".fastb.orig" );
    longlong genome_size = 0;
    if ( IsRegularFile( PRE + "/" + DATA + "/genome.size" ) ) {
      genome_size = StringOfFile( PRE + "/" + DATA + "/genome.size", 1 ).Int( );    
    }
    else {
      ForceAssert( IsRegularFile( PRE + "/" + DATA + "/genome.fastb" ) );
      vecbasevector genome( PRE + "/" + DATA + "/genome.fastb" );
      for ( int i = 0; i < genome.size( ); i++ )
        genome_size += genome[i].size( );    
    }
    cout << "Pathing unedited reads." << endl;
    ReadsToPathsCoreX( reads_orig, K, genome_size, paths_orig );    
  }

  // Load data.

  vecKmerPath paths( backup_base + ".paths.k" + ToString(K) );
  vecKmerPath paths_rc( backup_base + ".paths_rc.k" + ToString(K) );
  BREAD2( backup_base + ".pathsdb.k" + ToString(K), vec<tagged_rpint>, paths_db );

  vec< vecKmerPath > extra_paths( PATH_KS.size() );
  vec< vecKmerPath > extra_paths_rc( PATH_KS.size() );
  vec< vec<tagged_rpint> > extra_paths_db( PATH_KS.size() );

  for ( int i = 0; i < PATH_KS.isize(); i++ ) {
    extra_paths[i].ReadAll( backup_base + ".paths.k" + ToString( PATH_KS[i] ) );
    extra_paths_rc[i].ReadAll( backup_base + ".paths_rc.k" + ToString( PATH_KS[i] ) );
    BinaryRead2( backup_base + ".pathsdb.k" + ToString( PATH_KS[i] ), extra_paths_db[i] );
  }
  

  // Determine which reads are unmodified and which are perfect.

  vec<Bool> unmodified( paths.size( ) ), perfect;
  {
    vecbasevector reads( backup_base + ".fastb" );
    vecbasevector reads_orig( paths_base + ".fastb.orig" );
    for ( int i = 0; i < reads.size( ); i++ )
      unmodified[i] = ( reads[i] == reads_orig[i] );
    if (SIM) {
      vecbasevector true_reads( backup_base + ".true.fastb" );
      perfect.resize( reads.size( ) );
      for ( int i = 0; i < reads.size( ); i++ )
        perfect[i] = ( reads[i] == true_reads[i] );
    }
  }

  // Load true read locations.

  vec<read_location> ref_locs;
  if ( SIM ) {
    READX( backup_base + ".ref.locs", ref_locs );
  }

  // Find weak reads.

  vec<read_id_t> reads_to_delete;
  String strongKmersFileBase = paths_base + ".fastb.orig.strong.k";
  if (STRONG_IN != "")
    strongKmersFileBase = run_dir + "/" + STRONG_IN + ".k";
  if ( low_freq_k.nonempty( ) ) {
    vecbasevector reads( backup_base + ".fastb" );
    for ( int i = 0; i < low_freq_k.isize(); ++i ) {
      String kmers_file = strongKmersFileBase + ToString(low_freq_k[i]);
      FindWeakReads( low_freq_k[i], reads, kmers_file, reads_to_delete );
      
      // Remove protected reads from reads_to_delete.
      
      if (!Protected.empty()) {
        vec<read_id_t> reads_to_delete2;
        for ( int i = 0; i < reads_to_delete.isize( ); i++ ) {
          if ( !Protected[ reads_to_delete[i] ] )
            reads_to_delete2.push_back( reads_to_delete[i] );    
        }
        reads_to_delete = reads_to_delete2;
      }
      BinaryWrite3( paths_base + ".deleted_by_lowfreq", reads_to_delete );
      PRINT( reads_to_delete.size() );    
    }
  }

  int numReadsWithLowFreqKmers = reads_to_delete.size();
  cout << "Reads with low-frequency kmers: " << numReadsWithLowFreqKmers << endl;

  if ( REMOVE_UNBUILDABLE ) {
    FindUnbuildableReads( paths_db, paths, paths_rc,
			  PATH_KS, extra_paths_db, extra_paths, extra_paths_rc,
			  reads_to_delete );

    int numUnbuildableReads = reads_to_delete.size() - numReadsWithLowFreqKmers;
    cout << "Unbuildable reads: " << numUnbuildableReads << endl;
  }
  
  // Attempt to recover some of the deleted reads:
  // 1. Find non-deleted reads whose last kmer does not have a next next kmer
  //    in the non-deleted reads.
  // 2. Find deleted reads that extend these reads, whose last kmer is in an
  //    non-deleted read, and which aligns to that read.
  // 3. Undelete these deleted reads.
  //
  // If RECOVER_GAPS_UNEDITED, then also do the following (possibly very expensive)
  // steps:
  // 4. Find reads as in #1 that are unedited and for which #2 failed.
  // 5. Find the non-deleted, before-error-correction reads that extend these
  //    reads, whose last kmer is in a before-error-correction non-deleted read,
  //    and which align to that read.
  // 6. Revert these reads to their unedited version.

  if (RECOVER_GAPS) {
    cout << "\nAttempting to recover deleted reads that cross gaps:\n";
    longlong genome_size = 0;
    if ( IsRegularFile( PRE + "/" + DATA + "/genome.size" ) ) {
      genome_size = StringOfFile( PRE + "/" + DATA + "/genome.size", 1 ).Int( );    
    }
    else {
      ForceAssert( IsRegularFile( PRE + "/" + DATA + "/genome.fastb" ) );
      vecbasevector genome( PRE + "/" + DATA + "/genome.fastb" );
      for ( int i = 0; i < genome.size( ); i++ )
        genome_size += genome[i].size( );    
    }
    vecKmerPath paths_rc_orig(paths_orig);
    for ( int i = 0; i < paths_rc_orig.size( ); i++ )
      paths_rc_orig[i].Reverse( );
    vec<tagged_rpint> paths_db_orig;
    if ( RECOVER_GAPS_UNEDITED )
      CreateDatabase( paths_orig, paths_rc_orig, paths_db_orig );
    vec<int> recover, revert;
    vec<Bool> to_delete( paths.size( ) );
    for ( int i = 0; i < reads_to_delete.isize( ); i++ )
      to_delete[ reads_to_delete[i] ] = True;
    vec<int> locs_index;
    if (SIM) {
      locs_index.resize( paths.size( ) );
      for ( int i = 0; i < ref_locs.isize( ); i++ ) {
        const read_location& rl = ref_locs[i];
        locs_index[ rl.ReadId( ) ] = i;    
      }
    }
    for ( read_id_t id = 0; id < paths.size( ); id++ ) {
      if ( VERBOSE && SIM && !(id % 20000) )
	cout << Date() << "  id=" << id << endl;
      if ( to_delete[id] ) continue;
      for ( int pass = 1; pass <= 2; pass++ ) {
        const KmerPath& p = ( pass == 1 ? paths[id] : paths_rc[id] );
        kmer_id_t last = p.LastSegment( ).Stop( );

	KmerOccurrenceIter<tagged_rpint> kmerOccIter( paths_db, last );
	
        Bool terminal = True;
        while ( kmerOccIter.hasNext() ) {
          const tagged_rpint& t = paths_db[ kmerOccIter.next() ];
          read_id_t idx = t.ReadId( );
          if ( to_delete[idx] ) continue;
          const KmerPath& q = ( t.PathId( ) >= 0 ? paths[idx] : paths_rc[idx] );
          if ( last < t.Stop( ) || t.PathPos( ) < q.NSegments( ) - 1 ) {
            terminal = False;
            break;
          }
        }
        if ( !terminal ) continue;
        int extensions = 0;
        if ( VERBOSE && SIM )
          cout << "\nid = " << id << ", last = " << last << "\n";
	KmerOccurrenceIter<tagged_rpint> kmerOccIter2( paths_db, last );
        while ( kmerOccIter2.hasNext() ) {
          const tagged_rpint& t = paths_db[ kmerOccIter2.next() ];
          int idx = t.ReadId( );
          if ( !to_delete[idx] ) continue;
          const KmerPath& q = ( t.PathId( ) >= 0 ? paths[idx] : paths_rc[idx] );
          int qpos = t.PathPos( );
          if ( !( last < t.Stop( ) || qpos < q.NSegments( ) - 1 ) )
            continue;
          longlong lastx = q.LastSegment( ).Stop( );

	  
          KmerOccurrenceIter<tagged_rpint> kmerOccIter3( paths_db, lastx );
          Bool last_good = False;
          while ( kmerOccIter3.hasNext() ) {
            const tagged_rpint& t = paths_db[ kmerOccIter3.next() ];
            int idy = t.ReadId( );
            const KmerPath& r = ( t.PathId( ) >= 0 ? paths[idy] : paths_rc[idy] );
            if ( !to_delete[idy] ) {
              if ( !ProperOverlapExt( q, r, q.NSegments( ) - 1, t.PathPos( ) ) ) {
                continue;
              }
              last_good = True;
              break;
            }
          }
          if ( !last_good ) continue;
          if ( !ProperOverlapExt( p, q, p.NSegments( ) - 1, qpos ) )
            continue;
          if ( VERBOSE && SIM ) {
            const read_location& rl = ref_locs[ locs_index[idx] ];
            cout << "restoring "
                 << idx << " " << rl.Contig( ) << "." 
                 << rl.Start( ) << "-" << rl.Stop( ) + 1 << " "
                 << ( perfect[idx] ? "TRUE" : "FALSE" ) 
                 << "\n";
            cout << q << "\n";    
          }
          ++extensions;
          recover.push_back(idx);    
        }
        if ( extensions > 0 ) continue;
        if ( !unmodified[id] ) continue;
        if ( !RECOVER_GAPS_UNEDITED ) continue;
        const KmerPath& po = ( pass == 1 ? paths_orig[id] : paths_rc_orig[id] );
        kmer_id_t lasto = po.LastSegment( ).Stop( );
        KmerOccurrenceIter<tagged_rpint> kmerOccIter4( paths_db_orig, lasto );
        while ( kmerOccIter4.hasNext() ) {
          const tagged_rpint& t = paths_db_orig[ kmerOccIter4.next() ];
          int idx = t.ReadId( );
          if ( to_delete[idx] ) continue;
          const KmerPath& q = ( t.PathId( ) >= 0 
                                ? paths_orig[idx] : paths_rc_orig[idx] );
          int qpos = t.PathPos( );
          if ( !( lasto < t.Stop( ) || qpos < q.NSegments( ) - 1 ) )
            continue;
          longlong lastx = q.LastSegment( ).Stop( );

          KmerOccurrenceIter<tagged_rpint> kmerOccIter5( paths_db_orig, lastx );
          Bool last_good = False;
          while ( kmerOccIter5.hasNext() ) {
            const tagged_rpint& t = paths_db_orig[ kmerOccIter5.next() ];
            int idy = t.ReadId( );
            const KmerPath& r = ( t.PathId( ) >= 0 
                                  ? paths_orig[idy] : paths_rc_orig[idy] );
            if ( !to_delete[idy] ) {
              if ( !ProperOverlapExt( q, r, q.NSegments( ) - 1, t.PathPos( ) ) ) {
                continue;
              }
              last_good = True;
              break;
            }
          }
          if ( !last_good ) continue;
          if ( !ProperOverlapExt( po, q, po.NSegments( ) - 1, qpos ) )
            continue;
          if ( VERBOSE && SIM ) {
            const read_location& rl = ref_locs[ locs_index[idx] ];
            cout << "reverting "
                 << idx << " " << rl.Contig( ) << "." 
                 << rl.Start( ) << "-" << rl.Stop( ) + 1 << " "
                 << ( perfect[idx] ? "TRUE" : "FALSE" ) << " "
                 << ( unmodified[idx]
                      ? "ORIGINAL" : "EDITED" ) << "\n";
            cout << q << "\n";    
          }
          ++extensions;
          revert.push_back(idx);
        }
      }
    }
    UniqueSort(recover), UniqueSort(revert);
    cout << "\nRecovering " << recover.size( ) << " reads\n";
    vec<read_id_t> reads_to_delete2;
    for ( int i = 0; i < reads_to_delete.isize( ); i++ ) {
      if ( !BinMember( recover, reads_to_delete[i] ) )
        reads_to_delete2.push_back( reads_to_delete[i] );    
    }
    reads_to_delete = reads_to_delete2;
    /*
      cout << "\nReverting " << revert.size( ) << " reads\n";
      for ( int i = 0; i < revert.isize( ); i++ )
      reads[ revert[i] ] = reads_orig[ revert[i] ];    
      if ( revert.nonempty( ) )
      {    ReadsToPathsCoreX( reads, K, genome_size, paths );
      paths_rc = paths;
      for ( int i = 0; i < paths_rc.size( ); i++ )
      paths_rc[i].Reverse( );
      CreateDatabase( paths, paths_rc, paths_db );    }    
    */
  }

  // Load reads.
  
  vecbasevector reads( backup_base + ".fastb" );
  
  // Fix near unipath errors.
  
  if (FIX_NEAR_UNIPATH_ERRORS) {
    vec<int> id_map_back;
    vec<int>::iterator r = reads_to_delete.begin( );
    int curr_id = 0;
    for ( int i = 0; i < reads.size( ); ++i ) {
      if ( i == *r ) 
        ++r;
      else {
        id_map_back.push_back(i);
        if ( i != curr_id ) {
          paths.SwapElements( curr_id, i );
          paths_rc.SwapElements( curr_id, i );    
        }
        ++curr_id;
      }
    }
    paths.resize(curr_id);
    paths_rc.resize(curr_id);
    CreateDatabase( paths, paths_rc, paths_db );
    vecKmerPath unipaths;
    vec<tagged_rpint> unipathsdb;
    Unipath( paths, paths_rc, paths_db, unipaths, unipathsdb );
    BREAD2( run_dir + "/reads.lengths", vec<int>, lengths );
    EraseTheseIndices( lengths, reads_to_delete );
    vecvec<pdf_entry> cp;
    UnipathCoverageCore( K, paths, paths_rc, paths_db, unipaths, unipathsdb,
                         lengths, cp, -1, 0.0001, 0.0, 0, PLOIDY );
    vec<int> predicted_copyno( unipaths.size( ), -1 );
    vec<double> predicted_copyno_p( unipaths.size( ), -1 );
    for ( int i = 0; i < unipaths.size( ); i++ ) {
      int copyno = -1;
      double maxp = 0;
      for ( int j = 0; j < cp[i].size( ); j++ ) {
        if ( cp[i][j].second > maxp ) {
          copyno = cp[i][j].first;
          maxp = cp[i][j].second;
        }
      }
      predicted_copyno[i] = copyno;
      predicted_copyno_p[i] = maxp;    
    }
    vec<Bool> to_delete;
    NearUnipathErrors( paths, paths_rc, paths_db, unipaths, predicted_copyno,
                       predicted_copyno_p, to_delete );
    for ( int id0 = 0; id0 < to_delete.isize( ); id0++ ) {
      if ( to_delete[id0] ) {
        int id = id_map_back[id0];
        if ( SIM && perfect[id] ) {
          cout << "Warning: deleting read " << id
               << " (was labeled " << id0 << "), which "
               << "is a correct read.\n";    
        }
        reads_to_delete.push_back(id);
      }
    }
    UniqueSort(reads_to_delete);
    paths.ReadAll( backup_base + ".paths.k" + ToString(K) );
    paths_rc.ReadAll( backup_base + ".paths_rc.k" + ToString(K) );
    BREADX2( backup_base + ".pathsdb.k" + ToString(K), paths_db );    
  }

  // Remove protected reads from reads_to_delete.

  if (!Protected.empty()) {
    vec<read_id_t> reads_to_delete2;
    for ( int i = 0; i < reads_to_delete.isize( ); i++ ) {
      if ( !Protected[ reads_to_delete[i] ] )
	reads_to_delete2.push_back( reads_to_delete[i] );    
    }
    reads_to_delete = reads_to_delete2;
  }

  cout << "Deleting " << reads_to_delete.size() 
       << " suspicious reads (out of " << paths.size() << ")," 
       << " leaving " << paths.size() - reads_to_delete.size() << "." << endl;

  // Load some more data.

  vec<read_pairing> pairs;
  ReadPairsFile( backup_base + ".pairto", pairs );
  vecbasevector true_reads;
  if (SIM) true_reads.ReadAll( backup_base + ".true.fastb" );

  // Remove old things.

  vec<int> id_map( reads.size(), -1 );
  vec<int>::iterator r = reads_to_delete.begin();
  int curr_id = 0;
  for ( int i = 0; i < id_map.isize(); ++i ) {
    if ( i == *r )
      ++r;
    else {
      id_map[i] = curr_id;
      if ( i != curr_id ) {
        reads.SwapElements( curr_id, i );
        paths.SwapElements( curr_id, i );
        paths_rc.SwapElements( curr_id, i );
	for ( int z = 0; z < PATH_KS.isize(); z++ ) {
	  extra_paths[z].SwapElements( curr_id, i );
	  extra_paths_rc[z].SwapElements( curr_id, i );
	}
        if ( SIM )
          true_reads.SwapElements( curr_id, i );
      }
      ++curr_id;
    }
  }

  reads.resize( curr_id );
  paths.resize( curr_id );
  paths_rc.resize( curr_id );
  for ( int z = 0; z < PATH_KS.isize(); z++ ) {
    extra_paths[z].resize( curr_id );
    extra_paths_rc[z].resize( curr_id );
  }

  if ( SIM ) {
    true_reads.resize( curr_id );

    for ( unsigned int i = 0; i < ref_locs.size(); ++i )
      if ( id_map[ ref_locs[i].ReadId() ] < 0 )
        ref_locs[i].Kill();
      else
        ref_locs[i].SetReadId( id_map[ ref_locs[i].ReadId() ] );
    
    ref_locs.erase( remove_if( ref_locs.begin(), ref_locs.end(),
                               mem_fun_ref( &read_location::IsDead ) ),
                    ref_locs.end() );
  }

  for ( unsigned int p = 0; p < pairs.size(); ++p ) {
    if ( id_map[ pairs[p].id1 ] < 0 ||
         id_map[ pairs[p].id2 ] < 0 )
      pairs[p].Kill();
    else {
      pairs[p].id1 = id_map[ pairs[p].id1 ];
      pairs[p].id2 = id_map[ pairs[p].id2 ];
    }
  }
  
  int pairsBefore = pairs.size();
  pairs.erase( remove_if( pairs.begin(), pairs.end(), 
                          mem_fun_ref( &read_pairing::Dead ) ),
               pairs.end() );
  int pairsRemoved = pairsBefore - pairs.size();

  cout << "Removed " << pairsRemoved << " pairs, leaving " << pairs.size() << "." << endl;

  // Save new files:
  reads.WriteAll( paths_base + ".fastb" );
  paths.WriteAll( paths_base + ".paths.k" + ToString(K) );
  paths_rc.WriteAll( paths_base + ".paths_rc.k" + ToString(K) );

  for ( int z = 0; z < PATH_KS.isize(); z++ ) {
    extra_paths[z].WriteAll( paths_base + ".paths.k" + ToString(PATH_KS[z]) );
    extra_paths_rc[z].WriteAll( paths_base + ".paths_rc.k" + ToString(PATH_KS[z]) );
  }
  
  
  WritePairs( run_dir, pairs, reads.size() );
  if ( SIM ) {
    true_reads.WriteAll( paths_base + ".true.fastb" );
    WriteLocs( paths_base + ".ref.locs", ref_locs, ref_locs.back().Contig() + 1, reads.size() );
  }

  // Create new database, by trimming down the old one in-place:

  for ( int z = -1; z < PATH_KS.isize(); z++ ) {

    vec<tagged_rpint>& this_paths_db =  z<0 ? paths_db : extra_paths_db[z];
    
    vec<tagged_rpint>::iterator rpi, rpj;
    for( rpi = rpj = this_paths_db.begin(); rpi != this_paths_db.end(); rpi++ )
      if( id_map[ rpi->ReadId() ] >= 0 ) {
	int new_path_id = id_map[ rpi->ReadId() ];
	if ( rpi->PathId() < 0 )
	  new_path_id = -new_path_id-1;
	rpi->Set( rpi->Start(), rpi->Length(), new_path_id, rpi->PathPos() );
	*rpj++ = *rpi;
      }
    
    this_paths_db.erase(rpj, rpi);
    
    // But we still need to Prepare() again, to set lookbacks correctly.
    Prepare(this_paths_db);
    
    BinaryWrite3( paths_base + ".pathsdb.k" + ToString(z<0 ? K : PATH_KS[z]), this_paths_db );
    
  }
  BinaryWrite2( paths_base + ".id_map", id_map );
  
}

// Synonyms: Various synonyms
//
//     suspicious read - See <RemoveSuspiciousReads>
//
