/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2005) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////



/// MuxSearchInsert:
///
/// For an overview of Mux-based insert walking, see paths/doc/mux.pdf
///
/// Walk across inserts in a completely depth-first manner, hopping
/// from each (filling of a) read to any of its minimal extensions.
/// Optionally, use only "good" reads, with an easy mechanism for adding 
/// context-sensitive notions of goodness.
///
/// The search only forces local compatibility between successive
/// reads, so successfully locating a closing read does *not* mean the
/// resulting path is globally legal.
///
/// The result of the search is a graph-like structure in which we
/// merge together any cases of the same read appearing at the same
/// offset from the end of the walk.


#include "FastIfstream.h"
#include "Feudal.h"
#include "math/Functions.h"
#include "MainTools.h"
#include "PairsHandler.h"
#include "ParseSet.h"
#include "ParseReadList.h"
#include "ReadLocation.h"
#include "ReadPairing.h"
#include "TaskTimer.h"

#include "paths/CompletedInsert.h"
#include "paths/KmerBaseBroker.h"
#include "paths/KmerPathMuxSearcher.h"
#include "paths/MuxGraph.h"
#include "paths/MuxSearchPolicy.h"   // Where search policies come from
#include "paths/ReadFillDatabase.h"
#include "paths/SuperBaseVector.h"
#include "paths/Unipath.h"
#include "paths/WalkPlot.h"
#include "paths/HyperKmerPath.h"

#include "paths/simulation/SimTrueSequenceBroker.h"

#include "random/Random.h"

#include <set>
#include <algorithm>
#include <functional>


/// ChangeReadsMSP (forward declaration; the definition is not in this part
/// of the file).
///
/// Called from main() when THESE_READS_ONLY is non-empty, with the read ids
/// parsed from that argument and a policy pointer that main() initializes to 0.
///
/// \param searcher             the KmerPathMuxSearcher used for insert walking
/// \param pMSP_theseReadsOnly  policy pointer taken by non-const reference, so
///                             the callee is able to reassign it
/// \param theseReads           read ids parsed from THESE_READS_ONLY
/// \param theReadFillDB        the read fill database also given to the searcher
///
/// NOTE(review): presumably this installs (or replaces) a search policy on
/// `searcher` that restricts the walk to `theseReads`, and stores the new
/// policy in `pMSP_theseReadsOnly` -- confirm against the definition.
void ChangeReadsMSP( KmerPathMuxSearcher& searcher, 
                     MuxSearchPolicy*& pMSP_theseReadsOnly, 
                     const vec<int>& theseReads, 
                     const ReadFillDatabase& theReadFillDB );

/// ChangeOrientedReadsMSP (forward declaration; the definition is not in this
/// part of the file).
///
/// Called from main() when THESE_READS_ONLY_FW or THESE_READS_ONLY_RC is
/// non-empty, with the two read lists parsed from those arguments and a policy
/// pointer that main() initializes to 0.
///
/// \param searcher                     the KmerPathMuxSearcher used for walking
/// \param pMSP_theseOrientedReadsOnly  policy pointer taken by non-const
///                                     reference, so the callee is able to
///                                     reassign it
/// \param theseReadsFw                 read ids parsed from THESE_READS_ONLY_FW
/// \param theseReadsRc                 read ids parsed from THESE_READS_ONLY_RC
/// \param theReadFillDB                the read fill database also given to the
///                                     searcher
///
/// NOTE(review): presumably the orientation-aware counterpart of
/// ChangeReadsMSP, restricting the walk to the given reads in the given
/// orientations -- confirm against the definition.
void ChangeOrientedReadsMSP( KmerPathMuxSearcher& searcher, 
                             MuxSearchPolicy*& pMSP_theseOrientedReadsOnly, 
                             const vec<int>& theseReadsFw, const vec<int>& theseReadsRc, 
                             const ReadFillDatabase& theReadFillDB );

/// SetupLocalizationSim (forward declaration; the definition is not in this
/// part of the file).
///
/// Called from main() when LOCALIZATION_SIM is non-empty: once before the
/// insert loop, and again inside the loop each time num_inserts_per_region
/// inserts have been tried for the current region.  After each call main()
/// reads inserts_to_walk[i] and uses the value as an index into `pairs`.
///
/// \param searcher                the KmerPathMuxSearcher used for walking
/// \param inserts_to_walk         resized by main() to num_inserts_per_region
///                                before the first call; its entries are read
///                                afterwards as indices into `pairs`
/// \param pMSP_theseReadsOnly     policy pointer taken by non-const reference
///                                (the same variable main() passes to
///                                ChangeReadsMSP)
/// \param trueSeq                 broker holding the true read locations for
///                                simulated data
/// \param region_size             second field of LOCALIZATION_SIM
/// \param num_inserts_per_region  third field of LOCALIZATION_SIM
/// \param pairs                   the read pairings being walked
/// \param read_to_pair_map        built by main(): read id -> index into
///                                `pairs`, or -1 if the read is in no pair
/// \param max_sep                 the max_sep command-line value; main() also
///                                skips any insert whose sep exceeds it
/// \param theReadFillDB           the read fill database also given to the
///                                searcher
///
/// NOTE(review): per the LOCALIZATION_SIM option comment in main(), this
/// presumably picks a random genome region of size region_size, chooses
/// num_inserts_per_region inserts from it (subject to max_sep), and restricts
/// the searcher to reads from that region -- confirm against the definition.
void SetupLocalizationSim( KmerPathMuxSearcher& searcher, 
                           vec<int>& inserts_to_walk, 
                           MuxSearchPolicy*& pMSP_theseReadsOnly,
                           SimTrueSequenceBroker& trueSeq, 
                           const int region_size, 
                           const int num_inserts_per_region,
                           const vec<read_pairing>& pairs,
                           const vec<int>& read_to_pair_map,
                           const int max_sep,
                           const ReadFillDatabase& theReadFillDB );

int main( int argc, char *argv[] )
{
  RunTime( );

  BeginCommandArguments;
    CommandArgument_String(PRE);
    CommandArgument_String(DATA);
    CommandArgument_String(RUN);
    CommandArgument_String_OrDefault(PREFIX, "round2");
    CommandArgument_String_OrDefault(PATHSHQ, "pathshq"); // for the piler
    CommandArgument_UnsignedInt_OrDefault(K, 48);

    CommandArgument_UnsignedInt_OrDefault(verbosity, 0);
    // print_closures prints nothing if verbosity=0.
    CommandArgument_Bool_OrDefault(print_closures, (verbosity > 1));
    CommandArgument_Bool_OrDefault(print_closure_sizes, False);

    // Don't do anything (calculate closures, print, etc) for dirty inserts
    CommandArgument_Bool_OrDefault(abandon_dirty_inserts, False);

    CommandArgument_String_OrDefault(THESE_READS_ONLY, "");
    CommandArgument_String_OrDefault(THESE_READS_ONLY_FW, "");
    CommandArgument_String_OrDefault(THESE_READS_ONLY_RC, "");

    // we find insert walks of length mean +- SD_MULT st.dev.
    CommandArgument_Double_OrDefault(SD_MULT, 3);

    CommandArgument_UnsignedInt_OrDefault(MIN_MATCH, 1);

    // Use these options to walk a random bunch of inserts:
    CommandArgument_UnsignedInt_OrDefault(start_at, 0);
    CommandArgument_UnsignedInt_OrDefault(num_to_try, 1);
    CommandArgument_Bool_OrDefault(choose_tries_randomly, False);
    CommandArgument_Bool_OrDefault(seed_random, False);
    CommandArgument_UnsignedInt_OrDefault(seed_random_with, 0);
    CommandArgument_UnsignedInt_OrDefault(max_sep, 6000);
    CommandArgument_Bool_OrDefault(flip_pairs, False); 

    // Give up if the search is taking too long.
    // A search limit of zero means unlimited.
    // As of this writing, we explore about 10 million search nodes
    // per minute, so let's rescale things to let the user enter
    // a number that's approximately seconds.
    CommandArgument_Double_OrDefault( search_limit, 0 );
    longlong search_limitx = int( floor( search_limit * 1000000.0 ) );

    // Give up if the search answer gets too large.
    // The search will abort if the number of nodes in the
    // answer subgraph grows larger than answer_size_limitx.
    // This only gives indirect information about the number of
    // closures; useful to keep a search from running forever.
    CommandArgument_Double_OrDefault( answer_size_limit, 0 );
    longlong answer_size_limitx = int( floor( answer_size_limit * 1000.0 ) );

    // These are IntSet arguments (q.v. ParseSet.h).  If a read in
    // from_read is in a pair, we attempt to walk from the specified
    // read to its partner (regardless of the order of the reads in
    // the read_pairing object).  Similarly, if a read in to_read is
    // in a pair, we attempt to walk from the specified read's partner
    // to the specified read.
    CommandArgument_String_OrDefault(from_read, "");
    CommandArgument_String_OrDefault(to_read, "");

    // Allow specification of pairs (from_read, SD_MULT), so that SD_MULT can be
    // varied from insert to insert.  If specified, from_read_SD_MULT should be
    // of the form @filename, where filename is a list of pairs, one per line.  This
    // option overrides both from_read and SD_MULT.
    CommandArgument_String_OrDefault(from_read_SD_MULT, "");

    CommandArgument_String_OrDefault(SaveCompletedInsertsTo, "");
    CommandArgument_Bool_OrDefault(SaveCleanOnly, False);
    
    // Compute the locs on completed inserts?
    CommandArgument_Bool_OrDefault(ComputeLocs, True);

    // Plot the closures, if there are any
    CommandArgument_Bool_OrDefault(PlotClosures, False);
    // Plot the closures, if there is more than one
    CommandArgument_Bool_OrDefault(PlotMultiClosures, False);
  bool plot_something = PlotClosures || PlotMultiClosures;

    CommandArgument_Bool_OrDefault(PrintEachPath, False);

    CommandArgument_Bool_OrDefault(EstimateClosures, False);
    // If verbosity is low, this may calculate closures and then
    // throw the info away entirely...
    CommandArgument_Bool_OrDefault(CalculateClosures, False);
    CommandArgument_Bool_OrDefault(show_only_true_closure_count, False);

    // If this number of pseudo-closures is exceeded, don't calculate closures.
    // No restriction unless specified.
    CommandArgument_UnsignedInt_OrDefault(MAX_PSEUDO, 0);

    // If calculating closures, abort when you find this many.
    // As usual, 0 means no limit.
    CommandArgument_UnsignedInt_OrDefault(MAX_CLOSURES, 0);
    

  bool calculate_closures = 
    print_closures || print_closure_sizes
    || plot_something
    || (!SaveCompletedInsertsTo.empty())
    || CalculateClosures;
  
    // To show histogram of number of insert closures found:
    CommandArgument_Bool_OrDefault(summary_table, False );

    if ( show_only_true_closure_count ) ForceAssert(CalculateClosures);

    // Use the "paired pairs" criterion for walking.
    // This only makes sense with a special type of data:
    // (1) very high depth of coverage, and
    // (2) insert sizes are so tightly clustered that 
    //     reads substantially overlapping on one end of 
    //     an insert force an overlap on the other end too
    // At the moment, such data is all simulated.
    CommandArgument_Bool_OrDefault(paired_pairs, False );

    // For testing:
    CommandArgument_UnsignedInt_OrDefault(trivial_policies, 0);

    // If SIM=True, declare that data is simulated, and thus that the file
    // run_dir/reads.ref.locs exists, providing the true locations of reads on
    // the genome.
    CommandArgument_Bool_OrDefault(SIM, False);

    // If test_false=True and SIM=True, test to see if only false closures were
    // found.  The only reason to turn this off is if the truth data has a 
    // different kmer-numbering.
    CommandArgument_Bool_OrDefault(test_false, True);

    // If USE_ONLY_TRUE_READS=True, turn on SIM, and when walking an insert, use 
    // only those reads which in fact lie fully within the insert.  
    CommandArgument_Bool_OrDefault(USE_ONLY_TRUE_READS, False);

    // If LOCALIZATION_SIM is non-empty, turn on SIM and simulate localization
    // as follows: select N regions of size S at random from the genome, and
    // select n inserts at random from each region (subject to max_sep), and
    // attempt to close them using only the reads from that region.  This
    // argument should have the form "N,S,n".
    CommandArgument_String_OrDefault(LOCALIZATION_SIM, "");

    // If decompose_truth=True, take true insert (for simulated data),
    // decompose it into unipaths, then describe truth in terms of them.
    // If decompose_truth_brief=True, do the same, except don't actually
    // print KmerPaths.
    CommandArgument_Bool_OrDefault(decompose_truth, False);
    CommandArgument_Bool_OrDefault(decompose_truth_brief, False);

    // If given a KmerPath, will tell you why it's not a closure.
    // To pass on the command line, enclose the path in single-quotes
    CommandArgument_String_OrDefault(FateOfClosure, "");

    // Save a DOT-language representation of the result MuxWalkGraph
    // in this file.  A # in the filename will be replaced with the read id.
    CommandArgument_String_Abbr_OrDefault(DOT, MWG_DOT, "");
    // Save a DOT-language representation of the HyperKmerPath version
    // of the result in this file.  Replaces # as above.
    CommandArgument_String_Abbr_OrDefault(HyperKmerPathDOT, HKP_DOT, "");
    CommandArgument_Int_OrDefault(HKP_min_edges, 2);

    // Search policies that use the OffsetTracker.
    // These can only be used if MakeUnipathedReads has been run.
    CommandArgument_Bool_OrDefault(ABORT_UNREACHABLE, False); 
    // ABORT_UNREACHABLE should affect speed, not answer

  EndCommandArguments;

  if (USE_ONLY_TRUE_READS) SIM = True;
  if (!LOCALIZATION_SIM.empty()) { SIM = True; }
  if ( decompose_truth || decompose_truth_brief ) ForceAssert(SIM);

  // Parse from_read_SD_MULT.

  map<int,double> to_SD_MULT;
  vec<int> special_from_read;
  if ( from_read_SD_MULT != "" )
  {    ForceAssert( from_read_SD_MULT.Contains( "@", 0 ) );
       fast_ifstream in( from_read_SD_MULT.After( "@" ) );
       String line;
       while(1)
       {    getline( in, line );
            if ( in.fail( ) ) break;
            istrstream iline( line.c_str( ) );
            int id;
            double dev;
            iline >> id >> dev;
            special_from_read.push_back(id);
            to_SD_MULT[id] = dev;    }
       UniqueSort(special_from_read);    }

  // seed the random number generator
  if ( seed_random || seed_random_with != 0 ) {
    int seedling = ( seed_random_with != 0 ? seed_random_with : time(0) );
    cout << "Seeding random number generator with " << seedling << "." << endl;
    srandomx( seedling );
  }

  // Set up data
  String dataDir = PRE + "/" + DATA;
  String runDir = PRE + "/" + DATA + "/" + RUN;

  //--------- Things for the KmerPathMuxSearcher:
  //
  //

  // The MuxGraph:
  String muxgraph_file = 
    runDir + "/reads." + PREFIX + "_muxgraph.k" + ToString(K);
  MuxGraph theMuxGraph;
  if( ! theMuxGraph.FilesExist( muxgraph_file ) ) {
    cout << "ERROR: the list of minimal extensions has not been computed."
	 << " Please run\n  FindMuxes DATA=" << DATA 
	 << " RUN=" << RUN << " PATHSHQ=" << PREFIX << endl;
    exit(1);
  }
  theMuxGraph.Read( muxgraph_file );

  // The Read Fill Database:
  String readfillDB_file =
    runDir + "/reads." + PREFIX + "_fillrecords.k" + ToString(K);
  ReadFillDatabase theReadFillDB( readfillDB_file );

  // The list of subsumed reads:
  String subsumption_file = 
    runDir + "/reads." + PREFIX + "_sublist.k" + ToString(K);
  SubsumptionList theSubList;
  if( ! IsRegularFile(subsumption_file) ) {
    cout << "ERROR: the list of read subsumptions has not been computed."
	 << " Please run\n  FindSubsumptions DATA=" << DATA 
	 << " RUN=" << RUN << " PATHSHQ=" << PREFIX 
	 << "\nThe missing file is " << subsumption_file << endl;
    exit(1);
  }
  theSubList.Read( subsumption_file );

  // Get original read lengths (in bases, not kmers!).
  // If super-reads have been created, we need to know their lengths too,
  // so load reads.PREFIX_lengths preferentially, if it exists.
  vec<int> readLengths;
  String lengths_file = runDir + "/reads." + PREFIX + "_lengths.k" + ToString(K);
  if( IsRegularFile( lengths_file ) ) {
    BREADX2( lengths_file, readLengths );
    if( verbosity > 3 )
      cout << "Read lengths from " << lengths_file << endl;
  }
  else {
    BREADX2( runDir + "/reads.lengths", readLengths );
  }

  // If the KmerPathMuxSearcher is working with any policies,
  // it needs to know the read lengths -- in kmers.  Since
  // this is easy to compute, let's just do it regardless.
  vec<int> readLengthsInKmers( readLengths.size() );
  transform( readLengths.begin(), readLengths.end(),
	     readLengthsInKmers.begin(),
	     bind2nd(minus<int>(), K-1) );
  //
  //
  //--------- That's all the KmerPathMuxSearcher needs.



  // If data is simulated, a SimTrueSequenceBroker knows the truth.
  // Set one up (for USE_ONLY_TRUE_READS, decompose_truth, identifying truth)
  SimTrueSequenceBroker trueSeq;
  KmerBaseBroker* kbb = 0;
  if ( SIM )
  {    trueSeq.LoadData( dataDir, runDir, K, readLengths );
       if ( trueSeq.BasevectorMode( ) ) kbb = new KmerBaseBroker( runDir, K );    }

  vec<int> reads_to_use_opener_dir, reads_to_use_closer_dir;
  KmerPath truth;
  basevector truthb;
  MuxSearchPolicy* temp_policy = 0;



  // Get read pairs:
  vec<read_pairing> pairs;
  ReadPairsFile( runDir + "/reads.pairto", pairs );

  bool special_request 
       = ! ( from_read.empty() && to_read.empty() && from_read_SD_MULT == "" );
  if ( special_request )
  {
    cout << "Special request: ignoring "
         << "choose_tries_randomly, start_at, flip_pairs, and num_to_try." 
	 << endl;

    choose_tries_randomly=False;
    start_at=0;
    flip_pairs=False;
    
    vec<int> readIds;

    if ( from_read_SD_MULT == "" ) ParseIntSet( from_read, readIds );
    else readIds = special_from_read;

    vec<read_pairing> pairsToKeep;
    for ( unsigned int pairIdx = 0; pairIdx < pairs.size(); ++pairIdx )
    {
      // If the from read is id1, copy the pair.
      if ( binary_search( readIds.begin(), readIds.end(), pairs[pairIdx].id1 ) )
        pairsToKeep.push_back( pairs[pairIdx] );
      // If the from read is id2, copy the pair but swap the ids in
      // the pair so the from read is id1.
      if ( binary_search( readIds.begin(), readIds.end(), pairs[pairIdx].id2 ) )
      {
        pairsToKeep.push_back( pairs[pairIdx] );
        swap( pairsToKeep.back().id1, pairsToKeep.back().id2 );
      }
    }

    readIds.clear();
    ParseIntSet( to_read, readIds );

    for ( unsigned int pairIdx = 0; pairIdx < pairs.size(); ++pairIdx )
    {
      // If the to read is id1, copy the pair but swap the ids in
      // the pair so the to read is id2.
      if ( binary_search( readIds.begin(), readIds.end(), pairs[pairIdx].id1 ) )
      {
        pairsToKeep.push_back( pairs[pairIdx] );
        swap( pairsToKeep.back().id1, pairsToKeep.back().id2 );
      }
      // If the to read is id2, copy the pair.
      if ( binary_search( readIds.begin(), readIds.end(), pairs[pairIdx].id2 ) )
        pairsToKeep.push_back( pairs[pairIdx] );
    }

    pairs.swap( pairsToKeep );
    num_to_try = pairs.size();
  } // End of special-request mucking around with read pairing information
    

  

  KmerPathMuxSearcher searcher( &theMuxGraph, &theReadFillDB, 
				&theSubList, &readLengthsInKmers,
				verbosity );
  searcher.SetSearchLimit( search_limitx );  // already rescaled
  searcher.SetAnswerSizeLimit( answer_size_limitx );  // already rescaled
  searcher.SetMinPerfectMatch( MIN_MATCH );


  // for paired_pairs or calculating paths for closures, we need more data:
  vecKmerPath pathsFw, pathsRc;

  if( paired_pairs || calculate_closures || PrintEachPath
      || !FateOfClosure.empty() || ! HyperKmerPathDOT.empty() ) {
    pathsFw.ReadAll( runDir + "/reads." + PREFIX + ".k" + ToString(K) );
    pathsRc.ReadAll( runDir + "/reads." + PREFIX + "_rc.k" + ToString(K) );
  }

  phandler pairsHandler( pathsFw.size() );   // size 0, if it won't be used.


  //////////////////////////////////////////////
  //
  //  Creation of requested MuxSearchPolicies:
  //

  if( PrintEachPath )
    searcher.AddPolicy( new MSP_PrintEachPath( &pathsFw, &pathsRc ) );

  if( !FateOfClosure.empty() )
    searcher.AddPolicy( new MSP_FateOfClosure( FateOfClosure,
					       &pathsFw, &pathsRc,
					       &theMuxGraph ) );

  if( paired_pairs ) {
    // Hand a PairedPairs mux search policy to the KmerPathMuxSearcher
    pairsHandler.LoadFromFile( runDir + "/reads.pairto" );
    searcher.AddPolicy( new MSP_PairedPairs( &pathsFw, &pathsRc, 
					     &pairsHandler, verbosity ) );
  }

  // For testing:
  while( trivial_policies-- ) {
    searcher.AddPolicy( new MSP_Trivial() );
  }

  // Use only a subset of the reads:
  MuxSearchPolicy* pMSP_theseReadsOnly = 0;

  if( ! THESE_READS_ONLY.empty() ) {
    vec<int> theseReads;
    ParseReadList( THESE_READS_ONLY, runDir, theseReads );
    ChangeReadsMSP( searcher, pMSP_theseReadsOnly, 
                    theseReads, theReadFillDB );
  }

  MuxSearchPolicy* pMSP_theseOrientedReadsOnly = 0;

  if( THESE_READS_ONLY_FW.nonempty() || THESE_READS_ONLY_RC.nonempty() ) {
    vec<int> theseReadsFw, theseReadsRc;
    ParseReadList( THESE_READS_ONLY_FW, runDir, theseReadsFw );
    ParseReadList( THESE_READS_ONLY_RC, runDir, theseReadsRc );
    ChangeOrientedReadsMSP( searcher, pMSP_theseOrientedReadsOnly,
                            theseReadsFw, theseReadsRc, theReadFillDB );
  }
  
  OffsetTracker offsetTracker;
  if( ABORT_UNREACHABLE ) {
    offsetTracker.Read( runDir + "/reads." + PREFIX + "_offsets.k" + ToString(K) );
    searcher.AddPolicy( new MSP_ReachCloserOrAbort( &offsetTracker ) );
  }


  // For SaveCompletedInsertsTo, we need the original reads.
  // Hmm, we might well already have them.  Let's not load twice!
  bool reads_are_pathshq = (PREFIX == PATHSHQ);
  vecKmerPath unfilled_pathsFw, unfilled_pathsRc;
  vec<tagged_rpint> unfilled_rawPathsDB;
  if( ! SaveCompletedInsertsTo.empty() ) {
    if( ! reads_are_pathshq ) {
      unfilled_pathsFw.ReadAll( runDir + "/reads." + PATHSHQ + ".k" + ToString(K) );
      unfilled_pathsRc.ReadAll( runDir + "/reads." + PATHSHQ + "_rc.k" + ToString(K) );
    }
    BREADX2( runDir + "/reads." + PATHSHQ + "db.k" + ToString(K), 
	     unfilled_rawPathsDB );
  }
  ReadsOnPathPiler piler( reads_are_pathshq ? pathsFw : unfilled_pathsFw, 
			  reads_are_pathshq ? pathsRc : unfilled_pathsRc, 
			  unfilled_rawPathsDB );
  CompletedInsertWriter* p_ciWriter = 0;
  if ( ! SaveCompletedInsertsTo.empty() )
    p_ciWriter = new CompletedInsertWriter( SaveCompletedInsertsTo, piler );
  

  // Set up for plotting closures.  This constructs a plotter which
  // creates its own KmerBaseBroker.  If we add anything else that uses
  // a KBB, we should create the KBB here and pass it in, to avoid loading
  // data twice.  But this is easiest for now.
  WalkPlotter* p_walkPlotter = 0;
  if( plot_something )
    p_walkPlotter = new WalkPlotter( runDir, K );


  int num_local_regions = 0;
  int region_size = 0;
  int num_inserts_per_region = 0;
  vec<int> inserts_to_walk;
  vec<int> read_to_pair_map;

  if (! LOCALIZATION_SIM.empty()) {
    vec<String> tokens;
    Tokenize( LOCALIZATION_SIM, tokens, "," );
    if ( tokens.size() != 3 ||
         ! tokens[0].IsInt() ||
         ! tokens[1].IsInt() ||
         ! tokens[2].IsInt() ) {
      cout << "LOCALIZATION_SIM should have the form \"<int>,<int>,<int>\"." << endl;
    }
    num_local_regions = tokens[0].Int();
    region_size = tokens[1].Int();
    num_to_try = num_inserts_per_region = tokens[2].Int();
    inserts_to_walk.resize( num_inserts_per_region );

    read_to_pair_map.resize( readLengths.size(), -1 );
    for ( unsigned int p = 0; p < pairs.size(); ++p ) {
      read_to_pair_map[ pairs[p].id1 ] = p;
      read_to_pair_map[ pairs[p].id2 ] = p;
    }

    SetupLocalizationSim( searcher, inserts_to_walk, pMSP_theseReadsOnly, 
                          trueSeq, region_size, num_inserts_per_region, 
                          pairs, read_to_pair_map, max_sep, theReadFillDB );
  }

  // Try to walk through random short inserts.

  int numPathsTried = 0;

  int numCleanNotClosed = 0;
  int numCleanUniquelyClosed = 0;
  int numCleanMultiplyClosed = 0;
  int numDirtyNotClosed = 0;
  int numDirtyClosed = 0;

  vec<int> closures_found( 17, 0 ), closures_found_after_explosion( 17, 0 );

  set<int> pathsTried;

  for ( int pi = start_at; pi < (int) pairs.size(); ++pi )
  {
    int p = pi;

    if ( ! LOCALIZATION_SIM.empty() ) {
      if ( numPathsTried == num_inserts_per_region ) {
        if ( --num_local_regions == 0 )
          break;
        numPathsTried = 0;
        SetupLocalizationSim( searcher, inserts_to_walk, pMSP_theseReadsOnly, 
                              trueSeq, region_size, num_inserts_per_region, pairs,
                              read_to_pair_map, max_sep, theReadFillDB );
      }
      p = inserts_to_walk[numPathsTried];
      flip_pairs = randomx( ) % 2;
    }

    else if (choose_tries_randomly)
    {
      do 
      {
        p = start_at + randomx( ) % ( pairs.isize( ) - start_at );
      }
      while ( pathsTried.count( p ) != 0 );
      
      pathsTried.insert( p );
    }

    int id1 = pairs[p].id1, id2 = pairs[p].id2;
    int sep = pairs[p].sep, dev = pairs[p].sd;

    if( sep > (int)max_sep ) {
	continue;
    }

    if ( flip_pairs )
      swap( id1, id2 );

    if ( numPathsTried >= (int) num_to_try )
      break;

    ++numPathsTried;

    // Set up output.  Please do not add any per-insert printing,
    // especially error messages or debugging info, before here.
    if ( verbosity > 0 )
    {
      cout << "\n------------------------------------------------------------"
	   << "--------------------\n" << endl;
      cout << Date() << ": Insert #" << numPathsTried << endl;
      if ( special_request )
        cout << "Walking from read " << id1 << " to read " << id2 
             << " (" << sep << " ~ " << dev << ")"
             << "." << endl;
      else
        cout << "Walking across pair " << p 
               << " (from read " << id1 << " to read " << id2 << ")." << endl;
    }

    // If simulated data, determine the reads which actually belong on the insert, 
    // and determine the true path.
    if (SIM) {
      trueSeq.SetInsert( id1, id2 );
      if ( trueSeq.KmerPathMode( ) ) trueSeq.GetTruth( truth );
      else trueSeq.GetTruth( truthb );

      cout << "True location: " << trueSeq.Contig( ) << "."
           << trueSeq.Start( ) << "-" << trueSeq.Stop( ) << "\n";

      if (USE_ONLY_TRUE_READS) {
	trueSeq.GetFullyContainedReads( reads_to_use_opener_dir,
					reads_to_use_closer_dir );
	temp_policy = new MSP_TheseOrientedReadsOnly(reads_to_use_opener_dir,
						     reads_to_use_closer_dir);
	searcher.AddPolicy( temp_policy );    

	if ( verbosity > 7 ) {
	  cout << "TRUE READS:\nopener dir: ";
	  copy( reads_to_use_opener_dir.begin(), reads_to_use_opener_dir.end(),
		ostream_iterator<int>(cout, " ") );
	  cout << "\ncloser dir: ";
	  copy( reads_to_use_closer_dir.begin(), reads_to_use_closer_dir.end(),
		ostream_iterator<int>(cout, " ") );
	  cout << endl;
	}
      }
    }


    // Calculate the permissible lengths for the insert.
    //
    // In a mux-based search, the useful notion of "position" for a
    // read is the position of its leftmost point.  (This is the point
    // where the read "ends", since mux searching runs right-to-left.)
    // So we need to tell the searcher the distance between the left
    // endpoints of the opening (right) and closing (left) reads,
    // i.e. the insert separation plus the length of the left read.
    //
    // Note that this calculation does not involve K.  Really!
    // The distance between two kmers (in kmers) is the same as
    // the distance between their two leftmost bases (in bases).
    //
    // The trimming of reads to get kmer paths is handled separately,
    // within the KmerPathMuxSearcher itself.

    double sd_mult = SD_MULT;
    if ( from_read_SD_MULT != "" )
          sd_mult = Max( to_SD_MULT[id1], to_SD_MULT[id2] );
    const int range = int( round( sd_mult * double(dev) ) );

    const int minAcceptableExtLength = sep + readLengths[id2] - range;
    const int maxAcceptableExtLength = sep + readLengths[id2] + range;


    if ( verbosity > 1 )
      PRINT2( minAcceptableExtLength, maxAcceptableExtLength );

    TaskTimer searchTimer, closureTimer;
    searchTimer.Start();

    MuxSearchResult result;
    // Note that mux searching walks right-to-left, so to walk from id1
    // to id2, put id1 on the right:
    searcher.FindClosures( id2, id1, 
                           minAcceptableExtLength, maxAcceptableExtLength,
                           result );

    searchTimer.Stop();
    if ( verbosity > 0 ) {
      cout << "searchTimer: " << searchTimer << endl;
      cout << "Number of states explored: " 
	   << result.num_states_explored << endl;
      cout << "Number of nodes in answer graph: " 
	   << result.WalkGraph().size() << endl;
      if ( verbosity > 2 )
	result.WalkGraph().Summary(cout);
    }

    // If USE_ONLY_TRUE_READS, clean up.
    if (USE_ONLY_TRUE_READS)
    {    searcher.RemovePolicy(temp_policy);
         delete temp_policy;    }

    // ---------
    // POST-PROCESSING: What we do after the walking is over
    // ---------

    if( ! DOT.empty() ) {
      String dotfile = DOT;
      if( dotfile.Contains("#") ) dotfile.ReplaceBy("#", ToString(id1) );

      Ofstream( dotstream, dotfile );
      result.WalkGraph().Dot(dotstream);
      cout << "Dot file saved in " << dotfile << endl;
    }


    HyperKmerPath hkp(K, vec<KmerPath>() ); // empty hkp with K set
    if( ! HyperKmerPathDOT.empty() ) {
      TaskTimer HyperKmerPath_construction;
      HyperKmerPath_construction.Start();

      result.WalkGraph().MakeHyperKmerPath( &pathsFw, &pathsRc, 
					    &theSubList, hkp );

      HyperKmerPath_construction.Stop();
      if( verbosity > 0 )
	PRINT( HyperKmerPath_construction );

      if( hkp.EdgeObjectCount() >= HKP_min_edges ) {

	String dotfile = HyperKmerPathDOT;
	if( dotfile.Contains("#") ) dotfile.ReplaceBy("#", ToString(id1) );
      
	Ofstream( dotstream, dotfile );
	hkp.PrintSummaryDOT0w( dotstream, 
			       false, /* no contig labels */
			       false, /* no vertex labels */
			       false  /* no edge labels   */ );
      }
    }


    unsigned int nclosures = result.num_closures_found;           // initial count
    Bool pseudo_limit_exceeded = ( MAX_PSEUDO > 0 && nclosures > MAX_PSEUDO );
    Bool dirty = result.hit_search_limit || pseudo_limit_exceeded;// initial value
    Bool giving_up = abandon_dirty_inserts && dirty;              // initial value

    if ( pseudo_limit_exceeded && verbosity > 0 )
    {    cout << "Number of pseudo-closures ";
         if (show_only_true_closure_count)
              cout << "(" << result.num_closures_found << ") ";
         cout << "exceeds MAX_PSEUDO." << endl;    }

    unsigned int all_closures_size = 0;
    if ( calculate_closures && !giving_up ) {
      closureTimer.Start();
      result.CalculateAllClosures( &pathsFw, &pathsRc, MAX_CLOSURES );
      nclosures = all_closures_size = result.all_closures.size();
      if ( result.hit_closure_limit )
      {
        if ( verbosity > 0 ) {
	     cout << "Hit MAX_CLOSURES=" << MAX_CLOSURES << " limit" << endl;
	     if( nclosures < MAX_CLOSURES )
	       cout << "Actually found only " << nclosures
		    << " distinct closures, but found them with multiplicity"
		    << endl;
	}
        dirty = True;
        if (abandon_dirty_inserts) giving_up = True;
      }
      closureTimer.Stop();
      if ( verbosity > 0 )
	cout << "build paths: " << closureTimer << endl;
    }

    if (dirty) {
      if ( nclosures < 10 ) ++closures_found_after_explosion[nclosures];
      else if ( nclosures < 100 ) ++closures_found_after_explosion[10];
      else if ( nclosures < 1000 ) ++closures_found_after_explosion[11];
      else if ( nclosures < 10000 ) ++closures_found_after_explosion[12];
      else if ( nclosures < 100000 ) ++closures_found_after_explosion[13];
      else if ( nclosures < 1000000 ) ++closures_found_after_explosion[14];
      else if ( nclosures < 10000000 ) ++closures_found_after_explosion[15];
      else ++closures_found_after_explosion[16];
      if( nclosures )
	numDirtyClosed++;
      else
	numDirtyNotClosed++;
    }
    else {
      if ( nclosures < 10 ) ++closures_found[nclosures];
      else if ( nclosures < 100 ) ++closures_found[10];
      else if ( nclosures < 1000 ) ++closures_found[11];
      else if ( nclosures < 10000 ) ++closures_found[12];
      else if ( nclosures < 100000 ) ++closures_found[13];
      else if ( nclosures < 1000000 ) ++closures_found[14];
      else if ( nclosures < 10000000 ) ++closures_found[15];
      else ++closures_found[16];
      switch ( nclosures ) {
      case 0:
	++numCleanNotClosed;
	break;
      case 1:
	++numCleanUniquelyClosed;
	break;
      default:
	++numCleanMultiplyClosed;
	break;
      }
    }

    // We need a way to not waste time on things that exploded.
    if ( giving_up ) {
      if ( verbosity > 0 )
	cout << "Found " << nclosures
	     << " closure events; DIRTY -- abandoning insert" << endl;
      continue;
    }

    if ( dirty && SaveCleanOnly ) giving_up = True;
 
    if ( p_ciWriter && !giving_up
	 && calculate_closures // should always be true, if there's a p_ciWriter
              )
      p_ciWriter->Write( id1, id2, 
                         result.all_closures,
			 // path length, not mux-style extension length!
                         minAcceptableExtLength + readLengths[id1], 
			 maxAcceptableExtLength + readLengths[id1], 
			 MIN_MATCH,
                         ComputeLocs );

    if ( verbosity > 0 ) {
      cout << "Found ";
      if ( show_only_true_closure_count && !pseudo_limit_exceeded )
	cout << all_closures_size;
      else
      {
         cout << result.num_closures_found << " pseudo";
         if ( EstimateClosures )
	   cout << ", approx " << result.EstimateNumClosures();
         if ( calculate_closures && !pseudo_limit_exceeded )
	   cout << ", actually " << all_closures_size;
      }
      cout << " closures";
      if( result.hit_search_limit || pseudo_limit_exceeded 
	  || result.hit_closure_limit ) 
	cout << " DIRTY";
      cout << endl;
     

      if ( print_closures && !pseudo_limit_exceeded ) {
	for( vecKmerPath::const_iterator closureIter = result.all_closures.begin();
	     closureIter != result.all_closures.end(); closureIter++ )
        {
          if ( SIM && trueSeq.KmerPathMode( ) && *closureIter == truth ) 
               cout << "Kmers (TRUTH): ";
          else cout << "Kmers: ";
	  cout << closureIter->KmerCount() << " " << *closureIter << endl;
        }
        if ( SIM && trueSeq.KmerPathMode( ) )
          cout << "Kmers in truth: " << truth.KmerCount( ) << " " << truth << endl;
      }

      if ( SIM && test_false && !pseudo_limit_exceeded )
      {
      Bool found_truth = False;
      for( vecKmerPath::const_iterator closureIter = result.all_closures.begin();
          closureIter != result.all_closures.end(); closureIter++ )
      {    if ( trueSeq.KmerPathMode( ) )
           {    if ( *closureIter == truth ) found_truth = True;    }
           else
           {    SuperBaseVector s = kbb->ToSequence(*closureIter);
                ForceAssertEq( s.size( ), 1 );
                if ( s.Seq(0) == truthb ) found_truth = True;    }    }
      if ( !found_truth && test_false && !result.all_closures.empty() )
           cout << "Warning: only false closures found!\n";
      }

      if ( print_closure_sizes && !pseudo_limit_exceeded ) {
        static vec<int> closure_sizes;
        closure_sizes.clear( );
	for( vecKmerPath::const_iterator closureIter = result.all_closures.begin();
	     closureIter != result.all_closures.end(); closureIter++ )
          closure_sizes.push_back( closureIter->KmerCount() );
        Sort(closure_sizes);
        for ( int i = 0; i < closure_sizes.isize( ); i++ )
        {    int j;
             for ( j = i + 1; j < closure_sizes.isize( ); j++ )
                  if ( closure_sizes[j] != closure_sizes[i] ) break;
	     cout << "Kmers: " << closure_sizes[i] << " [" << j-i << "]" << endl;
             i = j - 1;    }
        if ( SIM && trueSeq.KmerPathMode( ) ) 
             cout << "Kmers in truth: " << truth.KmerCount( ) << endl;
      }

      if ( decompose_truth && trueSeq.KmerPathMode( ) )
      {    cout << "Truth decomposition = ";
           DecomposePath(truth);    }
      if ( decompose_truth_brief && trueSeq.KmerPathMode( ) )
      {    cout << "Truth decomposition = ";
           DecomposePath(truth, True);    }

    } // end of verbosity>0


    if ( plot_something &&  // but are we supposed to plot this?
	 ( (PlotClosures && all_closures_size > 0) ||
	   (PlotMultiClosures && all_closures_size > 1) ) ) {
      ForceAssert( p_walkPlotter );
      ForceAssert( calculate_closures );
// Aargh, why doesn't this work:
//       for_each( result.all_closures.begin(), result.all_closures.end(),
// 		bind1st( mem_fun(WalkPlotter::AddClosure), p_walkPlotter ) );
      for( vecKmerPath::const_iterator cl = result.all_closures.begin();
	   cl != result.all_closures.end(); cl++ )
	p_walkPlotter->AddClosure( *cl );

      if ( SIM && trueSeq.KmerPathMode( ) ) p_walkPlotter->SetTruth(truth);

      p_walkPlotter->Plot();
      p_walkPlotter->Clear();
    }
  }  // End of main loop over inserts

  // Print summary statistics.

  if ( !summary_table )
  {
       cout << "\nNumber of inserts closed 0/1/2+ times: "
            << numCleanNotClosed << " / "
            << numCleanUniquelyClosed << " / "
            << numCleanMultiplyClosed;
       if( numDirtyClosed || numDirtyNotClosed )
         cout << "\nNumber which hit the search limit without/with closing: " 
	      << numDirtyNotClosed << " / "
	      << numDirtyClosed;
       cout << endl;
  }
  if (summary_table)
  {    vec< vec<String> > rows;
       vec<String> row1, row2, rowl, rowm;
       int total1 = 0, total2 = 0;
       row1.push_back( "closures", "#inserts",    "#inserts" );
       row2.push_back( "found",    "attempted",   "exploded" );
       rows.push_back( row1, row2 );
       for ( int i = 0; i <= 16; i++ )
       {    vec<String> row;
            if ( i < 10 ) row.push_back( ToString(i) );
            else if ( i == 10 ) row.push_back( "10 - 99" );
            else if ( i == 11 ) row.push_back( "100 - 999" );
            else if ( i == 12 ) row.push_back( "1000 - 9999" );
            else if ( i == 13 ) row.push_back( "10000 - 99999" );
            else if ( i == 14 ) row.push_back( "100000 - 999999" );
            else if ( i == 15 ) row.push_back( "1000000 - 9999999" );
            else row.push_back( ">= 10000000" );
            row.push_back( ToString( closures_found[i] ) );
            row.push_back( ToString( closures_found_after_explosion[i] ) );
            total1 += closures_found[i];
            total2 += closures_found_after_explosion[i];
            rows.push_back(row);    }
       rowl.push_back( "", "------", "------" );
       rowm.push_back( "total", ToString(total1), ToString(total2) );
       rows.push_back( rowl, rowm );
       cout << "\nSummary - number of insert closures found\n";
       PrintTabular( cout, rows, 2, "rrr" );    }

  // Must delete this object, or the count of inserts won't get written.
  if ( p_ciWriter )
    delete p_ciWriter;

}


/// ChangeReadsMSP: swap the searcher's "these reads only" policy for a new
/// one built from theseReads.
///
/// The policy currently pointed to by pMSP_theseReadsOnly is removed from
/// the searcher and deleted; on return the pointer refers to the freshly
/// allocated replacement, which has been added to the searcher.
///
/// If theReadFillDB is trivial, the read ids are handed to the policy
/// unchanged.  Otherwise each read is expanded into every filling id from
/// FirstFilling(read) through LastFilling(read), inclusive.
void ChangeReadsMSP( KmerPathMuxSearcher& searcher, 
                     MuxSearchPolicy*& pMSP_theseReadsOnly, 
                     const vec<int>& theseReads, 
                     const ReadFillDatabase& theReadFillDB )
{
  // Retire the policy that is installed right now.
  searcher.RemovePolicy( pMSP_theseReadsOnly );
  delete pMSP_theseReadsOnly;

  const bool readsAreFills = theReadFillDB.IsTrivialRFD();

  // Only populated when read numbers must be translated to fill numbers.
  vec<int> fillIds;
  if ( !readsAreFills ) {
    for ( int i = 0; i < theseReads.isize(); ++i ) {
      const int read = theseReads[i];
      for ( int fill = theReadFillDB.FirstFilling(read);
            fill <= theReadFillDB.LastFilling(read); ++fill )
        fillIds.push_back(fill);
    }
  }

  const vec<int>& allowedIds = ( readsAreFills ? theseReads : fillIds );
  pMSP_theseReadsOnly = new MSP_TheseReadsOnly( allowedIds );

  searcher.AddPolicy( pMSP_theseReadsOnly );
}


/// ChangeOrientedReadsMSP: swap the searcher's "these oriented reads only"
/// policy for a new one built from theseReadsFw and theseReadsRc.
///
/// The policy currently pointed to by pMSP_theseOrientedReadsOnly is
/// removed from the searcher and deleted; on return the pointer refers to
/// the freshly allocated replacement, which has been added to the searcher.
///
/// If theReadFillDB is trivial, the read ids are used unchanged.  Otherwise
/// each read in either list is expanded into every filling id from
/// FirstFilling(read) through LastFilling(read), inclusive.
void ChangeOrientedReadsMSP( KmerPathMuxSearcher& searcher, 
                             MuxSearchPolicy*& pMSP_theseOrientedReadsOnly, 
                             const vec<int>& theseReadsFw, const vec<int>& theseReadsRc, 
                             const ReadFillDatabase& theReadFillDB )
{
  // Retire the policy that is installed right now.
  searcher.RemovePolicy( pMSP_theseOrientedReadsOnly );
  delete pMSP_theseOrientedReadsOnly;

  const bool readsAreFills = theReadFillDB.IsTrivialRFD();

  // Only populated when read numbers must be translated to fill numbers.
  vec<int> fwFills, rcFills;
  if ( !readsAreFills ) {
    for ( int i = 0; i < theseReadsFw.isize(); ++i ) {
      const int read = theseReadsFw[i];
      for ( int fill = theReadFillDB.FirstFilling(read);
            fill <= theReadFillDB.LastFilling(read); ++fill )
        fwFills.push_back(fill);
    }
    for ( int i = 0; i < theseReadsRc.isize(); ++i ) {
      const int read = theseReadsRc[i];
      for ( int fill = theReadFillDB.FirstFilling(read);
            fill <= theReadFillDB.LastFilling(read); ++fill )
        rcFills.push_back(fill);
    }
  }

  const vec<int>& fwIds = ( readsAreFills ? theseReadsFw : fwFills );
  const vec<int>& rcIds = ( readsAreFills ? theseReadsRc : rcFills );

  // NOTE(review): the rc list is passed first and the fw list second;
  // confirm this matches the constructor's parameter order.
  pMSP_theseOrientedReadsOnly = new MSP_TheseOrientedReadsOnly( rcIds, fwIds );

  searcher.AddPolicy( pMSP_theseOrientedReadsOnly );
}


/// SetupLocalizationSim: pick a random region of the reference, choose
/// inserts inside it to walk, and restrict the searcher to the reads that
/// lie in that region.
///
/// Steps:
///  - Ask trueSeq for the reference sizes and draw a random region of
///    length region_size; trueSeq is pointed at it with SetRegion().
///  - Collect the reads trueSeq reports as fully contained in the region.
///    A pair is a candidate if one read is in the forward list, its partner
///    is in the reverse list, and the pair's sep is at most max_sep.
///  - If there are fewer candidates than inserts_to_walk has entries, a new
///    region is drawn and the process repeats (there is no retry limit).
///  - Every entry of inserts_to_walk is overwritten with a randomly chosen
///    candidate; no candidate slot is used twice.
///  - ChangeReadsMSP() is called so that pMSP_theseReadsOnly allows exactly
///    the forward and reverse reads of the chosen region.
///
/// The number of inserts picked is inserts_to_walk.size();
/// num_inserts_per_region is not referenced in this function.
///
/// Asserts if no reference sequence is longer than region_size.
void SetupLocalizationSim( KmerPathMuxSearcher& searcher, 
                           vec<int>& inserts_to_walk, 
                           MuxSearchPolicy*& pMSP_theseReadsOnly,
                           SimTrueSequenceBroker& trueSeq, 
                           const int region_size, 
                           const int num_inserts_per_region,
                           const vec<read_pairing>& pairs,
                           const vec<int>& read_to_pair_map,
                           const int max_sep,
                           const ReadFillDatabase& theReadFillDB )
{
  vec<int> reference_sizes;
  trueSeq.GetReferenceSizes( reference_sizes );
  // Convert to number of possible region starts on each sequence.
  longlong numPossibleStarts = 0;
  for ( unsigned int i = 0; i < reference_sizes.size(); ++i ) {
    reference_sizes[i] = max(0,reference_sizes[i]-region_size);
    numPossibleStarts += reference_sizes[i];
  }
  // With no possible start the modulus below would divide by zero.
  ForceAssert( numPossibleStarts > 0 );

  longlong region_start = 0;
  int ref_idx = 0;
  vec<int> fwReads, rcReads;
  vec<int> possible_pairs;

  // Keep drawing regions until one holds enough candidate inserts.
  while ( true ) {
    // Draw a global start, then translate it into (reference, offset).
    region_start = big_random() % numPossibleStarts;
    for ( ref_idx = 0; ref_idx < reference_sizes.isize(); ++ref_idx ) {
      // Offsets valid on this reference are [0, reference_sizes[ref_idx]).
      // A value equal to the count belongs to a later reference; in
      // particular a reference with zero possible starts is never chosen.
      if ( region_start < reference_sizes[ref_idx] )
        break;
      region_start -= reference_sizes[ref_idx];
    }
    trueSeq.SetRegion( ref_idx, region_start, region_start + region_size );

    // Start each attempt from empty lists.
    fwReads.clear();
    rcReads.clear();
    trueSeq.GetFullyContainedReads( fwReads, rcReads );
    PRINT2( fwReads.size(), rcReads.size() );
    Sort( rcReads );   // sorted so binary_search can be used below

    possible_pairs.clear();
    for ( unsigned int f = 0; f < fwReads.size(); ++f ) {
      const int p = read_to_pair_map[fwReads[f]];
      // A negative entry cannot index pairs; presumably it marks a read
      // with no pairing (TODO confirm).  Skip it either way.
      if ( p < 0 )
        continue;
      if ( pairs[p].sep <= max_sep &&
           binary_search( rcReads.begin(), rcReads.end(), pairs[p].Partner(fwReads[f]) ) )
        possible_pairs.push_back(p);
    }

    if ( possible_pairs.size() >= inserts_to_walk.size() )
      break;

    cout << "Too few inserts (" << possible_pairs.size() << ") in region " 
         << ref_idx << "." << region_start << "-" << region_start+region_size << "."
         << "  Picking a new region." << endl;
  }
  cout << "Picked " << ref_idx << "." << region_start
       << "-" << region_start+region_size << "." << endl;

  // Fill inserts_to_walk by rejection sampling: draw candidate slots at
  // random and ignore any slot that has already been used.
  vec<Bool> pair_is_picked( possible_pairs.size(), False );
  int num_pairs_picked = 0;
  while ( num_pairs_picked < inserts_to_walk.isize() ) {
    int pair_idx = randomx() % possible_pairs.size();
    if ( ! pair_is_picked[ pair_idx ] ) {
      pair_is_picked[ pair_idx ] = True;
      inserts_to_walk[num_pairs_picked++] = possible_pairs[pair_idx];
    }
  }

  // Restrict the search to the reads of the chosen region (fw then rc).
  vec<int> allReads;
  allReads.swap( fwReads );
  allReads.append( rcReads );
  ChangeReadsMSP( searcher, pMSP_theseReadsOnly, allReads, theReadFillDB );
}

