/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef FORCE_DEBUG
     #define NDEBUG
#endif

#include "Basevector.h"
#include "CoreTools.h"
#include "Equiv.h"
#include "Feudal.h"
#include "math/HoInterval.h"
#include "lookup/LookAlign.h"
#include "math/Functions.h"
#include "paths/AlignHyperKmerPath.h"
#include "paths/EvalUtils.h"
#include "paths/HyperKmerPath.h"
#include "paths/KmerBaseBroker.h"
#include "paths/ReadsToPathsCoreX.h"

/**
   Function: SummarizeReferenceCoverage

   Print out information reporting gaps in coverage (tiny gaps are ignored).
   
*/
void SummarizeReferenceCoverage( ostream& out,
     const vecbasevector& genome, vec<look_align>& aligns, const Bool diploid,
     const Bool brief = False )
{    vec< vec<ho_interval> > covered( genome.size( ) );
     for ( int i = 0; i < aligns.isize( ); i++ )
     {    const look_align& la = aligns[i];
          covered[la.target_id].push_back(
               ho_interval( la.a.pos2( ), la.a.Pos2( ) ) );    }
     longlong total_bases = 0, total_covered = 0;
     vec<int> gaps;
     for ( int i = 0; i < genome.size( ); i++ )
     {    vec< pair<ho_interval, int> > covered2;
          CondenseIntervals( genome[i].size( ), covered[i], covered2 );
          for ( int j = 0; j < covered2.isize( ); j++ )
          {    if ( covered2[j].second == 0 )
                    gaps.push_back( covered2[j].first.Length( ) );    }    }
     ReverseSort(gaps);
     int min_gap = 1, gaps_to_show = 25;
     if ( gaps.isize( ) > gaps_to_show ) min_gap = gaps[gaps_to_show-1];
     if ( !brief )  out << "\ngaps of size >= " << min_gap << ":\n";
     for ( int i = 0; i < genome.size( ); i++ )
     {    vec< pair<ho_interval, int> > covered2;
          CondenseIntervals( genome[i].size( ), covered[i], covered2 );
          for ( int j = 0; j < covered2.isize( ); j++ )
          {    total_bases += covered2[j].first.Length( );
               if ( covered2[j].second > 0 )
                    total_covered += covered2[j].first.Length( );
               else
               {    if ( !brief && covered2[j].first.Length( ) >= min_gap )
                    {    out << i << "." << covered2[j].first.Start( ) << "-" 
                              << covered2[j].first.Stop( ) << " (" 
                              << covered2[j].first.Length( )
                              << " bases)\n";    }    }    }    }
     if ( !brief || !diploid )
     {    out << "\nSUMMARY: " << PERCENT_RATIO(4, total_covered, total_bases)
               << " (" << total_covered << "/" << total_bases << ") "
               << "of reference covered" << endl;    }
     if (diploid)
     {    vec< vec<ho_interval> > covered( genome.size( )/2 );
          for ( int i = 0; i < aligns.isize( ); i++ )
          {    const look_align& la = aligns[i];
               int t = la.target_id;
               if ( t >= genome.size( )/2 ) t -= genome.size( )/2;
               covered[t].push_back( ho_interval( la.pos2( ), la.Pos2( ) ) );    }
          longlong total_bases = 0, total_covered = 0;
          vec<int> gaps;
          for ( int i = 0; i < genome.size( )/2; i++ )
          {    vec< pair<ho_interval, int> > covered2;
               CondenseIntervals( genome[i].size( ), covered[i], covered2 );
               for ( int j = 0; j < covered2.isize( ); j++ )
               {    if ( covered2[j].second == 0 )
                         gaps.push_back( 
                              covered2[j].first.Length( ) );    }    }
          ReverseSort(gaps);
          int min_gap = 1, gaps_to_show = 25;
          if ( gaps.isize( ) > gaps_to_show ) min_gap = gaps[gaps_to_show-1];
          if ( !brief )  out << "\ndiploid gaps of size >= " << min_gap << ":\n";
          for ( int i = 0; i < genome.size( )/2; i++ )
          {    vec< pair<ho_interval, int> > covered2;
               CondenseIntervals( genome[i].size( ), covered[i], covered2 );
               for ( int j = 0; j < covered2.isize( ); j++ )
               {    total_bases += covered2[j].first.Length( );
                    if ( covered2[j].second > 0 )
                         total_covered += covered2[j].first.Length( );
                    else
                    {    if ( !brief && covered2[j].first.Length( ) >= min_gap )
                         {    out << i << "." << covered2[j].first.Start( ) 
                                   << "-" << covered2[j].first.Stop( ) << " (" 
                                   << covered2[j].first.Length( )
                                   << " bases)\n";    }    }    }    }
          out << "\nSUMMARY: " << PERCENT_RATIO(4, total_covered, total_bases)
               << " (" << total_covered << "/" << total_bases << ") "
               << "of reference covered as diploid" << endl;    }    }

/**
    Function: PrintGraphStatistics

    Print out summary stats about the HyperKmerPath representing the entire
    assembly: number and N50 size of components, number of gaps relative to
    the reference, number of vertices, number and N50 size of edges, and a
    breakdown of the graph's ambiguities.

    Parameters:

       out - stream receiving the statistics.
       h - the assembly graph.
       reference_components - number of reference contigs; if positive, a
         "gaps" line is printed.  In the diploid case half this number is
         used as the number of expected components.
       DIPLOID, USE_TRUTH - if both are True, diploid ambiguities are counted
         using the alignments.
       aligns - alignments of the edges of h to the reference.
       aligns_index - for each edge id, the indices in aligns of its
         alignments.
       sub_dir - directory in which hyper.fixed.reduced.dot is written.
       reduced_dot - if True (and DIPLOID and USE_TRUTH), write a DOT file of
         the graph with one edge of each diploid ambiguity removed.

*/
void PrintGraphStatistics( ostream& out, const HyperKmerPath& h, 
     const int reference_components, const Bool DIPLOID, const Bool USE_TRUTH,
     const vec<look_align>& aligns, const vec< vec<int> >& aligns_index,
     const String& sub_dir, Bool reduced_dot = False )
{
     out << "\nGRAPH STATISTICS:\n";
     int ncomp = h.ConnectedComponents( );

     // Compute N50 component size.  We first compute the size of each
     // component, here defined to be the sum of the component's
     // edges, which is not quite right but usually close enough.  To
     // compensate for duplicate edges, we only take the longest edge
     // between any two vertices.

     equiv_rel e;
     h.ComponentRelation(e);
     vec<int> reps;
     e.OrbitReps(reps);
     vec<int> component_sizes( reps.size( ), 0 );
     for ( int i = 0; i < reps.isize( ); i++ )
     {    static vec<int> o;
          e.Orbit( reps[i], o );
          for ( int j = 0; j < o.isize( ); j++ )
          {    int v = o[j];

               // For each vertex reachable from v by an edge, keep only the
               // length of the longest such edge.

               map<int,int> longestEdgeByVertex;
               for ( int l = 0; l < h.From(v).isize( ); l++ ) {    
                 int vertex = h.From(v)[l];
                 int length = h.EdgeObjectByIndexFrom( v, l ).KmerCount();
                 map<int,int>::iterator exists = longestEdgeByVertex.find( vertex );
                 if ( exists != longestEdgeByVertex.end() )
                   exists->second = max( exists->second, length );
                 else
                   longestEdgeByVertex.insert( make_pair( vertex, length ) );
               }
               for ( map<int,int>::iterator longest = longestEdgeByVertex.begin();
                     longest != longestEdgeByVertex.end(); ++longest )
                 component_sizes[i] += longest->second;
          }
     }
     Sort(component_sizes);
     out << ncomp << " components";
     if ( ncomp > 0 ) out << ", of N50 size " << N50(component_sizes);
     out << "\n";
     if ( reference_components > 0 )
     {    int nref = ( DIPLOID ? reference_components/2 : reference_components );
          out << max( 0, ncomp - nref ) << " gaps\n";    }

     // Describe vertices and edges.

     out << h.N( ) << " vertices\n";
     vec<int> X( h.EdgeObjectCount( ) );
     for ( int i = 0; i < X.isize( ); i++ )
          X[i] = h.EdgeLength(i);
     Sort(X);
     out << X.size( ) << " edges";
     if ( X.nonempty( ) ) out << ", of N50 size " << N50(X);
     out << "\n";

     // Describe ambiguities.  The total is components + edges - vertices.
     // Self-loops of 1, 2, or 3-4 kmers are counted separately.

     int amb = ncomp + X.size( ) - h.N( ), loop1 = 0, loop2 = 0, loop34 = 0;
     for ( int v = 0; v < h.N( ); v++ )
     {    for ( int j = 0; j < h.From(v).isize( ); j++ )
          {    int w = h.From(v)[j];
               if ( w != v ) continue;
               const KmerPath& p = h.EdgeObjectByIndexFrom( v, j );
               int n = p.KmerCount( );
               if ( n == 1 ) ++loop1;
               else if ( n == 2 ) ++loop2;    
               else if ( n == 3 || n == 4 ) ++loop34;    }    }

     // Count diploid ambiguities: pairs of distinct parallel edges, each
     // having a single full-length error-free alignment, that align to the
     // same interval on reference contigs whose ids differ by
     // reference_components/2.  At most one pair is counted per vertex, and
     // one edge of the pair is removed from the copy hr.

     int dip = 0;
     if (DIPLOID && USE_TRUTH)
     {    HyperKmerPath hr(h);
          for ( vrtx_t v = 0; v < h.N( ); v++ )
          {    for ( int j1 = 0; j1 < h.From(v).isize( ); j1++ )
               {    vrtx_t w1 = h.From(v)[j1];
                    edge_t e1 = h.EdgeObjectIndexByIndexFrom( v, j1 );
                    if ( !aligns_index[e1].solo( ) ) continue;
                    const look_align& la1 = aligns[ aligns_index[e1][0] ];
                    if ( !la1.FullLength( ) || la1.Errors( ) > 0 ) continue;
                    for ( int j2 = j1+1; j2 < h.From(v).isize( ); j2++ )
                    {    vrtx_t w2 = h.From(v)[j2];
                         edge_t e2 = h.EdgeObjectIndexByIndexFrom( v, j2 );
                         if ( w1 != w2 ) continue;
                         if ( !aligns_index[e2].solo( ) ) continue;
                         const look_align& la2 = aligns[ aligns_index[e2][0] ];

                         // The second edge must itself align perfectly.

                         if ( !la2.FullLength( ) || la2.Errors( ) > 0 ) continue;
                         if ( Abs( la1.target_id - la2.target_id )
                              != reference_components/2 )
                         {    continue;    }
                         if ( la1.pos2( ) != la2.pos2( ) ) continue;
                         if ( la1.Pos2( ) != la2.Pos2( ) ) continue;
                         if ( h.EdgeObject(e1) == h.EdgeObject(e2) ) continue;
                         ++dip;
                         hr.DeleteEdgeFrom( v, j2 );
                         goto next_vertex;    }    }
               next_vertex: continue;    }
          if (reduced_dot)
          {    hr.RemoveUnneededVertices( );
               hr.CompressEdgeObjects( );
               hr.RemoveDeadEdgeObjects( );
               Ofstream( dot, sub_dir + "/hyper.fixed.reduced.dot" );
               hr.PrintSummaryDOT0w(dot);    }    }
     out << "ambiguities:\n";
     if (DIPLOID && USE_TRUTH) out << "    " << dip << " diploid ambiguities\n";
     out << "    " << loop1 << " perfect mononukes\n";
     out << "    " << loop2 << " perfect dinukes\n";
     out << "    " << loop34 << " perfect trinukes or tetranukes\n";
     out << "    " << amb - loop1 - loop2 - loop34 - dip << " other\n";    
     out << "    " << "-------------------------------------\n";
     out << "    " << amb - dip << " total";
     if (DIPLOID && USE_TRUTH) out << " (non-diploid)";
     out << "\n";    }

/**
   Function: EvaluateAssembly

   Print out an evaluation of the assembly, using truth data if
   available.  Note that this will reorder the HyperKmerPath h (when
   USE_TRUTH and REORDER are both set).

   Called at the end of <LocalizeReadsTail()>.

   Input/output parameters:

      h - a <HyperKmerPath> representing *the entire assembly*.
        On output, its <components> are reordered to follow the
        reference.

   Input parameters:

      kbb - broker used when aligning and printing h (and *hInitial).
      data_dir - the reference is read from data_dir + "/genome".
      wrun_dir - working directory passed to the aligner; in the diploid
        case scratch files are written under wrun_dir + "/2".
      sub_dir - directory receiving the output files (hyper.aligns,
        hyper.tpaths, tpaths.dat, report.brief, report.full,
        hyper.fixed.dot, hyper.fixed.reduced.dot).
      genome - the reference contigs.
      DIPLOID - if True (with USE_TRUTH), the second half of genome is
        treated as homologous to the first half, and the reference is used
        to add edges for SNPs covered on only one homolog.
      USE_TRUTH - if False, no alignment to the reference is done; only a
        summary of h and graph statistics are printed.
      FILTER_ALIGNS - if True, alignments are filtered with <FilterAligns()>.
      WRITE_ALIGNS - if True, the alignments are written to
        sub_dir + "/hyper.aligns".
      REORDER - if True, the graph is reordered to follow the reference.
      MIN_LEN - passed to <FilterAligns()> and ShowTrustedPaths.
      hkpFname, showTrustedSubdir, hkpFnameInitial, VERT - passed through to
        ShowTrustedPaths.
      hInitial - if non-null, a second graph that is aligned (and, if
        REORDER is set, reordered) in the same way and shown together with h
        by ShowTrustedPaths.

*/
void EvaluateAssembly( HyperKmerPath& h, const KmerBaseBroker* kbb,
     const String& data_dir, const String& wrun_dir, const String& sub_dir,
     const vecbasevector& genome, const Bool DIPLOID, const Bool USE_TRUTH,
     const Bool FILTER_ALIGNS, const Bool WRITE_ALIGNS, const Bool REORDER, 
     nbases_t MIN_LEN, String hkpFname, String showTrustedSubdir,
     HyperKmerPath *hInitial , const String& hkpFnameInitial,
     Bool VERT )
{   
     // Align merged HyperKmerPath edges to reference, then reorder the 
     // components so that they follow the reference.

     vec<look_align> aligns;
     vec< vec<int> > aligns_index;

     vec<TrustedPath> trusted_paths;
     if (USE_TRUTH)
     {    AlignHyperKmerPath( h, kbb, data_dir + "/genome", wrun_dir, aligns,
               aligns_index );

          if (FILTER_ALIGNS) 
               FilterAligns( h, aligns, aligns_index, trusted_paths, MIN_LEN );

          // Filtering is repeated after reordering.

          if (REORDER) {
            double reorderclock = WallClockTime();
            ReorderToFollowReference( h, aligns, aligns_index );
            cout << TimeSince(reorderclock) 
                 << " used reordering to follow reference." << endl;
            if (FILTER_ALIGNS) 
              FilterAligns( h, aligns, aligns_index, trusted_paths, MIN_LEN );
          }

          if (WRITE_ALIGNS) 
          {    double writeclock = WallClockTime();
               WriteLookAlignBinary( sub_dir + "/hyper.aligns", aligns ); 
               cout << TimeSince(writeclock) 
                    << " used writing aligns." << endl;    }

          BinaryWrite( sub_dir + "/hyper.tpaths", h );
          BinaryWrite( sub_dir + "/tpaths.dat", trusted_paths );

          if ( !hInitial ) {
            cout << " writing trusted paths graph..." << endl;
            ShowTrustedPaths( h, trusted_paths, genome, sub_dir + showTrustedSubdir, MIN_LEN, hkpFname );
            cout << " wrote trusted paths graph." << endl;
          } else {

            // Process the initial graph in the same way as h, then show the
            // two graphs together.

            vec<look_align> aligns_initial;
            vec< vec<int> > aligns_index_initial;

            vec<TrustedPath> trusted_paths_initial;

            AlignHyperKmerPath( *hInitial, kbb, data_dir + "/genome", wrun_dir, aligns_initial,
                                aligns_index_initial );
            
            if (FILTER_ALIGNS) 
              FilterAligns( *hInitial, aligns_initial, aligns_index_initial, trusted_paths_initial, MIN_LEN );

            if (REORDER) {
              double reorderclock = WallClockTime();
              ReorderToFollowReference( *hInitial, aligns_initial, aligns_index_initial );
              cout << TimeSince(reorderclock) 
                   << " used reordering to follow reference." << endl;
              if (FILTER_ALIGNS) 
                FilterAligns( *hInitial, aligns_initial, aligns_index_initial, trusted_paths_initial, MIN_LEN );
            }

            cout << " writing trusted paths graph..." << endl;
            ShowTrustedPaths( h, trusted_paths, hkpFname, *hInitial, trusted_paths_initial, hkpFnameInitial, VERT,
                              genome, sub_dir + showTrustedSubdir, MIN_LEN );
            cout << " wrote trusted paths graph." << endl;
            
          }
     }

     int K = h.K( );
     vec<vrtx_t> to_right_vertex, to_left_vertex;
     h.ToRight(to_right_vertex), h.ToLeft(to_left_vertex);

     // Print text version of merged HyperKmerPath.

     String bar = "=============================================================="
          "======================";
     String dbar = ".............................................................."
          "......................";
     if (USE_TRUTH)
     {    cout << "\n" << bar << "\n"
               << "EVALUATION OF ASSEMBLY VERSUS REFERENCE\n" << bar << "\n";
          PrintAlignedHyperKmerPath( cout, h, kbb, genome, aligns, 
               aligns_index, True, &trusted_paths );    }
     else h.PrintSummaryPlus( cout, 0, 0, 0, 0, 0, False );

     // Write brief and full reports in haploid case.  Pass 1 writes
     // report.brief, pass 2 writes report.full.

     if ( USE_TRUTH && !DIPLOID )
     {    for ( int pass = 1; pass <= 2; pass++ )
          {    Ofstream( bout, sub_dir + "/report." 
                    + ( pass == 1 ? "brief" : "full" ) );
               bout << "EVALUATION OF ASSEMBLY VERSUS REFERENCE\n";
               PrintAlignedHyperKmerPath( bout, h, kbb, genome, aligns, 
                    aligns_index, True, &trusted_paths, pass == 1, DIPLOID );
               bout << "\n" << dbar << "\n";
               SummarizeReferenceCoverage( bout, genome, aligns, DIPLOID, 
                    pass == 1 );
               if ( pass == 2 )
               {    bout << "\nEdges of size >= 10 kb that do not align perfectly "
                         << "to reference:\n\n";
                    vec<Bool> perf( h.EdgeObjectCount( ), False );
                    for ( int i = 0; i < aligns.isize( ); i++ )
                    {    if ( aligns[i].FullLength( ) && aligns[i].Errors( ) == 0 )
                              perf[ aligns[i].query_id ] = True;    }
                    for ( int i = 0; i < perf.isize( ); i++ )
                    {    int n = h.EdgeObject(i).KmerCount( );
                         if ( perf[i] || n < 10000 ) continue;
                         bout << "[" << BaseAlpha(i) << "] - " 
                              << n << " kmers\n";    }    }
               PrintGraphStatistics( bout, h, genome.size( ), DIPLOID, USE_TRUTH, aligns,
                    aligns_index, sub_dir );    }    }

     // Check for putative misassemblies.

     if (USE_TRUTH) ReportMisassemblies( cout, h, aligns, aligns_index );

     // Summarize coverage of reference.

     if (USE_TRUTH) SummarizeReferenceCoverage( cout, genome, aligns, DIPLOID );

     // Output graph statistics.

     PrintGraphStatistics( cout, h, genome.size( ), DIPLOID, USE_TRUTH, aligns, aligns_index,
          sub_dir );

     // For diploid case, attempt to do some repairs.

     if ( DIPLOID && USE_TRUTH )
     {    cout << "\n" << bar << "\n"
               << "USING REFERENCE TO IMPROVE ASSEMBLY\n" << bar << "\n\n";
          HyperBasevector hb( h, *kbb );
          Ofstream( bout, sub_dir + "/report.brief" );
          bout << "EVALUATION OF MODIFIED DIPLOID ASSEMBLY VERSUS REFERENCE\n";

          // Find cases where only one allele is covered.

          vec< vec<ho_interval> > covered( genome.size( ) );
          vec< vec<int> > covered_by( genome.size( ) );
          for ( int i = 0; i < aligns.isize( ); i++ )
          {    const look_align& la = aligns[i];
               covered[la.target_id].push_back(
                    ho_interval( la.pos2( ), la.Pos2( ) ) );
               covered_by[la.target_id].push_back(i);    }

          // Now covered tells us which regions are covered by an alignment, and
          // covered_by tells us the corresponding alignment that the coverage came
          // from.

          vec<int> L( genome.size( ), 0 );
          for ( int g = 0; g < genome.size( ); g++ )
          {    SortSync( covered[g], covered_by[g] );
               for ( int i = 0; i < covered[g].isize( ); i++ )
                    L[g] = Max( L[g], covered[g][i].Length( ) );    }

          // Now L provides upper bounds for the lengths of the intervals in
          // covered.

          vec< pair<int,int> > snps_added;
          for ( int g = 0; g < genome.size( )/2; g++ )
          {    vec< vec<ho_interval> > cov(2);
               vec< vec<int> > cov_by(2);
               cov[0] = covered[g];
               cov[1] = covered[ g + genome.size( )/2 ];
               cov_by[0] = covered_by[g];
               cov_by[1] = covered_by[ g + genome.size( )/2 ];
               for ( int j1 = 0; j1 < 2; j1++ )
               {    int j2 = 1 - j1;
                    int g1 = g, g2 = g + genome.size( )/2;
                    if ( j1 == 1 ) swap( g1, g2 );

                    // Now we have homologous chromosomes g1 and g2.

                    for ( int i1 = 0; i1 < cov_by[j1].isize( ); i1++ )
                    {    const look_align& la1 = aligns[ cov_by[j1][i1] ];
                         if ( la1.Errors( ) > 0 ) continue;
                         if ( !aligns_index[ la1.query_id ].solo( ) )
                              continue;

                         // Now we have a perfect alignment la1 to chromosome 1.
                         // Is there a SNP under it which is uncovered on 
                         // chromosome 2?

                         ho_interval h1( la1.pos2( ), la1.Pos2( ) );
                         static vec<int> I;
                         OverlapIndices( h1, cov[j2], L[g2], I );
                         vec<int> snps_missing2;
                         for ( int u = la1.pos2( ); u < la1.Pos2( ); u++ )
                         {    if ( genome[g1][u] == genome[g2][u] ) continue;

                              // Now have a SNP at position u lying under la1.
                              // See if it is under an alignment on the other 
                              // chromosome.

                              Bool cov2 = False;
                              for ( int k = 0; k < I.isize( ); k++ )
                              {    const look_align& la2 
                                        = aligns[ cov_by[j2][ I[k] ] ];

                                   // Now we have an alignment la2 to chromosome 2,
                                   // whose coverage overlaps that of la1.
                                   //
                                   // See if u lies under la2.  Check for the 
                                   // special case where u lies in the K-1 end bases 
                                   // and has the wrong base.

                                   if ( !Member( la2.Extent2( ), u ) ) continue;
                                   if ( ( u < la1.pos2( ) + (K-1)
                                          || u >= la1.Pos2( ) - (K-1) )
                                        && 
                                        ( u < la2.pos2( ) + (K-1) 
                                          || u >= la2.Pos2( ) - (K-1) ) 
                                        && 
                                        !la2.rc1 )
                                   {    const basevector& e2 = 
                                             hb.EdgeObject( la2.query_id );
                                        if ( genome[g2][u] != e2[ u - la2.pos2( ) ] )
                                             continue;    }

                                   // Now we know that u lies under la2, so we don't
                                   // want to edit the assembly.

                                   cov2 = True;
                                   break;    }
                              if (cov2) continue;

                              // Now we know that u is covered by the perfect 
                              // alignment la1 on chromosome 1, but is not covered
                              // at all on chromosome 2.
          
                              cout << "see missing SNP at base " << g2 << "."
                                   << u << endl;
                              snps_added.push( g2, u );
                              snps_missing2.push_back(u);    }
                         if ( snps_missing2.empty( ) ) continue;

                         // Generate new edge: the stretch of chromosome 2 lying
                         // under la1, added in parallel with the edge that la1
                         // aligns.

                         basevector e;
                         e.SetToSubOf( genome[g2], la1.pos2( ), la1.extent2( ) );
                         int id = la1.query_id;
                         int v = to_left_vertex[id], w = to_right_vertex[id];
                         cout << "adding edge from " << v << " to " << w
                              << " having " << snps_missing2.size( ) << " SNPs\n";
                         hb.AddEdge( v, w, e );    }    }    }

          // Count all positions at which the two homologs differ.

          int total_snps = 0;
          for ( int g = 0; g < genome.size( )/2; g++ )
          {    for ( int j = 0; j < genome[g].isize( ); j++ )
                    if ( genome[g][j] != genome[ g + genome.size( )/2 ][j] )
                         ++total_snps;    }
          UniqueSort(snps_added);
          cout << "\n" << snps_added.size( ) << " SNPS ADDED ("
               << PERCENT_RATIO( 4, snps_added.isize( ), total_snps ) 
               << ")\n" << endl;

          // Build HyperKmerPath h2 corresponding to hb.  This is really ugly
          // and stupid.  We ought to instead make the subsequent alignment, etc.
          // take a HyperBasevector hb as input.

          vecbasevector bases;
          for ( int j = 0; j < hb.EdgeObjectCount( ); j++ )
               bases.push_back( hb.EdgeObject(j) );
          Mkdir777( wrun_dir + "/2" );
          bases.WriteAll( wrun_dir + "/2/reads.fastb" );

          // USE_TRUTH is always True inside this block, so the else branch
          // below is not reached.

          longlong genome_size = 0;
          if (USE_TRUTH)
          {    for ( int i = 0; i < genome.size( ); i++ )
                    genome_size += genome[i].size( );    }
          else genome_size = StringOfFile( data_dir + "/genome.size", 1 ).Int( );
          vecKmerPath spaths;
          String KS = ToString(K);
          ReadsToPathsCoreY( bases, K, genome_size, spaths );
          spaths.WriteAll( wrun_dir + "/2/reads.paths.k" + KS );
          vecKmerPath spaths_rc(spaths);
          for ( int i = 0; i < spaths.size( ); i++ )
               spaths_rc[i].Reverse( );
          spaths_rc.WriteAll( wrun_dir + "/2/reads.paths_rc.k" + KS );
          vec<tagged_rpint> spathsdb;
          CreateDatabase( spaths, spaths_rc, spathsdb );
          BinaryWrite2( wrun_dir + "/2/reads.pathsdb.k" + KS, spathsdb );

          // kbb2 is owned by this block and released at its end.

          KmerBaseBroker* kbb2 = new KmerBaseBroker( wrun_dir + "/2", K );
          vec<KmerPath> these_paths;
          for ( int j = 0; j < hb.EdgeObjectCount( ); j++ )
               these_paths.push_back( spaths[j] );
          HyperKmerPath h2( K, hb, these_paths );

          // Redo evaluation.

          AlignHyperKmerPath( h2, kbb2, data_dir + "/genome", wrun_dir, aligns,
               aligns_index );
          if (FILTER_ALIGNS) FilterAligns( h2, aligns, aligns_index, trusted_paths, MIN_LEN );
          ReorderToFollowReference( h2, aligns, aligns_index );
          cout << "\n" << bar << "\n"
               << "EVALUATION OF IMPROVED ASSEMBLY VERSUS REFERENCE\n" 
               << bar << "\n";
          PrintAlignedHyperKmerPath( cout, h2, kbb2, genome, aligns, 
               aligns_index, True, &trusted_paths );
          PrintAlignedHyperKmerPath( bout, h2, kbb2, genome, aligns, 
               aligns_index, True, &trusted_paths, True, DIPLOID );
          bout << "\n" << dbar << "\n\n" << snps_added.size( ) << " SNPS ADDED ("
               << PERCENT_RATIO( 4, snps_added.isize( ), total_snps ) << ")\n";
          SummarizeReferenceCoverage( cout, genome, aligns, DIPLOID );
          SummarizeReferenceCoverage( bout, genome, aligns, DIPLOID, True );
          PrintGraphStatistics( cout, h2, genome.size( ), DIPLOID, USE_TRUTH, aligns, 
               aligns_index, sub_dir, True );
          PrintGraphStatistics( bout, h2, genome.size( ), DIPLOID, USE_TRUTH, aligns,
               aligns_index, sub_dir );
          cout << "\nFixed dot file is in\n" << RealPath(sub_dir)
               << "/hyper.fixed.dot.\n";
          cout << "\nReduced fixed dot file is in\n" << RealPath(sub_dir)
               << "/hyper.fixed.reduced.dot.\n";
          Ofstream( dot, sub_dir + "/hyper.fixed.dot" );
          h2.PrintSummaryDOT0w(dot);

          // Release the broker allocated above; nothing uses it past here.

          delete kbb2;    }
     if ( USE_TRUTH ) cout << "\n" << "Brief report is in\n" << RealPath(sub_dir) 
          << "/report.brief.\n";    }

/**
   Function: FilterAligns

   Build trusted paths from the alignments of h to the reference, filter
   them, and then regenerate aligns and aligns_index from the surviving
   trusted paths.  The wall-clock time used by each stage is reported on
   cout.

   Parameters:

      h - the graph whose edges were aligned.
      aligns, aligns_index - the alignments and their per-edge index; both
        are rewritten on output.
      trusted_paths - receives the filtered trusted paths.
      MIN_LEN - if positive, trusted paths are also filtered by length using
        this value.

*/
void FilterAligns( const HyperKmerPath& h, vec<look_align>& aligns,
     vec< vec<int> >& aligns_index, vec<TrustedPath>& trusted_paths,
     const int MIN_LEN )
{
     // A single timer is restarted at the beginning of each stage.

     double stage_start = WallClockTime( );
     FilterByReference( h, h.K( ), aligns, aligns_index, trusted_paths );
     cout << TimeSince(stage_start) << " used creating trusted paths" << endl;

     // Length filtering is optional.

     if ( MIN_LEN > 0 )
     {    stage_start = WallClockTime( );
          FilterPathsByLength( trusted_paths, MIN_LEN, 0 );
          cout << TimeSince(stage_start)
               << " used filtering trusted paths by length" << endl;    }

     stage_start = WallClockTime( );
     FilterPathsByAlignDominance( trusted_paths );
     cout << TimeSince(stage_start)
          << " used filtering trusted paths by align dominance." << endl;

     stage_start = WallClockTime( );
     FilterPathsByEdgeDominance( trusted_paths, aligns_index.size() );
     cout << TimeSince(stage_start)
          << " used filtering trusted paths by edge dominance." << endl;

     // Convert the remaining trusted paths back into indexed alignments.

     TrustedPathsToIndexedAligns( trusted_paths, h.EdgeObjectCount( ),
          aligns, aligns_index );    }



