/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

// Define two programs, one to find sequence bridges that close gaps, and another
// to close gaps.  These two programs need to be in sync.

#ifndef FORCE_DEBUG
     #define NDEBUG
#endif

#include "Basevector.h"
#include "CoreTools.h"
#include "graph/Digraph.h"
#include "paths/BridgeGaps.h"
#include "paths/HyperKmerPath.h"
#include "paths/KmerBaseBroker.h"
#include "paths/KmerPath.h"
#include "paths/PairedPair.h"

// UniqueShortClosure: roughly, determine if a pair has a unique closure in 
// base-space of <= K.  Return the number of bases in the overlap, else 0.  

int UniqueShortClosure( const pp_pair& p, const HyperKmerPath& h, 
     const KmerBaseBroker& kbb, Bool verbose = False )
{    
     // Compute the range [low,high] of possible overlaps, in bases.

     int K = h.K( );
     if ( p.Gap( ) < -10 ) return 0;
     double overlap = -(p.Gap( ) - K + 1), dev = p.Dev( );
     int low = iceil( overlap - 4.0 * dev ), high = ifloor( overlap + 4.0 * dev );
     if ( low <= 0 ) return 0;
     if (verbose)
     {    cout << "looking for unique short closure of " << p << "\n";
          PRINT2( low, high );    }

     // Define KmerPaths f and t, where f is at the end of p.Left and t is at the
     // beginning of p.Right.

     KmerPath f, t;
     int from_start, to_stop, from_bases = K - 1, to_bases = K - 1;
     for ( from_start = p.Left( ).isize( ) - 1; from_start >= 0; from_start-- )
     {    from_bases += h.EdgeObject( p.Left(from_start) ).KmerCount( );
          if ( from_bases >= high ) break;    }
     if ( from_start < 0 ) return 0;
     for ( int j = from_start; j < p.Left( ).isize( ); j++ )
          f.Append( h.EdgeObject( p.Left(j) ) );
     for ( to_stop = 0; to_stop < p.Right( ).isize( ); to_stop++ )
     {    t.Append( h.EdgeObject( p.Right(to_stop) ) );
          to_bases += h.EdgeObject( p.Right(to_stop) ).KmerCount( );
          if ( to_bases >= high ) break;    }
     if ( to_stop == p.Right( ).isize( ) ) return 0;

     // Test for unique overlap.

     static basevector fb, tb;
     fb = kbb.Seq(f), tb = kbb.Seq(t);
     static vec<int> overlaps;
     overlaps.clear( );
     for ( int j = low; j <= high; j++ )
          if ( Overlap( fb, tb, j ) ) overlaps.push_back(j);
     if ( overlaps.size( ) != 1 ) return 0;
     int over = overlaps[0];
     if ( over > K ) return 0;
     return over;    }

void BridgeGaps( const vec<pp_pair>& ppp, const vec< vec<pp_closure> >& ppclosures,
     const HyperKmerPath& h, const KmerBaseBroker& kbb, vec<basevector>& bridges, 
     Bool verbose )
{    
     // Set up data structures.

     bridges.clear( );
     int K = h.K( );
     vec<vrtx_t> to_left, to_right;
     h.ToLeft(to_left), h.ToRight(to_right);
     vec<int> len;
     for ( int i = 0; i < h.EdgeObjectCount( ); i++ )
          len.push_back( h.EdgeLength(i) );

     // Method 1.

     for ( int i = 0; i < ppclosures.isize( ); i++ )
     {    const pp_pair& p = ppp[i];
          if ( ppclosures[i].empty( ) )
          {    int over = UniqueShortClosure( p, h, kbb, verbose );
               if ( over == 0 ) continue;
               int from = p.Left( ).back( ), to = p.Right( ).front( );
               const KmerPath &f = h.EdgeObject(from), &t = h.EdgeObject(to);
               basevector fb = kbb.Seq(f), tb = kbb.Seq(t);
               basevector bridge( 2 * K - over );
               for ( int j = 0; j < K; j++ )
               {    bridge.Set( j, fb[ fb.size( ) - K + j ] );
                    bridge.Set( j + K - over, tb[j] );    }
               bridge.ReverseComplement( );
               bridges.push_back(bridge);    
               if (verbose)
                    cout << "bridging [" << i << "] " << p << "\n";    }    }    
     UniqueSort(bridges);

     // Method 2.

     vec<pp_read> reads, readsL, readsR;
     for ( int i = 0; i < ppp.isize( ); i++ )
          reads.push_back( ppp[i].Left( ), ppp[i].Right( ) );
     UniqueSort(reads);
     for ( int i = 0; i < reads.isize( ); i++ )
     {    if ( h.Sink( to_right[ reads[i].back( ) ] ) ) 
               readsL.push_back( reads[i] );
          if ( h.Source( to_left[ reads[i].front( ) ] ) ) 
               readsR.push_back( reads[i] );    }
     vec<basevector> bridges2;
     for ( int i = 0; i < ppp.isize( ); i++ )
     {    const pp_pair &p = ppp[i];
          const pp_read &L = p.Left( ), &R = p.Right( );
          vec< pair<int,int> > Loverlaps, Roverlaps;
          vec<int> offsets;
          for ( int j = 0; j < readsL.isize( ); j++ )
          {    GetOverlaps( L, readsL[j], offsets );
               for ( int l = 0; l < offsets.isize( ); l++ )
                    Loverlaps.push( j, offsets[l] );    }
          for ( int j = 0; j < readsR.isize( ); j++ )
          {    GetOverlaps( R, readsR[j], offsets );
               for ( int l = 0; l < offsets.isize( ); l++ )
                    Roverlaps.push( j, offsets[l] );    }
          pp_read lnew, rnew;
          for ( int il = 0; il < Loverlaps.isize( ); il++ )
          {    const pp_read& Ladd = readsL[ Loverlaps[il].first ];
               int lo = Loverlaps[il].second;
               JoinReads( L, Ladd, lo, lnew );
               for ( int ir = 0; ir < Roverlaps.isize( ); ir++ )
               {    const pp_read& Radd = readsR[ Roverlaps[ir].first ];
                    int ro = Roverlaps[ir].second;
                    JoinReads( R, Radd, ro, rnew );
                    double gapshrink = 0.0;
                    for ( int x = L.isize( ); x < Ladd.isize( ) + lo; x++ )
                         gapshrink += len[ Ladd[x-lo] ];
                    for ( int x = 0; x < -ro; x++ )
                         gapshrink += len[ Radd[x] ];
                    pp_pair pnew( lnew, rnew, p.Gap( ) - gapshrink, p.Dev( ) );
                    int over = UniqueShortClosure( pnew, h, kbb, verbose );
                    if ( over == 0 ) continue;
                    int from = pnew.Left( ).back( ), to = pnew.Right( ).front( );
                    const KmerPath &f = h.EdgeObject(from), &t = h.EdgeObject(to);
                    basevector fb = kbb.Seq(f), tb = kbb.Seq(t);
                    basevector bridge( 2 * K - over );
                    for ( int j = 0; j < K; j++ )
                    {    bridge.Set( j, fb[ fb.size( ) - K + j ] );
                         bridge.Set( j + K - over, tb[j] );    }
                    bridge.ReverseComplement( );
                    if ( BinMember( bridges, bridge ) ) continue;
                    bridges2.push_back(bridge);    }    }    }
     UniqueSort(bridges2);
     if (verbose) 
          cout << "method 2: added " << bridges2.size( ) << " extra bridges\n";
     bridges.append(bridges2);    }

void SimpleExtendPairedPairs( const HyperKmerPath& h, const KmerBaseBroker& kbb,
     vec<pp_pair>& ppp, const vec<int>& L, const double dmult, 
     const vec<Bool>& remove, const Bool simple, const Bool verbose )
{    int K = h.K( );
     digraphE<int> G( h, L );
     vec<vrtx_t> to_left, to_right;
     h.ToLeft(to_left), h.ToRight(to_right);
     for ( int i = 0; i < ppp.isize( ); i++ )
     {    if ( remove.nonempty( ) && remove[i] ) continue;
          pp_mpair p( ppp[i] );
          if ( ReadyToClose( p, dmult ) )
          {    static vec<pp_closure> cl;
               Bool trim = True;
               if (verbose) cout << "getting closures of " << p << "\n";
               GetClosures( p, L, dmult, cl, False, verbose );
               if ( cl.size( ) == 1 )
               {    double g = 0.0;
                    for ( int j = 0; j < cl[0].isize( ); j++ )
                         g -= L[ cl[0][j] ];
                    if ( cl[0].isize( ) > p.LeftSize( )
                         || cl[0].isize( ) > p.RightSize( )
                         || p.Gap( ) != g || p.Dev( ) != 0.0 )
                    {    ppp[i] = pp_pair( cl[0], cl[0], g, 0.0 );
                         continue;    }    }    }
          if (simple) continue;
          int over = UniqueShortClosure( p, h, kbb );
          if ( over == 0 ) continue;
          int v = to_right[ p.Left( ).back( ) ], w = to_left[ p.Right( ).front( ) ];
          int intervening_kmers = K - over - 1;
          if ( intervening_kmers == 0 )
          {    if ( v == w )
               {    pp_read r( p.Left( ) );
                    r.append( p.Right( ) );
                    int len = 0;
                    for ( int j = 0; j < r.isize( ); j++ )
                         len += L[ r[j] ];
                    if (verbose) cout << "closing " << p << ", case 1\n";
                    ppp[i] = pp_pair( r, r, -double(len), 0.0 );    }    }
          else if ( intervening_kmers < 0 )
          {    int trim = 0, trim_kmers = 0;
               for ( int j = 0; j < p.RightSize( ); j++ )
               {    trim_kmers += L[ p.Right(j) ];
                    trim++;
                    if ( intervening_kmers + trim_kmers >= 0 ) break;    }
               if ( intervening_kmers + trim_kmers == 0 )
               {    pp_read r( p.Left( ) );
                    for ( int j = trim; j < p.RightSize( ); j++ )
                         r.push_back( p.Right(j) );
                    int len = 0;
                    for ( int j = 0; j < r.isize( ); j++ )
                         len += L[ r[j] ];
                    if (verbose) cout << "closing " << p << ", case 2\n";
                    ppp[i] = pp_pair( r, r, -double(len), 0.0 );    }    }
          else // intervening_kmers > 0
          {    static vec< vec<vrtx_t> > paths;
               G.AllPathsFixedLength( v, w, intervening_kmers, paths );
               if ( paths.solo( ) )
               {    pp_read r( p.Left( ) );
                    r.append( paths[0] );
                    r.append( p.Right( ) );
                    int len = 0;
                    for ( int j = 0; j < r.isize( ); j++ )
                         len += L[ r[j] ];
                    if (verbose) cout << "closing " << p << ", case 3\n";
                    ppp[i] = pp_pair( r, r, -double(len), 0.0 );    }    }    }    }
