/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2005) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef FORCE_DEBUG
     #define NDEBUG
#endif

#include "Basevector.h"
#include "paths/simulation/SimTrueSequenceBroker.h"
#include <algorithm>

// Loads the true read locations and the reference genome for a simulated run.
//
//   dataDir     - searched second for genome.paths.k<K>; the only place
//                 genome.fastb is looked for.
//   runDir      - holds reads.ref.locs; searched first for genome.paths.k<K>.
//   k           - stored in K and used to build the paths file name.
//   readLengths - one entry per read: its size fixes the size of
//                 true_locs_by_read and its maximum is stored in max_read.
//
// Sets mode to 1 when a genome.paths.k<K> file is found (runDir takes
// precedence over dataDir) and to 2 when falling back to genome.fastb.
// Calls FatalErr if neither kind of genome file exists.
void SimTrueSequenceBroker::LoadData( String dataDir, String runDir, 
                                      int k, const vec<int>& readLengths ) {
  K = k;

  // max_element returns end() for an empty range, which must not be
  // dereferenced; with no reads the longest read length is taken to be 0.
  max_read = 0;
  if ( readLengths.size( ) > 0 )
    max_read = *max_element( readLengths.begin( ), readLengths.end( ) );

  READX( runDir + "/reads.ref.locs", true_locs );

  // Build the read id -> index-in-true_locs table; -1 marks a read that has
  // no true location.  The table is emptied first because resize() alone
  // would keep entries left over from an earlier call.
  true_locs_by_read.clear( );
  true_locs_by_read.resize( readLengths.size( ), -1 );
  for ( int i = 0; i < true_locs.isize( ); i++ )
  {    int id = true_locs[i].ReadId( );
       // The id comes from the file, so make sure it fits the table.
       ForceAssertGe( id, 0 );
       ForceAssertGe( true_locs_by_read.isize( ), id + 1 );
       // Each read may be placed at most once.
       ForceAssertEq( true_locs_by_read[id], -1 );
       true_locs_by_read[id] = i;    }

  const String runPaths = runDir + "/genome.paths.k" + ToString(K);
  const String dataPaths = dataDir + "/genome.paths.k" + ToString(K);
  const bool inRunDir = IsRegularFile( runPaths );

  if ( inRunDir || IsRegularFile( dataPaths ) )
  {    mode = 1;
       genome_paths.ReadAll( inRunDir ? runPaths : dataPaths );
       // genome_starts[i][j] is the summed Length() of the segments that
       // precede segment j on genome path i.
       genome_starts.clear( );
       genome_starts.resize( genome_paths.size( ) );
       for ( int i = 0; i < genome_paths.size( ); i++ )
       {    int x = 0;
            for ( int j = 0; j < genome_paths[i].NSegments( ); j++ )
            {    genome_starts[i].push_back(x);
                 x += genome_paths[i].Segment(j).Length( );    }    }    }
  else
  {    mode = 2;
       if ( !IsRegularFile( dataDir + "/genome.fastb" ) )
       {    FatalErr( "SimTrueSequenceBroker needs to have either "
                 << "genome.fastb or genome.paths.k" << K << "." );    }
       genome.ReadAll( dataDir + "/genome.fastb" );    }
}


void SimTrueSequenceBroker::SetInsert( int id1, int id2 ) {

         const read_location& rl1 = true_locs[ true_locs_by_read[id1] ];
         const read_location& rl2 = true_locs[ true_locs_by_read[id2] ];
         m = rl1.Contig( );
         ForceAssertEq( m, rl2.Contig( ) );
         ForceAssert( rl1.Fw( ) != rl2.Fw( ) );
         if ( rl1.Fw( ) )
         {    start = rl1.StartOnContig( ); 
              stop = rl2.StopOnContig( ) + 1;    }
         else
         {    start = rl2.StartOnContig( );
              stop = rl1.StopOnContig( ) + 1;    }

  rl1_p = &rl1;
  rl2_p = &rl2;
}

// Makes [newstart, newstop) on contig newcontig the current region, without
// tying it to a particular read pair.
//
// rl1_p is pointed at the first forward-oriented entry of true_locs; if
// there is none, rl1_p keeps its previous value.
// NOTE(review): rl2_p is not modified here, yet GetTruth( KmerPath& )
// dereferences it -- confirm that callers never rely on it after SetRegion.
void SimTrueSequenceBroker::SetRegion( int newcontig, int newstart, int newstop ) {
  m = newcontig;
  start = newstart;
  stop = newstop;

  // Skip over non-forward locations until a forward one (or the end) is hit.
  unsigned int idx = 0;
  while ( idx < true_locs.size() && !true_locs[idx].Fw() )
    ++idx;

  if ( idx < true_locs.size() )
    rl1_p = &true_locs[idx];
}

// Base-sequence flavour of GetTruth: copies bases [start, stop) of genome[m]
// into `truth` and then reverse-complements the result.
//
// Calls FatalErr when KmerPathMode( ) is true, since this overload needs the
// bases loaded from genome.fastb.
// NOTE(review): the reverse complement is applied unconditionally, whereas
// the KmerPath overload reverses only when rl2_p->Rc( ) -- confirm that the
// difference is intended.
void SimTrueSequenceBroker::GetTruth( basevector& truth ) const 
{
  if ( KmerPathMode( ) )
  {
    FatalErr( "GetTruth( basevector& ) not implemented for the case "
         << "where there is no file genome.fastb." );
  }

  truth.SetToSubOf( genome[m], start, stop - start );
  truth.ReverseComplement( );
}

// KmerPath flavour of GetTruth: rebuilds `truth` as the piece of
// genome_paths[m] that covers the current region.
//
// The region holds stop - start - K + 1 kmers.  If that is less than one, a
// warning is printed and `truth` is left empty.  Otherwise the segments of
// the genome path are copied, trimming the first one so that it begins at
// `start` and stopping once the required number of kmers has been added.
// Finally `truth` is reversed when rl2_p->Rc( ) holds.
// Calls FatalErr when KmerPathMode( ) is false.
// NOTE(review): rl2_p is dereferenced unconditionally -- presumably a prior
// SetInsert call is required; confirm.
void SimTrueSequenceBroker::GetTruth( KmerPath& truth ) const {
  if ( !KmerPathMode( ) )
  {
    FatalErr( "GetTruth( KmerPath& ) not implemented for the case "
         << "where there is no file genome.paths.k" << K << "." );
  }

  truth.Clear( );

  const vec<int>& seg_starts = genome_starts[m];
  int remaining = stop - start - (int) K + 1;

  if ( remaining >= 1 )
  {
    const KmerPath& ref_path = genome_paths[m];

    // Last segment whose starting offset is <= start.
    const int first_seg = upper_bound( seg_starts.begin( ), seg_starts.end( ),
                                       start ) - seg_starts.begin( ) - 1;

    // Kmers to skip at the front of the first segment; zero for later ones.
    int offset = start - seg_starts[first_seg];

    for ( int seg = first_seg;
          remaining != 0 && seg < ref_path.NSegments( );
          ++seg )
    {
      const KmerPathInterval& interval = ref_path.Segment(seg);
      int take = Min( interval.Length( ) - offset, remaining );
      truth.AddSegment( interval.Start( ) + offset,
                        interval.Start( ) + offset + take - 1 );
      remaining -= take;
      offset = 0;
    }
  }
  else
  {
    cout << "Warning: true path has less than K bases.\n"
         << "Leaving truth as empty KmerPath.\n";
  }

  // Note that mux searching walks right-to-left, so rl1 should be
  // RC and on the right.
  if ( rl2_p->Rc( ) )
    truth.Reverse( );
}

// Collects the ids of all reads whose true location lies entirely inside
// the current region [start, stop) on contig m.
//
//   contained_reads_id1_dir - cleared, then filled with the reads whose
//                             Fw( ) flag equals that of *rl1_p.
//   contained_reads_id2_dir - cleared, then filled with the remaining
//                             contained reads.
//
// Calls FatalErr when KmerPathMode( ) is false.
void SimTrueSequenceBroker::GetFullyContainedReads( 
                               vec<int>& contained_reads_id1_dir,
                               vec<int>& contained_reads_id2_dir ) const {
  if ( !KmerPathMode( ) )
  {
    FatalErr( "GetFullyContainedReads not implemented for the case "
         << "where there is no file genome.paths.k" << K << "." );
  }

  contained_reads_id1_dir.clear( );
  contained_reads_id2_dir.clear( );

  // Binary-search true_locs for a probe location placed max_read before the
  // region start on contig m, and scan forward from there.
  read_location probe;
  probe.SetContig(m);
  probe.SetStartOnContig( start - max_read );
  const int scan_from = lower_bound( true_locs.begin( ), true_locs.end( ),
                                     probe ) - true_locs.begin( );

  for ( int idx = scan_from; idx < true_locs.isize( ); ++idx )
  {
    const read_location& loc = true_locs[idx];

    // Left the contig, or reached reads starting at/after the region end.
    if ( loc.Contig( ) != m ) break;
    if ( loc.Start( ) >= stop ) break;

    // Skip reads that stick out of the region on either side.
    const bool sticks_out = loc.Start( ) < start || loc.Stop( )+1 > stop;
    if ( sticks_out ) continue;

    vec<int>& bucket = ( loc.Fw() == rl1_p->Fw() )
                         ? contained_reads_id1_dir
                         : contained_reads_id2_dir;
    bucket.push_back( loc.ReadId( ) );
  }
}

// Reports the size of every reference sequence.
//
//   ref_sizes - output; emptied first, then given one entry per reference.
//
// When no base sequences are loaded (genome is empty) each size is derived
// from the corresponding genome path as KmerCount( ) + K - 1; otherwise it
// is the size( ) of the corresponding entry of genome.
void SimTrueSequenceBroker::GetReferenceSizes( vec<int>& ref_sizes ) const {
  // Start from an empty result so that the sizes are not appended to
  // whatever the caller's vector already held (GetFullyContainedReads
  // treats its output vectors the same way).
  ref_sizes.clear( );

  if ( genome.empty() ) {
    // Compute from paths.
    for ( int i = 0; i < genome_paths.size(); ++i )
      ref_sizes.push_back( genome_paths[i].KmerCount() + K-1 );
  }
  else {
    // Compute from bases.
    for ( int i = 0; i < genome.size(); ++i )
      ref_sizes.push_back( genome[i].size() );
  }
}

