// Copyright (c) 2005 Broad Institute/Massachusetts Institute of Technology

// Program: GenomeToPaths
// 
// Given sequence for a genome as a fastb file, generate one
// <KmerPath> for each record in the fastb file (i.e. for each <genome part>).
// This KmerPath may contain adjacent and concatenatable intervals (i.e. not
// be the "minimal" / canonical / most-compressed representation of this sequence
// of kmers).
//
// Command-line arguments: HEAD (defaults to "genome"), K.
//
// Input:
// HEAD.fastb = genome fastb file taken as input
//
// Output:
// HEAD.paths.k* = output paths for genome, where * is K.
//
// Intermediates:
// HEAD.reads.K*.fastb
// HEAD.reads.K*.paths.k*
// HEAD.reads.mutmers.gz

#ifndef FORCE_DEBUG
     #define NDEBUG
#endif

#include "Basevector.h"
#include "Feudal.h"
#include "math/Functions.h"
#include "MainTools.h"
#include "paths/KmerPath.h"

int main( int argc, char *argv[] )
{
     RunTime( );

     BeginCommandArguments;
     CommandArgument_String_OrDefault(HEAD, "genome");
     CommandArgument_Int(K);
     EndCommandArguments;

     longlong genome_size = 0;
     const int read_length = 10000;
     int nreads = 0;
     vec<int> contig;
     int ncontigs;
     String HEAD_reads = HEAD + ".reads.K" + ToString(K);
     {    vecbasevector genome( HEAD + ".fastb" );
          ncontigs = genome.size( );
          for ( int i = 0; i < ncontigs; i++ )
               genome_size += genome[i].size( );
          vecbasevector reads;
          longlong reads_rawsize = 0;
          for ( int pass = 1; pass <= 2; pass++ )
          {    if ( pass == 2 ) 
               {    reads.Reserve( reads_rawsize, nreads );
                    contig.reserve(nreads);    }
               for ( int i = 0; i < genome.size( ); i++ )
               {    const basevector& g = genome[i];
                    for ( int start = 0; start < g.isize( ); start++ )
                    {    int len = Min( read_length, g.isize( ) - start );
                         if ( pass == 1 )
                         {    nreads++;
                              reads_rawsize += (len+15)/16;    }
                         else
                         {    static basevector b;
                              b.SetToSubOf( g, start, len );
                              reads.push_back(b);
                              contig.push_back(i);    }
                         if ( start + len == g.isize( ) ) break;
                         start += len - K;    }    }    }
          reads.WriteAll( HEAD_reads + ".fastb" );    }
     
// MakeDepend: dependency ReadsToPaths
     int status = System( "ReadsToPaths HEAD=" + HEAD_reads + " K=" + ToString(K)
          + " GENOME_SIZE=" + ToString(genome_size)
          + " USE_QUALITY_SCORES=False PATHS_ONLY=True" );
     if ( status != 0 ) FatalErr( "ReadsToPaths failed." );
     Remove( HEAD_reads + ".fastb" );

     vecKmerPath readpaths, paths;
     readpaths.ReadAll( HEAD_reads + ".paths.k" + ToString(K) );
     PRINT2( readpaths.size( ), ncontigs );
     paths.resize(ncontigs);
     longlong nsegs = 0;
     for ( int i = 0; i < nreads; i++ )
          nsegs += readpaths[i].NSegments( );
     PRINT(nsegs);
     paths.Reserve( nsegs, ncontigs );
     for ( int i = 0; i < nreads; i++ )
     {    for ( int j = 0; j < readpaths[i].NSegments( ); j++ )
          paths[ contig[i] ].AddSegment( readpaths[i].Segment(j) );    }
     paths.WriteAll( HEAD + ".paths.k" + ToString(K) );
     Remove( HEAD + ".reads.paths.k" + ToString(K) );
     Remove( HEAD_reads + ".paths.k" + ToString(K) );    }
