/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2005) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

// ---------------------------------------------------------------------------------

// UnipathLocs.  Generate read locations on normal unipaths, as a 
// vec<read_location_short>, where "normal" is defined by MAX_COPY_NUMBER and
// MIN_KMERS.  As compared to using all the unipaths, this saves space.
// Sort by contig, and for fixed contig, by read.  Provide a separate index by 
// read.  Files created:
// reads.unilocs.K.MAX_COPY_NUMBER.MIN_KMERS[.indexr].

#ifndef FORCE_DEBUG
     #define NDEBUG
#endif

#include "Basevector.h"
#include "Feudal.h"
#include "FeudalTemplate.h"
#include "MainTools.h"
#include "ReadLocation.h"
#include "VecTemplate.h"
#include "paths/KmerPath.h"
#include "paths/PdfEntry.h"

// Program entry point.
//
// Command-line arguments: PRE, DATA, RUN (strings, joined as PRE/DATA/RUN to 
// form the run directory), K (int), MAX_COPY_NUMBER (int, default 10) and 
// MIN_KMERS (int, default 1).
//
// Steps:
// 1. Load the read paths (forward and rc), the unipaths and the unipath 
//    database for the given K.
// 2. Mark as "normal" each unipath having at least MIN_KMERS kmers and whose 
//    best-scoring predicted-count entry is at most MAX_COPY_NUMBER.
// 3. For every read, in both orientations, record one location for each 
//    distinct (normal unipath, offset) pair that the read's path segments hit 
//    in the unipath database.
// 4. Sort the locations with cmp_contig_read, build an index keyed by 
//    read id, and write both to 
//    reads.unilocs.K.MAX_COPY_NUMBER.MIN_KMERS and the same name + ".indexr".

int main( int argc, char *argv[] )
{
     RunTime( );

     BeginCommandArguments;
     CommandArgument_String(PRE);
     CommandArgument_String(DATA);
     CommandArgument_String(RUN);
     CommandArgument_Int(K);
     CommandArgument_Int_OrDefault(MAX_COPY_NUMBER, 10);
     CommandArgument_Int_OrDefault(MIN_KMERS, 1);
     EndCommandArguments;

     // Set up directories.

     String run_dir = PRE + "/" + DATA + "/" + RUN;

     // Read in read paths (both orientations), unipaths, and the unipath 
     // database.

     vecKmerPath paths( run_dir + "/reads.paths.k" + ToString(K) );
     vecKmerPath paths_rc( run_dir + "/reads.paths_rc.k" + ToString(K) );
     vecKmerPath unipaths( run_dir + "/reads.unipaths.k" + ToString(K) );
     int nreads = paths.size( ), nuni = unipaths.size( );
     BREAD2( run_dir + "/reads.unipathsdb.k" + ToString(K), 
          vec<tagged_rpint>, unipathsdb );

     // Compute length (in kmers) of each unipath.

     vec<int> ulen(nuni);
     for ( int i = 0; i < nuni; i++ )
          ulen[i] = unipaths[i].KmerCount( );

     // Read in output of UnipathCoverage.

     vecvec<pdf_entry> cp;
     cp.ReadAll( run_dir + "/reads.unipaths.predicted_count.k" + ToString(K) );

     // Define the normal unipaths.  A unipath is rejected if it is shorter than
     // MIN_KMERS, or if the entry of cp[i] with the largest .second has a .first
     // exceeding MAX_COPY_NUMBER.  Note that if cp[i] is empty, or no entry has
     // .second > 0, then copyno stays at -1 and the unipath is accepted.

     vec<Bool> normal(nuni, False);
     for ( int i = 0; i < nuni; i++ )
     {    if ( ulen[i] < MIN_KMERS ) continue;
          int copyno = -1;
          double maxp = 0;
          for ( int j = 0; j < cp[i].size( ); j++ )
          {    if ( cp[i][j].second > maxp )
               {    copyno = cp[i][j].first;
                    maxp = cp[i][j].second;    }    }
          if ( copyno > MAX_COPY_NUMBER ) continue;
          normal[i] = True;    }

     // Find read placements on unipaths.  Pass 1 uses the forward path of the
     // read, pass 2 its reverse complement path.

     vec<read_location_short> ulocs;
     for ( int id = 0; id < nreads; id++ )
     {    for ( int pass = 1; pass <= 2; pass++ )
          {    const KmerPath& p = ( pass == 1 ? paths[id] : paths_rc[id] );

               // Collect (unipath id, offset) pairs for this read/orientation.

               static vec< pair<int,int> > uo;
               uo.clear( );

               // Total length of segments 0..j-1 of p.  Kept as a running sum
               // rather than being recomputed for every database hit, since it
               // depends only on j.

               longlong kmers_before = 0;
               for ( int j = 0; j < p.NSegments( ); j++ )
               {    const KmerPathInterval& I = p.Segment(j);
                    static vec<longlong> locs;
                    Contains( unipathsdb, I, locs );
                    for ( int u = 0; u < locs.isize( ); u++ )
                    {    const tagged_rpint& t = unipathsdb[ locs[u] ];
                         int uid = t.PathId( );
                         if ( !normal[uid] ) continue;

                         // offset = (t.Start - I.Start) 
                         //          + length of read segments before j 
                         //          - length of unipath segments before the one 
                         //            that t refers to.
                         // NOTE(review): presumably this is the position of the
                         // unipath start relative to the read start, in kmers; 
                         // confirm against read_location_short users.

                         longlong offset 
                              = t.Start( ) - I.Start( ) + kmers_before;
                         for ( int r = 0; r < t.PathPos( ); r++ )
                              offset -= unipaths[uid].Segment(r).Length( );
                         uo.push_back( make_pair( uid, int(offset) ) );    }
                    kmers_before += I.Length( );    }

               // Several segments of the read can land on the same unipath with
               // the same offset; keep each pair only once.

               UniqueSort(uo);
               for ( int t = 0; t < uo.isize( ); t++ )
               {    int uid = uo[t].first, offset = uo[t].second;

                    // The location stores the negated offset as its start.

                    ulocs.push_back( read_location_short( id, uid, -offset, 
                         ( pass == 1 ? ForwardOr : ReverseOr ) ) );    }    }    }
     sort( ulocs.begin( ), ulocs.end( ), cmp_contig_read );

     // Generate output files: the sorted locations, and an index of them keyed
     // by read id.

     vecvec<int> index;
     BuildIndex( ulocs, index, mem_fun_ref(&read_location_short::ReadId), nreads );
     String filehead = run_dir + "/reads.unilocs." + ToString(K) + "."
          + ToString(MAX_COPY_NUMBER) + "." + ToString(MIN_KMERS);
     BinaryWrite2( filehead, ulocs );
     index.WriteAll( filehead + ".indexr" );    }
