/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef FORCE_DEBUG
     #define NDEBUG
#endif

#include <set>

#include "Basevector.h"
#include "CoreTools.h"
#include "system/System.h"
#include "FastIfstream.h"
#include "Feudal.h"
#include "ParseSet.h"
#include "ReadLocation.h"
#include "ReadPairing.h"
#include "VecOverlap.h"
#include "KmerShape.h"
#include "kmer_freq/KmerFrequencyTable.h"
#include "kmer_freq/WriteKmerFrequencies.h"
#include "math/Functions.h"
#include "paths/HyperKmerPath.h"
#include "paths/KmerPath.h"
#include "paths/KmerBaseBroker.h"
#include "paths/LocalizeReadsTail.h"
#include "paths/PdfEntry.h"
#include "paths/SharedCounter.h"
#include "system/ParsedArgs.h"


void LocalizeReadsParallelLaunch( const String& PARALLEL_BATCHES,
     const vec<unipath_id_t>& seeds, const parsed_args& command, const String& data_dir,
     const String& run_dir, const String& sub_dir, const String& LSF_ARGS,
     const String& LSF_HOST, const int K, const Bool USE_TRUTH,
     const int MIN_OVERLAP_FINAL, const int MIN_PROPER_OVERLAP_FINAL,
     const Bool GLOBAL_CLEAN, const Bool TRACK_GLOBAL_CLEAN_CHANGES, 
     const int SHOW_PAIR_ALIGNS, const Bool FILTER_ALIGNS, const Bool FINAL_MERGE, 
     const Bool BASIC_DEBUG, const Bool DIPLOID,
     const Bool DISAMBIGUATE_SIMPLE_LOOPS_VERBOSE, const Bool PULL_VERBOSE,
     const int MIN_COMPONENT, const int MAX_SHORT_INSERT_SEP,
     const String& unilocs_file, const String& wrun_dir,
     const Bool NEW_POORLY_COVERED )

{

     int nbatches;
     vec<String> boxes;
     if ( PARALLEL_BATCHES.IsInt( ) ) nbatches = PARALLEL_BATCHES.Int( );
     else
     {    ParseStringSet( PARALLEL_BATCHES, boxes );
          nbatches = boxes.size( );    }
     int nseeds = seeds.size( );
     {    Ofstream( seedout, sub_dir + "/seedlist" );
          for ( int i = 0; i < nseeds; i++ )
               seedout << seeds[i] << "\n";
          Ofstream( currentout, sub_dir + "/current_seed" );
          currentout << 0 << "\n";
          Ofstream( instanceout, sub_dir + "/current_instance" );
          instanceout << 0 << "\n";    }
     parsed_args new_command(command);
     new_command.RemoveArg("PARALLEL_BATCHES");
     new_command.RemoveArg("LSF_ARGS");
     String C = new_command.TheCommand();
     C += " COUT=False";
     C += " EXIT_AT=hyperbases";
     vec<int> pid(nbatches);

     // Remove any old parallel control files

     for (int i = 0; i < nbatches; i++) {
	 String dirname = sub_dir + "/" + ToString(i);
	 Remove(dirname + "/SLAVE_STARTED");
	 Remove(dirname + "/SLAVE_FINISHED");
       }

     // Open instance counter file

     String instance_file = sub_dir + "/current_instance";
     int pc_instance_fd = open( instance_file.c_str() , O_RDWR, 0664 );
     cout << nbatches << " parallel jobs starting" << endl;

     // Start parallel processes and LSF jobs

	       int LSF_slaves = 0;
               for ( int i = 0; i < nbatches; i++ ) {
		 if (!boxes.empty() && boxes[i] == "LSF") {
		   ++LSF_slaves;
		 } else {
		   String prefix, postfix;
		   if ( boxes.empty( ) ) {
		     prefix = "";
		     //postfix = " > /dev/null 2>&1"; 
		     postfix = String(" > ") + sub_dir + "/LocalizeReadsSlaveLog." + ToString(i) + ".log  2>&1"; 
		   }  else {
		     int priority = getpriority( PRIO_PROCESS, 0 );
		     prefix = "ssh " + boxes[i] + " 'nice +" 
		       + ToString(priority) + " ";
		     postfix = " >& /dev/null'"; 
		   }
		   // Start next process
		   pid[i] = Fork( prefix + C 
				  + " SEEDS=@" + sub_dir + "/seedlist"
				  + " PARALLEL_CONTROL=" + sub_dir + "/current_seed "
				  + postfix );
		 }
	       }
	       
	       // Submit LSF jobs

	       if (LSF_slaves > 0) {
		 cout << "Submitting " << LSF_slaves << " jobs to LSF queue\n";
		 String prefix = "ssh " + LSF_HOST + " 'bsub " + LSF_ARGS +
		   " -J \"" + sub_dir + "_" + ToString(getpid()) +
		   "[1-" + ToString(LSF_slaves) + "]\" " +
		   "-o /dev/null ";
		 String postfix = "'"; 
		 Fork( prefix + C 
		       + " SEEDS=@" + sub_dir + "/seedlist"
		       + " PARALLEL_CONTROL=" + sub_dir + "/current_seed "
		       + postfix );
	       }

	       // Wait for all running jobs on to finish

	       set<int> working_slaves;
	       int slave_count = 0;
	       bool slave_found = false;
	       while (!working_slaves.empty() || !slave_found) {
		 InspectSharedCounter(pc_instance_fd, slave_count);
		 for (int i = 0; i < slave_count; i++) {
		   String slave_dir = sub_dir + "/" + ToString(i);
		   if (IsRegularFile(slave_dir + "/SLAVE_STARTED")) {
		     working_slaves.insert(i);
		     slave_found = true;
		   }
		   if (IsRegularFile(slave_dir + "/SLAVE_FINISHED")) 
		     working_slaves.erase(i);
		 }
		 sleep(1);
	       }

     cout << slave_count << " parallel jobs completed" << endl;

     vec<String> nhoods;
     vec<int> seeds_found;
     for ( int i = 0; i < slave_count; i++ ) 
     {    if (IsRegularFile(sub_dir + "/" + ToString(i) + "/SLAVE_FINISHED")) 
          {    fast_ifstream 
	            in( sub_dir + "/" + ToString(i) + "/LocalizeReads.out" );
               String line;
               vec<char> this_nhood;
               while(1)
               {    getline( in, line );
                    if ( in.fail( ) )
                    {    if ( this_nhood.nonempty( ) ) 
                              nhoods.push_back(String(this_nhood));
                         break;    }
                    if ( line.Contains( "Terminating at hyperbases" ) )
                    {    if ( this_nhood.nonempty( ) ) 
                              nhoods.push_back(String(this_nhood));
                         break;    }
                    if ( line.Contains( "neighborhood of ", 0 ) )
                    {    if ( this_nhood.nonempty( ) ) 
                              nhoods.push_back(String(this_nhood));
                         this_nhood.clear( );
                         for ( int i = 0; i < line.isize( ); i++ )
                              this_nhood.push_back( line[i] );
                         this_nhood.push_back( '\n' );    }
                    else if ( this_nhood.nonempty( ) )
                    {    for ( int i = 0; i < line.isize( ); i++ )
                              this_nhood.push_back( line[i] );
                         this_nhood.push_back( '\n' );    }    }    }    }
     vec<HyperBasevector> hyperbases;
     for ( int i = 0; i < slave_count; i++ ) 
     {    if (IsRegularFile(sub_dir + "/" + ToString(i) + "/SLAVE_FINISHED")) 
          {    vec<HyperBasevector> hyperbasesi;
	       BinaryRead( sub_dir + "/" + ToString(i) + "/closures.bases", 
	            hyperbasesi );
	       hyperbases.append(hyperbasesi);    }    }
     SortSync( nhoods, hyperbases );
     cout << "\n";
     for ( int i = 0; i < nhoods.isize( ); i++ )
     {    cout << nhoods[i];
          seeds_found.push_back( nhoods[i].Between( "hood of ", ":" ).Int( ) );    }
     flush(cout);
     vec<int> sseeds(seeds);
     vec<int> sseeds_found(seeds_found);
     Sort(sseeds), Sort(sseeds_found);
     if ( sseeds != sseeds_found )
     {    cout << "Something went wrong in parallelization: ";
          cout << "seeds requested != seeds found.\n";
          for ( int i = 0; i < sseeds_found.isize( ); i++ )
          {    int j;
               for ( j = i + 1; j < seeds_found.isize( ); j++ )
                    if ( sseeds_found[j] != sseeds_found[i] ) break;
               if ( j - i > 1 )
               {    cout << "Seed " << sseeds_found[i] << " was "
                         << "processed " << j - i << " times.\n";    }
               i = j - 1;    }
          UniqueSort(sseeds_found);
          for ( int i = 0; i < sseeds.isize( ); i++ )
          {    if ( !BinMember( sseeds_found, sseeds[i] ) )
               {    cout << "Seed " << sseeds[i] 
                         << " was not processed.\n";    }    }
          for ( int i = 0; i < sseeds_found.isize( ); i++ )
          {    if ( !BinMember( sseeds, sseeds_found[i] ) )
               {    cout << "Non-seed " << sseeds_found[i] 
                         << " was processed.\n";    }    }
          cout << "Aborting." << endl;
          TracebackThisProcess( );    }
     cout << "MERGING OUTPUT OF PARALLEL RUNS" << endl;
     String KS = ToString(K);
     vecKmerPath paths( run_dir + "/reads.paths.k" + KS );
     vecKmerPath paths_rc( run_dir + "/reads.paths_rc.k" + KS );
     BREAD2( run_dir + "/reads.pathsdb.k" + KS, vec<tagged_rpint>, pathsdb );
     KmerBaseBroker* gkbb = new KmerBaseBroker(run_dir, K, paths, paths_rc, pathsdb);
     vecKmerPath unipaths( run_dir + "/reads.unipaths.k" + KS );
     vecbasevector reads( run_dir + "/reads.fastb" );
     int nreads = reads.size( );
     vec<read_pairing> pairs;
     ReadPairsFile( run_dir + "/reads.pairto", pairs );
     vec<int> pairs_index( nreads, -1 );
     for ( int i = 0; i < pairs.isize( ); i++ )
          pairs_index[ pairs[i].id1 ] = pairs_index[ pairs[i].id2 ] = i;
     vec<int> partner( nreads, -1 );
     for ( int i = 0; i < pairs.isize( ); i++ )
     {    partner[ pairs[i].id1 ] = pairs[i].id2;
          partner[ pairs[i].id2 ] = pairs[i].id1;    }
     vecbasevector genome;
     if (USE_TRUTH) genome.ReadAll( data_dir + "/genome.fastb" );
     BREAD2( unilocs_file, vec<read_location_short>, ulocs );
     vecvec<int> ulocs_indexr;
     ulocs_indexr.ReadAll( unilocs_file + ".indexr" );
     vecvec<pdf_entry> cp;
     cp.ReadAll( run_dir + "/reads.unipaths.predicted_count.k" + KS );
     vec<int> predicted_copyno( unipaths.size( ), -1 );
     for ( int i = 0; i < unipaths.size( ); i++ )
     {    int copyno = -1;
          double maxp = 0;
          for ( int j = 0; j < cp[i].size( ); j++ )
          {    if ( cp[i][j].second > maxp )
               {    copyno = cp[i][j].first;
                    maxp = cp[i][j].second;    }    }
          predicted_copyno[i] = copyno;    }
     vec<read_location> readlocs;
     vec<int> readlocs_index;
     if (USE_TRUTH)
     {    READX( run_dir + "/reads.ref.locs", readlocs );
          readlocs_index.resize(nreads);
          for ( int i = 0; i < readlocs.isize( ); i++ )
          {    const read_location& rl = readlocs[i];
               readlocs_index[ rl.ReadId( ) ] = i;    }    }
     LocalizeReadsTail( sub_dir, hyperbases, K, KS, wrun_dir, MIN_OVERLAP_FINAL, 
          MIN_PROPER_OVERLAP_FINAL, GLOBAL_CLEAN, SHOW_PAIR_ALIGNS, pairs, 
          pairs_index, partner, reads, gkbb, unipaths, ulocs, ulocs_indexr, 
          MAX_SHORT_INSERT_SEP, FILTER_ALIGNS, genome, FINAL_MERGE, USE_TRUTH, 
          data_dir, nreads, BASIC_DEBUG, predicted_copyno, MIN_COMPONENT, readlocs, 
          readlocs_index, DIPLOID, False, DISAMBIGUATE_SIMPLE_LOOPS_VERBOSE, 
          PULL_VERBOSE, TRACK_GLOBAL_CLEAN_CHANGES, False, NEW_POORLY_COVERED );    }

void JustPlotLocalKmerFrequencies( const vec< pair<read_id_t,orient_t> >& use, 
     const vecbasevector& reads, const String& run_dir )
{
            const int my_K = 20;

            vec<read_id_t> cloud_ids;
            cloud_ids.reserve( use.size() );
            for ( int i = 0; i < use.isize(); i++ )
              cloud_ids.push_back( use[i].first );
            cloud_ids.erase( unique( cloud_ids.begin(), cloud_ids.end() ), cloud_ids.end() );

            vecbasevector cloud_bases;
            longlong cloud_dyn_space = 0;
            for ( int i = 0; i < cloud_ids.isize(); i++ )
              cloud_dyn_space = reads[ cloud_ids[i] ].SizeOfDynamicData();
            cloud_bases.Reserve( cloud_dyn_space, cloud_ids.size() );
            for ( int i = 0; i < cloud_ids.isize(); i++ )
              cloud_bases.push_back( reads[ cloud_ids[i] ] );
            
            temp_file frequencies_file( "/tmp/LR_freqdata.XXXXXX" );
	    #define CALL_WRITE_FREQS(K, reads) \
               WriteKmerFrequencies< KmerShapeDefaultClass(K) >( reads, frequencies_file, true )
	    #define CALL_FOR_K(K) CALL_WRITE_FREQS( K, cloud_bases );
	    FOR_SUPPORTED_K( my_K, CALL_FOR_K );
            
            KmerFrequencyTable all_table( my_K, frequencies_file );
            vec<longlong> all_hist;
            all_table.GetHistogram( all_hist );
            
            vecbasevector cloud_true_bases;
            cloud_true_bases.Read( run_dir + "/reads.true.fastb", cloud_ids, 0 );
            
            vecbasevector cloud_bad_kmers;
            for ( int i = 0; i < cloud_ids.isize(); ++i ) {
              if ( cloud_bases[i] != cloud_true_bases[i] ) {
                vec<bool> is_bad_kmer( cloud_bases[i].size() - my_K + 1, false );
                for ( int j = 0; j < (int) cloud_bases[i].size(); ++j )
                  if ( cloud_bases[i][j] != cloud_true_bases[i][j] )
                    for ( int k = max(0,j-my_K+1); k <= j; k++ )
                      is_bad_kmer[k] = true;
                basevector bad_kmer;
                for ( int j = 0; j < is_bad_kmer.isize(); ++j )
                  if ( is_bad_kmer[j] ) {
                    bad_kmer.SetToSubOf( cloud_bases[i], j, my_K );
                    cloud_bad_kmers.push_back( bad_kmer );
                  }
              }
            }
	    
            #define CALL_FOR_K2(K) CALL_WRITE_FREQS(K, cloud_bad_kmers)
	    FOR_SUPPORTED_K( my_K, CALL_FOR_K2 );
            KmerFrequencyTable bad_table( my_K, frequencies_file );
            vec<longlong> bad_hist;
            bad_table.GetHistogram( bad_hist );
            
            temp_file plotdata_file( "/tmp/LR_plotdata.XXXXXX" );
            {
              ofstream plotdata_strm( plotdata_file.c_str() );
              for ( int i = 0; i < all_hist.isize(); ++i )
                plotdata_strm << all_hist[i] << "\t" << bad_hist[i] << "\n";
            }

            cout << "plotting!" << endl;

            String plot_cmd = "/usr/bin/gnuplot -";
            procbuf plot_pipe( plot_cmd.c_str(), ios::out );
            ostream plot_strm( &plot_pipe );
            
            plot_strm << "set terminal x11" << endl;
            plot_strm << "set logscale xy" << endl;
            plot_strm << "plot '" << plotdata_file << "' using 1" << endl;
            plot_strm << "replot '" << plotdata_file << "' using 2" << endl;
            
            cin.ignore(100,'\n');
            
            plot_strm << "quit" << endl;

          }

// ExtendClosures: for every closure in ppclosures, repeatedly look for a 
// unique unipath that could follow its right end, report it on cout, and 
// append it to the closure.  A second pass does the same after reversing all
// pairs and closures, which amounts to extending on the left; everything is
// reversed once more at the end of that pass.
//
// ppp:        the pairs.  Taken by value, so the in-place reversals done here
//             are not seen by the caller.
// L:          indexed by the entries of a closure; the values are summed to 
//             measure overlaps and gaps (presumably unipath lengths -- 
//             confirm).
// ppclosures: ppclosures[i] are the closures of pair i; extended in place.
// dmult:      number of deviations by which a gap may be off.
// MAX_SHORT_INSERT_SEP: half of it is the minimum summed overlap required 
//             of a read that starts inside the closure.

void ExtendClosures( vec<pp_pair> ppp, const vec<int>& L,
     vec< vec<pp_closure> >& ppclosures, const double dmult, 
     const int MAX_SHORT_INSERT_SEP )
{    
     const int min_overlap = MAX_SHORT_INSERT_SEP/2;
     const int npairs = ppp.isize( );

     // Pass 1 extends to the right, pass 2 (on reversed data) to the left.

     for ( int pass = 1; pass <= 2; pass++ )
     {    const char* direction = ( pass == 1 ? "right" : "left" );

          // Entries 2*l and 2*l+1 of readsp are the left and right reads of 
          // pair l.

          vec< vec<int> > readsp;
          for ( int u = 0; u < npairs; u++ )
               readsp.push_back( ppp[u].Left( ), ppp[u].Right( ) );
          vec_overlap<int> over(readsp);

          for ( int pi = 0; pi < npairs; pi++ )
          {    for ( int ci = 0; ci < ppclosures[pi].isize( ); ci++ )
               {    pp_closure& c = ppclosures[pi][ci];

                    // Keep extending c for as long as there is exactly one
                    // candidate for the next unipath.

                    while(1)
                    {    static vec< pair<int,int> > overlaps;
                         over.GetOverlaps( c, overlaps );

                         // hits[r] lists the positions in overlaps that 
                         // refer to entry r of readsp.

                         vec< vec<int> > hits( readsp.size( ) );
                         for ( int v = 0; v < overlaps.isize( ); v++ )
                              hits[ overlaps[v].first ].push_back(v);

                         // Candidates for the unipath following the right 
                         // end of c.

                         vec<int> nexts;

                         // Case 1: a left read that runs past the end of c.
                         // If it starts inside c, its overlap with c must 
                         // sum to at least min_overlap.

                         for ( int l = 0; l < npairs; l++ )
                         {    const vec<int>& left_hits = hits[2*l];
                              for ( int m1 = 0; m1 < left_hits.isize( ); m1++ )
                              {    const int o1 = overlaps[ left_hits[m1] ].second;
                                   if ( o1 + ppp[l].LeftSize( ) <= c.isize( ) ) 
                                        continue;
                                   if ( o1 >= 0 )
                                   {    int shared = 0;
                                        for ( int x = o1; x < c.isize( ); x++ )
                                             shared += L[ c[x] ];
                                        if ( shared < min_overlap ) continue;    }
                                   nexts.push_back( 
                                        ppp[l].Left( c.isize( ) - o1 ) );    }    }

                         // Case 2: a right read that runs past the end of c,
                         // where the gap still allows the left read to lie 
                         // entirely before c, and with the same overlap 
                         // requirement as in case 1.

                         for ( int l = 0; l < npairs; l++ )
                         {    const pp_pair& p = ppp[l];
                              const vec<int>& right_hits = hits[ 2*l + 1 ];
                              for ( int m2 = 0; m2 < right_hits.isize( ); m2++ )
                              {    const int o2 = overlaps[ right_hits[m2] ].second;
                                   if ( o2 + p.RightSize( ) <= c.isize( ) ) 
                                        continue;
                                   int before = 0;
                                   for ( int x = 0; x < o2; x++ )
                                        before += L[ c[x] ];
                                   if ( p.Gap( ) - before < -dmult * p.Dev( ) ) 
                                        continue;
                                   if ( o2 >= 0 )
                                   {    int shared = 0;
                                        for ( int x = o2; x < c.isize( ); x++ )
                                             shared += L[ c[x] ];
                                        if ( shared < min_overlap ) continue;    }
                                   nexts.push_back( 
                                        p.Right( c.isize( ) - o2 ) );    }    }

                         // Case 3: both reads of a pair overlap c, the right
                         // read runs past the end, and the part of c between
                         // the two reads is consistent with the gap.  
                         // Placements in which the reads overlap each other
                         // (start > stop) are accepted for now.

                         for ( int l = 0; l < npairs; l++ )
                         {    const pp_pair& p = ppp[l];
                              const vec<int>& left_hits = hits[2*l];
                              const vec<int>& right_hits = hits[ 2*l + 1 ];
                              for ( int m2 = 0; m2 < right_hits.isize( ); m2++ )
                              {    const int o2 = overlaps[ right_hits[m2] ].second;
                                   if ( o2 + p.RightSize( ) <= c.isize( ) ) 
                                        continue;
                                   for ( int m1 = 0; m1 < left_hits.isize( ); m1++ )
                                   {    const int o1 
                                             = overlaps[ left_hits[m1] ].second;
                                        const int start = o1 + p.LeftSize( );
                                        const int stop = o2;
                                        bool gap_ok = true;
                                        if ( start <= stop )
                                        {    int middle = 0;
                                             for ( int x = start; x < stop; x++ )
                                                  middle += L[ c[x] ];
                                             if ( Abs( p.Gap( ) - middle ) 
                                                  > dmult * p.Dev( ) )
                                             {    gap_ok = false;    }    }
                                        if (gap_ok)
                                        {    nexts.push_back( p.Right( 
                                                  c.isize( ) - o2 ) );    }    }    }    }

                         // Stop unless there is a unique next unipath.

                         UniqueSort(nexts);
                         if ( !nexts.solo( ) ) break;

                         // Announce result.

                         cout << "Closure " << ci << " of pair " << pi 
                              << " could be extended to "
                              << direction
                              << " by " << BaseAlpha( nexts[0] ) << "\n";    

                         // Extend c and go around again.

                         c.push_back( nexts[0] );    }    }    }

          // Reverse all pairs and closures.

          for ( int pi = 0; pi < npairs; pi++ )
          {    ppp[pi].ReverseMe( );
               for ( int x = 0; x < ppclosures[pi].isize( ); x++ )
                    ppclosures[pi][x].ReverseMe( );    }    }    }
