/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2005) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef UNIPATH_NHOOD_H
#define UNIPATH_NHOOD_H

// File: UnipathNhood.h
//
// This file defines a toolkit for building a neighborhood (abbreviated "nhood")
// of unipaths around a given seed unipath, and identifying the reads that came from
// this neighborhood.

#include "CoreTools.h"
#include "Equiv.h"
#include "Feudal.h"
#include "Basevector.h"
#include "math/Functions.h"
#include "ReadLocation.h"
#include "ReadPairing.h"
#include "graph/Digraph.h"
#include "paths/KmerPath.h"
#include "paths/PdfEntry.h"
#include "paths/simulation/Placement.h"
#include "SemanticTypes.h"

// Logical type: pos_rel_to_seed_t
// Position relative to <neighborhood seed>.
// Declared through the SemanticTypeStd macro with int as its type argument.
// In this header it is the element type of the 'usestart' output of
// PopulateNhoodWithReads (predicted read starts relative to the seed).
SemanticTypeStd( int, pos_rel_to_seed_t );


// Class template: Tsepdev
//
// A sepdev defines a separation between two unspecified things, along with a 
// deviation value for it.  T is the numeric type used to store both values.
template<typename T>
class Tsepdev {

     public:

     typedef T value_type;

     // Default constructor.  Both members are value-initialized (zero for
     // arithmetic T), so Sep() and Dev() never read indeterminate memory on a
     // default-constructed object.
     Tsepdev( ) : sep_(), dev_() { }

     // Construct from a separation and a deviation that are already of type T.
     Tsepdev( T sep, T dev ) : sep_(sep), dev_(dev) { }

     // Construct from a separation and a deviation of some other type U; each
     // value is converted to T by the member initializers.
     template<typename U>
     explicit Tsepdev( U sep, U dev ) : sep_(sep), dev_(dev) { }
     // specialization later for T=int, U=float/double

     // Accessors: the stored separation and deviation.
     T Sep( ) const { return sep_; }
     T Dev( ) const { return dev_; }

     // things that make it easier to modify them:

     // Negate the separation; the deviation is left unchanged.
     void Flip() { sep_ = -sep_; }

     // Shift the separation by t; the deviation is left unchanged.
     void AddToSep( T t ) { sep_ += t; }

     private:

     T sep_;   // the separation
     T dev_;   // the deviation attached to sep_

};

// Integer-valued and double-valued sepdevs.
typedef Tsepdev<int> sepdev;
typedef Tsepdev<double> fsepdev;


// Specializations of the converting constructor for Tsepdev<int> built from
// floating-point input.  Rather than truncating, the separation goes through
// round() and the deviation through ceil() before being stored as int.

// From float.
template<> template<> 
inline Tsepdev<int>::Tsepdev( float sep, float dev )
  : sep_( static_cast<int>( round(sep) ) ),
    dev_( static_cast<int>( ceil(dev) ) )
{ }

// From double.
template<> template<> 
inline Tsepdev<int>::Tsepdev( double sep, double dev )
  : sep_( static_cast<int>( round(sep) ) ),
    dev_( static_cast<int>( ceil(dev) ) )
{ }


// Class template: Tedge
//
// A Tedge records a link between two unipaths, identified by their ids, together
// with the separation between them and a deviation for that separation (both
// held in a Tsepdev<T>).
template<typename T>
class Tedge {

     public:

     typedef T value_type;

     int uid1;            // id of the first unipath
     int uid2;            // id of the second unipath
     Tsepdev<T> sepdev;   // separation and deviation between the two

     // Default constructor: the unipath ids are not initialized.
     Tedge( ) { }

     // Build an edge from u1 to u2 with separation s and deviation d; s and d
     // are handed to the Tsepdev<T> constructor, which converts them from U.
     template <typename U>
     Tedge( int u1, int u2, U s, U d ) : uid1(u1), uid2(u2), sepdev(s,d) { }

     // Shorthand accessors for the stored separation and deviation.
     T sep() const { return sepdev.Sep(); }
     T dev() const { return sepdev.Dev(); }

     // Ordering: by uid1, then by uid2, then by deviation.  The separation does
     // not take part in the comparison.
     friend Bool operator<( const Tedge<T>& e1, const Tedge<T>& e2 )
     {    if ( e1.uid1 != e2.uid1 )
               return ( e1.uid1 < e2.uid1 ) ? True : False;
          if ( e1.uid2 != e2.uid2 )
               return ( e1.uid2 < e2.uid2 ) ? True : False;
          return ( e1.dev() < e2.dev() ) ? True : False;    }
};

// The integer-valued edge.
typedef Tedge<int> edge;


// Given a vec of edges, we can build a sepdev graph:
template<typename T>
void BuildGraphFromEdges( const vec< Tedge<T> >& given_edges,
			  int nuni, // number of unipaths
			  digraphE< Tsepdev<T> >& G ) {
     vec< vec<vrtx_t> > from(nuni), to(nuni);
     vec< vec<int> > from_edge_obj(nuni), to_edge_obj(nuni);
     vec< Tsepdev<T> > edges;
     for ( int i = 0; i < given_edges.isize( ); i++ )
     {    const Tedge<T>& e = given_edges[i];
          from[e.uid1].push_back(e.uid2);
          to[e.uid2].push_back(e.uid1);
          from_edge_obj[e.uid1].push_back(i);
          to_edge_obj[e.uid2].push_back(i);
          edges.push_back(  Tsepdev<T>( e.sep(), e.dev() ) );    }
     for ( int i = 0; i < nuni; i++ )
     {    SortSync( from[i], from_edge_obj[i] );
          SortSync( to[i], to_edge_obj[i] );    }
     G.Initialize( from, to, edges, to_edge_obj, from_edge_obj );
}



// FuncDecl: BuildUnipathLinkGraph
//
// Build the unipath graph, in which vertices are 
// normal unipaths and edges come from read pairs.
//
// Only declared here.  Instantiated in the .cc file for sepdev and fsepdev,
// i.e. for T = int and T = double (G is a digraphE< Tsepdev<T> >).
// NOTE(review): no definition is visible in this header, so other choices of T
// presumably fail at link time -- confirm against the .cc file.
template<class T>
void BuildUnipathLinkGraph( 

     // inputs:

     const int K,                              // as in Kmer
     const vec<read_pairing>& pairs,           // read pairs
     const vec<read_location_short>& ulocs,    // locations of reads on unipaths
     const vecvec<int>& ulocs_indexr,          // index to ulocs by reads
     const vec<Bool>& normal,                  // is a given unipath normal
     const vec<nbases_t>& ulen,                     // unipath lengths
     const vecKmerPath& paths,                 // the reads
     const vec<unipath_id_t>& to_rc,                    // map unipath to its rc
     const int min_edge_multiplicity,          // minimum multiplicity, else ignore edge

     // output:

     digraphE< Tsepdev<T> >& G,                      // the graph

     // optional:

     bool verbose = false                      // verbosity flag; off by default
          );


// DiscreteRandom: given a pdf {(c,p)} where c is an integer and p is the 
// probability of that integer, choose an integer.
//
// pdf    - the (c,p) entries to draw from
// return - the chosen integer
// NOTE(review): presumably the integer is drawn at random according to the
// probabilities p; the source of randomness is not visible from this declaration.

int DiscreteRandom( const serfvec<pdf_entry>& pdf );

// Linked: NOTE(review): undocumented in the original.  Judging from the name and
// arguments, this presumably reports whether unipaths x and y are joined by at
// least one read pair -- confirm against the definition.
//
// ulocs / uindex      - read locations on unipaths and, presumably, the index
//                       into them by unipath (as for FindUnipathNhood)
// pairs / pairs_index - the read pairs and, presumably, the index into them by
//                       read id
Bool Linked( int x, int y, const vec<read_location_short>& ulocs,
     const vec<int>& uindex, const vec<read_pairing>& pairs, 
     const vec<int>& pairs_index );


// LinkProbability: given normal unipaths x and y, with hypothetical separation 
// s +/- d (x --> y), estimate the probability that there is at least one link from 
// x to y with separation sep +/- dev such that |sep-s#| <= 2*dev, where s# is an
// instantiation of the normal random variable corresponding to s +/- d.
//
// The three functions CalcLinkProbability, SimLinkProbability and
// LinkProbability all take the same argument list and all return this estimate.

// CalcLinkProbability: calculate the answer.
// Actually this still uses a little bit of Monte Carlo, but only
// asks for one normally-distributed random number per iteration.
// NOTE(review): 'tries' is presumably the number of iterations -- confirm
// against the definition.
double CalcLinkProbability( int x, int y, int s, int d, const vec<int>& ulen,
     const vec<read_location_short>& ulocs, const vec<int>& uindex,
     const vecvec<pdf_entry>& cp, const vec<read_pairing>& pairs, 
     const vec<int>& pairs_index, const vecKmerPath& paths, int tries );
// SimLinkProbability: approximate the link probability for unipaths x and y at
// hypothetical separation s +/- d by Monte Carlo simulation.
// In the worst case, uses (#links)^2 random numbers per iteration.
// NOTE(review): 'tries' is presumably the number of simulation iterations --
// confirm against the definition.
double SimLinkProbability( int x, int y, int s, int d, const vec<int>& ulen,
     const vec<read_location_short>& ulocs, const vec<int>& uindex,
     const vecvec<pdf_entry>& cp, const vec<read_pairing>& pairs, 
     const vec<int>& pairs_index, const vecKmerPath& paths, int tries );

// LinkProbability: the entry point for the link probability estimate; it has the
// same argument list as SimLinkProbability and CalcLinkProbability.
// This can be switched between the Sim and Calc versions, or used for
// testing them against each other, etc.
// NOTE(review): which version is currently selected is decided in the
// definition and cannot be told from this header.
double LinkProbability( int x, int y, int s, int d, const vec<int>& ulen,
     const vec<read_location_short>& ulocs, const vec<int>& uindex,
     const vecvec<pdf_entry>& cp, const vec<read_pairing>& pairs, 
     const vec<int>& pairs_index, const vecKmerPath& paths, int tries );


// Class: ustart
//
// A ustart defines a start point for a unipath, relative to a given fixed
// position (that is, relative to the <seed> of the unipath's <neighborhood>).
// Alongside the start it carries a list of deviations for that start.
class ustart {

     public:

     // Default constructor.  Produces a well-defined placeholder: uid -1,
     // start 0, no deviations, and an empty MeanDev() cache.  Initializing the
     // cache sentinel matters: left indeterminate, MeanDev() could return
     // garbage instead of computing its value.
     ustart( ) : uid_(-1), start_(0), cached_mean_dev_(-1) { }

     // Construct from a unipath id, its start and its list of deviations.
     ustart( int uid, int start, const vec<int>& dev )
          : uid_(uid), start_(start), cached_mean_dev_(-1), dev_(dev)  { }

     // Id of the unipath.
     int Uid( ) const { return uid_; }

     // Start of the unipath.
     int Start( ) const { return start_; }

     // The individual deviations given at construction.
     const vec<int>& Dev( ) const { return dev_; }

     // Combined deviation: the square root of the sum of the squares of the
     // entries of Dev(), rounded to int.  (Despite the name this is not an
     // arithmetic mean.)  Computed on the first call and cached; a negative
     // cached value means "not yet computed".
     int MeanDev( ) const
     { 
       if ( cached_mean_dev_ < 0 ) {
         double sqsum = 0.0;
         for ( int i = 0; i < dev_.isize( ); i++ )
           sqsum += double( dev_[i] ) * double( dev_[i] );
         cached_mean_dev_ = int(round(sqrt(sqsum)));    
       }
       return cached_mean_dev_;
     }

     // Default ordering: by start only.
     friend Bool operator<( const ustart& e1, const ustart& e2 )
     {    return e1.start_ < e2.start_;    }

     // Comparator that places ustarts with larger MeanDev() first.
     struct OrderByDescendingMeanDev 
       : public binary_function<ustart,ustart,bool> {
       bool operator() ( const ustart& lhs, const ustart& rhs ) const {
         return ( lhs.MeanDev() > rhs.MeanDev() );
       }
     };

     private:

     int uid_;                       // unipath id
     int start_;                     // start of the unipath
     mutable int cached_mean_dev_;   // MeanDev() cache; < 0 until computed
     vec<int> dev_;                  // deviations for start_

};

/**
   FuncDecl: FindUnipathNhood

   Given a seed unipath, find all nearby <normal unipaths> to form a
   <neighborhood> around this seed.

   The result is returned in 'processed': one ustart per unipath in the nhood.
   Declaration only; the definition is not in this header.
*/
void FindUnipathNhood( 

     // Inputs:

     const int v,                             // seed unipath
     const digraphE<sepdev>& G,               // graph of all normal unipaths
     const vec<int>& ulen,                    // length of each unipath
     const vecvec<pdf_entry>& cp,             // copy number pdf for unipaths
     const vec<int>& predicted_copyno,        // predicted copy number for unipaths
     const vecKmerPath& paths,                // the read paths
     const vec<read_location_short>& ulocs,   // locations of reads on unipaths
     const vec<int>& uindex,                  // index to ulocs
     const vec<read_pairing>& pairs,          // all the read pairs
     const vec<int>& pairs_index,             // index to read pairs by read ids
     const Bool FILTER_NHOOD,                 // processing option: filter?
     const int MAX_COPY_NUMBER_OTHER,         // screens unipaths entering nhood
     const int NHOOD_RADIUS,                  // how far we go away from the seed
     const int MAX_TO_CUT,                    // cutpoints longer than this are kept
     const int MAX_DEV,                       // how stretchy sep from seed can get
     const int MAX_PATHS_IN_NHOOD,            // how big the nhood can get
     const Bool BUILD_NHOOD_FW_ONLY,          // forward only?
     const int simulation_tries,              // sample size for simulations

     // Output:

     vec<ustart>& processed                   // positions of unipaths in nhood

          );

/**
   FuncDecl: PopulateNhoodWithReads

   Find the reads which go in a particular neighborhood.   Also return their orientations and predicted positions.

   Input parameters:

     int v                                  -  seed for nhood: unipath id of the seed for this nhood
     const vec<ustart>& processed           -  this defines the nhood: for each unipath in the nhood,
                                               its id and its position relative to the seed 'v'
     int K                                  -  size of kmers in terms of which all paths are defined
     const vec<int> ulen                    -  unipath lengths, for each unipath id.
     const vec<read_pairing>& pairs         -  read pairs: what reads go into each pair
     const vec<pair_id_t>& pairs_index      -  index to read pairs: for each read, the index in 'pairs'
                                               of the read_pairing structure describing its read and its partner
     const vecKmerPath& paths               -  kmer paths for reads (the <read paths>)
     const vec<read_location_short>& ulocs  -  locations of reads on unipaths.  each element describes the location
                                               of one read on one unipath; locations of reads on a given unipath
                                               are contiguous in the array.  'uindex' maps each unipath to the block
                                               of read alignments to that unipath in 'ulocs'.
     const vec<int>& uindex                 -  index to ulocs: locations of reads aligned to unipath w are in
                                               ulocs[] locations [ uindex[w], uindex[w+1] ).
     const int NHOOD_RADIUS_INTERNAL        -  how far from origin we should go
     const int MAX_DEV                      -  how sloppy read locations can get
     const Bool REACH_FW_ONLY               -  only go forward?

   Output parameters:

     vec< pair<read_id_t,orient_t> >& use   -  reads in nhood, with orientations
     vec<pos_rel_to_seed_t>& usestart       -  predicted start of read relative to the seed 'v'
     vec<pair_id_t>& pairs_to_use           -  the constituent pairs

   NOTE(review): 'ulen' is declared by value, so the whole vector is copied on
   every call.  Passing it by const reference would avoid the copy, but the
   declaration must change together with the definition, which is not in this
   header.
   
*/
void PopulateNhoodWithReads(

     // Inputs:

     int v,                                 // seed for nhood
     const vec<ustart>& processed,          // this defines the nhood
     int K,                                 // as in Kmer
     const vec<int> ulen,                   // unipath lengths (passed by value)
     const vec<read_pairing>& pairs,        // read pairs
     const vec<pair_id_t>& pairs_index,           // index to read pairs
     const vecKmerPath& paths,              // kmer paths for reads
     const vec<read_location_short>& ulocs, // locations of reads on unipaths
     const vec<int>& uindex,                // index to ulocs
     const int NHOOD_RADIUS_INTERNAL,       // how far from origin we should go
     const int MAX_DEV,                     // how sloppy read locations can get
     const Bool REACH_FW_ONLY,              // only go forward?

     // Outputs:

     vec< pair<read_id_t,orient_t> >& use,  // reads in nhood, with orientations
     vec<pos_rel_to_seed_t>& usestart,      // predicted start of read rel. v
     vec<pair_id_t>& pairs_to_use                 // the constituent pairs

          );

// GetShortInsertReads: find the short-insert read pairs such that each of its
// reads could be completely contained in a contig created from the reads in a
// populated nhood.  The reads found are returned, with orientations, in P.
//
// NOTE(review): 'use' is listed as an input but is taken by non-const reference;
// whether it is modified cannot be told from this declaration.
// NOTE(review): PATH_KS and the three extra_* arguments are undocumented in the
// original; presumably they describe additional path sets built at other kmer
// sizes -- confirm against the definition.

void GetShortInsertReads(

     // Inputs:

     vec< pair<read_id_t,orient_t> >& use,            // reads in nhood, with orientations
     const vec<tagged_rpint>& pathsdb,      // paths database for reads
     const vecKmerPath& paths,              // kmer paths for reads
     const vecKmerPath& paths_rc,           // kmer paths for rc of reads

     const vec<nbases_t>& PATH_KS,
     const vec< vec<tagged_rpint> >& extra_paths_db,
     const vec< vecKmerPath >& extra_paths,
     const vec< vecKmerPath >& extra_paths_rc,			   
     
     const vec<read_id_t>& partner,               // map read to partner
     const vec<Bool>& is_short_pair_read,   // is read an end of a short-insert pair
     

     // Output:

     vec< pair<read_id_t,orient_t> >& P               // the short insert reads, orientations

          );

// PrintNhood: print a unipath neighborhood.
//
// v         - NOTE(review): presumably the seed unipath of the nhood, as in
//             FindUnipathNhood -- confirm
// processed - the unipaths making up the nhood
// ulen      - NOTE(review): presumably the unipath lengths -- confirm
// USE_TRUTH - if true, "locs" should be valid
// locs      - pointer to placements; only required to be valid when USE_TRUTH
//             is true

void PrintNhood( int v, const vec<ustart>& processed, const vec<int>& ulen,
     Bool USE_TRUTH, const vecvec<placement>* locs );

/// FuncDecl: FillInTransitiveEdges
///
/// For each vertex of predicted-copy-number one in the graph, join it
/// to other vertices whose separation will be within radius of that
/// vertex.  But this naive version won't find connections which
/// require passing through multiple vertices further than distance
/// radius away.
///
/// An edge we construct will replace an existing edge with the same
/// endpoints if the new one has smaller deviation.
///
/// Do not introduce new edges having deviation > max_dev.
///
/// Do not replace an edge unless its deviation decreases by percent_improvement.
///
/// The graph is modified in place.  verbosity defaults to 0 and locs_p to NULL,
/// so both may be omitted by callers.
/// NOTE(review): the roles of verbosity and locs_p are not documented here;
/// presumably they control diagnostic output -- confirm against the definition.
template<class T>   // defined for int and double
void FillInTransitiveEdges( digraphE< Tsepdev<T> >& graph, 
			    const int radius, 
                            const double max_dev,
                            const double percent_improvement,
			    const vec<int>& predicted_copyno,
			    const vec<nbases_t>& unipath_len, 
			    int verbosity = 0,
			    vecvec<placement>* locs_p = NULL );

#endif
