/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2005) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

// A completed insert consists of the KmerPaths from a successful walk of a
// given insert.  It knows the identity of the given insert, the sensitivity 
// with which the insert walk was computed (WalkOneInsert MIN_PERFECT_MATCH), 
// and the locations of all reads which can be placed on the KmerPaths.

#ifndef COMPLETED_INSERT_H
#define COMPLETED_INSERT_H

#include <set>

#include "Basevector.h"
#include "CoreTools.h"
#include "Feudal.h"
#include "ReadPairing.h"
#include "paths/HyperKmerPath.h"
#include "paths/KmerPath.h"
#include "paths/NegativeGapValidator.h"
#include "paths/PairedPair.h"
#include "paths/PileReadsOnPath.h"

class HyperKmerPath; // forward declaration; named by a CompletedInsert constructor

// A SloppyPathEmbedding specifies minimal information regarding an alignment
// of one KmerPath p1 to another p2: whether the left end of p1 is mapped to a kmer
// on p2 (and if so which one), and likewise for the right end.  We allow the 
// pathological case where neither the left or right end of p1 is mapped.

class SloppyPathEmbedding {

     public:

     SloppyPathEmbedding( ) { }

     SloppyPathEmbedding( Bool left_end_mapped, Bool right_end_mapped,
          const KmerPathLocAlt& left_end_to, const KmerPathLocAlt& right_end_to )
          : left_end_mapped_(left_end_mapped), right_end_mapped_(right_end_mapped),
          left_end_to_(left_end_to), right_end_to_(right_end_to) { }

     Bool LeftEndMapped( ) const { return left_end_mapped_; }
     Bool RightEndMapped( ) const { return right_end_mapped_; }

     KmerPathLocAlt LeftEndTo( ) const { return left_end_to_; }
     KmerPathLocAlt RightEndTo( ) const { return right_end_to_; }

     void SetLeftEndMapped( Bool b ) { left_end_mapped_ = b; }
     void SetRightEndMapped( Bool b ) { right_end_mapped_ = b; }

     void SetLeftEndTo( const KmerPathLocAlt& l ) { left_end_to_ = l; }
     void SetRightEndTo( const KmerPathLocAlt& r ) { right_end_to_ = r; }

     private:

     Bool left_end_mapped_, right_end_mapped_;
     KmerPathLocAlt left_end_to_, right_end_to_;

};

// A SloppyReadPlacement records where a read lands on a KmerPath: the read's
// id, whether it is placed reverse-complemented, and a SloppyPathEmbedding 
// giving the (possibly partial) mapping of its two ends.

class SloppyReadPlacement {
     
     public:

     // Default: read id -1, forward orientation, default-constructed embedding.
     // The scalar members are initialized explicitly so that the accessors 
     // never read indeterminate values.

     SloppyReadPlacement( ) : read_id_(-1), rc_(false) { }

     SloppyReadPlacement( int read_id, Bool rc, const SloppyPathEmbedding& e )
          : read_id_(read_id), rc_(rc), e_(e) { }

     int ReadId( ) const { return read_id_; }

     // Orientation of the placement; Fw( ) is always the negation of Rc( ).

     Bool Rc( ) const { return rc_; }
     Bool Fw( ) const { return !rc_; }

     // Copy of the embedding.  Const-qualified so that it can be called on a
     // const placement, like every other accessor of this class.

     SloppyPathEmbedding Embedding( ) const { return e_; }

     // Convenience accessors that forward to the embedding.

     Bool LeftEndMapped( ) const { return e_.LeftEndMapped( ); }
     Bool RightEndMapped( ) const { return e_.RightEndMapped( ); }
     KmerPathLocAlt LeftLoc( ) const { return e_.LeftEndTo( ); }
     KmerPathLocAlt RightLoc( ) const { return e_.RightEndTo( ); }

     Bool BothEndsMapped( ) const { return LeftEndMapped( ) && RightEndMapped( ); }

     // Reverse: defined out of line.
     // NOTE(review): presumably converts this placement on p into the 
     // corresponding placement on the reverse of p -- confirm against the 
     // definition.

     void Reverse( const KmerPath& p );

     private:

     int read_id_;
     Bool rc_;
     SloppyPathEmbedding e_;

};

// IsCompatible: determine if the placements of a read on two paths are 
// compatible.  This assumes that the reads have the same orientation.  True is
// returned if we're not sure what the answer is.  Also, because this only checks
// alignment extending off a single anchor, it is conceivable that it could
// incorrectly return True.
//
// In other words, a False result is the informative one; True means only that
// no incompatibility was found.
// NOTE(review): presumably loc1 is the placement on p1 and loc2 the placement
// on p2 -- confirm against the definition.

Bool IsCompatible( const KmerPath& p1, const KmerPath& p2,
     const SloppyReadPlacement& loc1, const SloppyReadPlacement& loc2 );

// BuildLocs: only the declaration is visible in this header.
// NOTE(review): presumably fills "theselocs" (the only non-const argument) 
// with the placements of reads on path p, as determined by m_piler -- confirm 
// against the definition.

void BuildLocs( const KmerPath& p, 
     serfvec<SloppyReadPlacement>& theselocs, const ReadsOnPathPiler& m_piler );

class CompletedInsert {
     
     public:

     CompletedInsert( ) { }

     CompletedInsert( int id1, int id2, int min_length, int max_length, 
          int min_perfect_match, const vecKmerPath& paths, 
          const vecvec<SloppyReadPlacement>& locs ) 
          : id1_(id1), id2_(id2), min_length_(min_length), max_length_(max_length),
          min_perfect_match_(min_perfect_match)
     {    paths_ = paths;
          locs_ = locs;    }

     template<class CONTAINER_OF_KMER_PATH> // vec<KmerPath>, set<KmerPath>, etc.
     CompletedInsert( const int id1, const int id2,
          const CONTAINER_OF_KMER_PATH& closures, const int min_length, 
          const int max_length, const int min_perfect_match, const bool computeLocs,
          const ReadsOnPathPiler& m_piler )
          : id1_(id1), id2_(id2), min_length_(min_length), max_length_(max_length),
          min_perfect_match_(min_perfect_match)
     {    for ( typename CONTAINER_OF_KMER_PATH::const_iterator 
               closureIter = closures.begin();
               closureIter != closures.end(); ++closureIter )
          {    paths_.push_back( *closureIter );    }
          if (computeLocs) FillLocs(m_piler);     }

     // Constructor from paired pair data (see PairedPair.h).  This does not
     // fill out the locs.

     CompletedInsert( 

          const int id1, const int id2,             // edge ids
          const pp_pair& p,                         // the pair
          const vec<pp_closure>& closures,          // closures of the pair
          const HyperKmerPath& h,                   // the HyperKmerPath
          const double dmult                        // how much stretch was allowed
          
               );

     // FillLocs: build locs_ from scratch.

     void FillLocs( const ReadsOnPathPiler& m_piler );

     // All the paths go from read Id1( ) to the rc of read Id2( ).

     int Id1( ) const { return id1_; }
     int Id2( ) const { return id2_; }

     int MinPerfectMatch( ) const { return min_perfect_match_; }

     int MinLength( ) const { return min_length_; }
     int MaxLength( ) const { return max_length_; }

     int NPaths( ) const { return paths_.size( ); }
     const vecKmerPath& Paths( ) const { return paths_; }
     vecKmerPath& PathsMutable( ) { return paths_; }
     const KmerPath& Path( int i ) const { return paths_[i]; }

     const vecvec<SloppyReadPlacement>& Locs( ) const { return locs_; }
     vecvec<SloppyReadPlacement>& LocsMutable( ) { return locs_; }
     const serfvec<SloppyReadPlacement>& Locs( int i ) const { return locs_[i]; }
     serfvec<SloppyReadPlacement>& LocsMutable( int i ) { return locs_[i]; }

     void Reverse( );

     void BinaryRead( int fd );
     void BinaryWrite( int fd ) const;

     friend void BinaryRead( const String& filename, vec<CompletedInsert>& inserts );

     friend void BinaryWrite( 
          const String& filename, const vec<CompletedInsert>& inserts );

     // AllReads: Find all reads appearing in a vec<CompletedInsert>.

     friend void AllReads( const vec<CompletedInsert>& inserts, vec<int>& all );

     // RemoveIncompatibleReads: Find and remove all read placements in "inserts" 
     // which can be eliminated because the insert paths for the placed read's 
     // insert are incompatible with the insert path on which the read is 
     // placed.  Return the number of removed placements.  Also "modified" shows
     // which inserts are changed.

     friend int RemoveIncompatibleReads( vec<CompletedInsert>& inserts, int nreads,
          vec<Bool>& modified, int mpm = 1 );

     friend int RemoveImprobablePaths( vec<CompletedInsert>& inserts,
          int surprise_factor = 10000, int max_seq = -1 );

     // Rewalk: Rewalk the inserts using only the reads placed on them.  Return
     // number of paths which are eliminated.

     friend int Rewalk( vec<CompletedInsert>& inserts, const vec<Bool>& walk_these,
          const vecKmerPath& all_paths, const vecKmerPath& all_paths_rc,
          const String& run_dir, int K, const KmerBaseBroker& kbb,      
          const NegativeGapValidator& ngv, int mpm = 1, Bool remove_empty = True );

     // RemoveHolyPaths: Find paths which are not covered by their reads.  Note that
     // at present this will NOT detect cases where two reads appear to overlap
     // (because the end of one is after the beginning of another), but the
     // "shared bases" are actually in gaps.  Note this will also not check to see
     // if every base on a path is covered (and thus whether some bases should be
     // replaced by gaps).
     //
     // Return number of paths which are eliminated.
     //
     // This has a bunch of extra arguments which are not used at present.

     friend int RemoveHolyPaths( vec<CompletedInsert>& inserts, 
          const vec<Bool>& check_these, const vecKmerPath& all_paths, 
          const vecKmerPath& all_paths_rc, const String& run_dir, int K, 
          const KmerBaseBroker& kbb, const NegativeGapValidator& ngv, int mpm = 1,
          Bool remove_empty = True );

     // Clean: iteratively call RemoveIncompatibleReads and RemoveHolyPaths, until 
     // no further improvement is possible.  See caveats described for
     // RemoveHolyPaths.

     friend void Clean( vec<CompletedInsert>& inserts, const vecKmerPath& all_paths, 
          const vecKmerPath& all_paths_rc, const String& run_dir, int K, 
          const KmerBaseBroker& kbb, const NegativeGapValidator& ngv, 
          const vec<read_pairing>& pairs, const vec<int>& pairs_index,
          Bool verbose = False, int mpm = 1, Bool remove_empty = True,
          Bool second_order = True );

     friend void RemoveChimeras( vec<CompletedInsert>& inserts, 
          const vecKmerPath& paths, const vecKmerPath& paths_rc,
          const vec<tagged_rpint>& pathsDB );

     friend int SecondOrderIncompatible( vec<CompletedInsert>& inserts,
          const vec<read_pairing>& pairs, const vec<int>& pairs_index,
          vec<Bool>& modified );

     private:

     int id1_, id2_;
     int min_length_, max_length_;
     int min_perfect_match_;
     vecKmerPath paths_;
     vecvec<SloppyReadPlacement> locs_;

};


// Incrementally builds a file containing a vec<CompletedInsert>.
//
// The writer owns the open file (m_fd) between Open() and Close(), and the
// destructor calls Close().  It is therefore not copyable: a copy would end up
// closing the same file a second time.  The piler is held by reference, so it
// must outlive the writer.
class CompletedInsertWriter 
{
 public:
  // Calls Open() on the given filename.
  CompletedInsertWriter( const String& filename,
                         const ReadsOnPathPiler& piler );

  // Calls Close().
  ~CompletedInsertWriter();

  // Opens the given file for writing, making room for the count of
  // the elements in the vector.
  void Open( const String& filename );
  
  // Updates the count of the elements in the vector and closes the file.
  void Close();

  // Writes the given closures associated with the given ids, along
  // with the auxiliary info passed in.  Increments the count of
  // elements in the vector.
  template<class CONTAINER_OF_KMER_PATH> // vec<KmerPath>, set<KmerPath>, etc
  void Write( const int id1, const int id2, 
              const CONTAINER_OF_KMER_PATH& closures,
              const int minAcceptablePathLength, 
              const int maxAcceptablePathLength,
              const int minPerfectMatch,
              const bool computeLocs = true );

 private:
  // Copying is disabled: declared private and intentionally left undefined.
  CompletedInsertWriter( const CompletedInsertWriter& );
  CompletedInsertWriter& operator=( const CompletedInsertWriter& );

  const ReadsOnPathPiler& m_piler;  // passed to each CompletedInsert built by Write()
  int m_fd;                         // descriptor handed to BinaryWrite() by Write()
  int m_count;                      // number of inserts written so far
  // The three members below are not referenced anywhere in this header;
  // Write() takes the corresponding values as parameters instead.
  int m_minLength;
  int m_maxLength;
  int m_minPerfectMatch;
};

template<class CONTAINER_OF_KMER_PATH> // vec<KmerPath>, set<KmerPath>, etc
void CompletedInsertWriter::Write( const int id1, const int id2,
                                   const CONTAINER_OF_KMER_PATH& closures,
                                   const int minAcceptablePathLength, 
                                   const int maxAcceptablePathLength,
                                   const int minPerfectMatch,
                                   const bool computeLocs )
{    CompletedInsert ci( id1, id2, closures, minAcceptablePathLength, 
          maxAcceptablePathLength, minPerfectMatch, computeLocs, m_piler );
     ++m_count;
     ci.BinaryWrite(m_fd);    }

// A CompletedInsertBasesOnly is like a CompletedInsert, but instead of carrying
// kmer paths and read locations, it just has basevectors.

class CompletedInsertBasesOnly {
     
     public:

     // Default: ids -1, all other scalars 0, no paths.  The scalar members are
     // initialized explicitly so that the accessors never read indeterminate
     // values.

     CompletedInsertBasesOnly( ) 
          : id1_(-1), id2_(-1), min_length_(0), max_length_(0),
          min_perfect_match_(0) { }

     // Constructor from already-computed sequences.  "paths" is copied (by
     // assignment, in the constructor body).

     CompletedInsertBasesOnly( int id1, int id2, int min_length, int max_length, 
          int min_perfect_match, const vecbasevector& paths )
          : id1_(id1), id2_(id2), min_length_(min_length), max_length_(max_length),
          min_perfect_match_(min_perfect_match)
     {    paths_ = paths;    }

     // All the paths go from read Id1( ) to the rc of read Id2( ).

     int Id1( ) const { return id1_; }
     int Id2( ) const { return id2_; }

     int MinPerfectMatch( ) const { return min_perfect_match_; }

     int MinLength( ) const { return min_length_; }
     int MaxLength( ) const { return max_length_; }

     void BinaryRead( int fd );
     void BinaryWrite( int fd ) const;

     friend void BinaryRead( 
          const String& filename, vec<CompletedInsertBasesOnly>& inserts );

     // Number of stored sequences, and access to sequence i.  Sequence(i) does
     // no bounds checking of its own.

     int NPaths( ) const { return paths_.size( ); }
     const basevector& Sequence( int i ) const { return paths_[i]; }

     private:

     int id1_, id2_;
     int min_length_, max_length_;
     int min_perfect_match_;
     vecbasevector paths_;

};

#endif
