/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2007) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef LOCALIZE_READS_ANNEX_H
#define LOCALIZE_READS_ANNEX_H

#include <set>
#include "Alignment.h"
#include "Basevector.h"
#include "CoreTools.h"
#include "Feudal.h"
#include "ReadLocation.h"
#include "ReadPairing.h"
#include "PredictionStats.h"
#include "math/HoInterval.h"
#include "paths/HyperKmerPath.h"
#include "paths/KmerBaseBroker.h"
#include "paths/KmerPath.h"
#include "paths/KmerPathMuxSearcher.h"
#include "paths/KmerPathOnHyper.h"
#include "paths/PairedPair.h"
#include "paths/UnipathNhood.h"
#include "paths/simulation/Placement.h"
#include "paths/simulation/SimTrueSequenceBroker.h"

/**
   FuncDecl: AlignNhoodStuffToReference

   Declaration only; the definition lives outside this header.  Judging by the
   name and signature, this aligns the sequences in 'seqs' to 'genome' for the
   neighborhood identified by 'v' -- TODO confirm against the implementation.

   Read-only inputs: seqs, v, NHOOD_RADIUS, locs, genome, genome_path_lengths.
   Non-const reference (result or in/out): aligns.
*/
void AlignNhoodStuffToReference( const vecbasevector& seqs, int v, int NHOOD_RADIUS,
     const vecvec<placement>& locs, const vecbasevector& genome,
     const vec<int>& genome_path_lengths, vec<alignment_plus>& aligns );

/**
   FuncDecl: PlacePairsOnPaths

   Declaration only.  The signature separates read-only inputs (read pairings,
   forward and reverse-complement read paths, unipaths with a per-unipath
   use-mask, a tagged_rpint database, and K) from three outputs.  Presumably
   it locates the reads of each pair on the selected unipaths -- TODO confirm
   against the implementation.

   The outputs 'u', 'start' and 'stop' are all nested vectors; how they are
   indexed relative to 'pairs' is not visible from this declaration.
*/
void PlacePairsOnPaths( 
     /* inputs: */
     const vec<read_pairing>& pairs, const vecKmerPath& paths,
     const vecKmerPath& paths_rc, const vecKmerPath& upaths, 
     const vec<Bool>& upaths_to_use, const vec<tagged_rpint>& upathsdb, const int K,
     /* outputs: */
     vec< vec<int> >& u, vec< vec<KmerPathLoc> >& start, 
     vec< vec<KmerPathLoc> >& stop );

// PickCover: takes a read-only list of intervals I and a non-const vec<int> C.
// Presumably C receives indices into I that together form a cover -- TODO
// confirm against the implementation; the definition is not in this header.
void PickCover( const vec<ho_interval>& I, vec<int>& C );

// VecSummary: returns a String computed from the read-only integer vector v.
// Presumably a human-readable summary; the exact format is defined by the
// implementation, which is not in this header.
String VecSummary( const vec<int>& v );

/**
   FuncDecl: CheckCoverage

   Declaration only.  Takes one placement 'p', a range [start, stop] (exact
   bounds convention not visible here), the reads in 'use', and true read
   locations with their index.  Presumably checks how well those reads cover
   the given region of the reference in simulation -- TODO confirm.

   Non-const references (results or in/out accumulators): sim_tried,
   sim_covered, rcov.  Everything else is read-only.
*/
void CheckCoverage( const placement& p, int K, const vec< pair<int,Bool> >& use, 
     int start, int stop, const vec<read_location>& readlocs, 
     const vec<int>& readlocs_index, const vec<int>& genome_path_lengths, 
     int NHOOD_RADIUS_INTERNAL, int& sim_tried, int& sim_covered,
     vec<ho_interval>& rcov );

/**
   FuncDecl: RenumberKmersInUnipaths

   Declaration only.  Reads a set of local unipaths and their KmerBaseBroker,
   plus a directory name, and fills 'newUnipaths' and 'newlocalKBB' (both
   non-const references).  Presumably the new objects describe the same
   unipaths under a fresh kmer numbering -- TODO confirm; how 'wrun_dir' is
   used is not visible from this declaration.
*/
void RenumberKmersInUnipaths( const vec<KmerPath>& localUnipaths, 
                              const KmerBaseBroker& localKBB,
                              const String& wrun_dir,
                              vecKmerPath& newUnipaths,
                              KmerBaseBroker& newlocalKBB );


/**
   FuncDecl: WalkLongInserts

   Declaration only.  'hypers' is the only non-const reference and therefore
   the only argument this function can modify; all other arguments are
   read-only inputs or by-value settings.  Presumably walks the given long
   inserts and records the results in 'hypers' -- TODO confirm against the
   implementation.
*/
void WalkLongInserts( vec<HyperKmerPath>& hypers,
                      const vec<pp_pair>& long_inserts, 
                      const vec<read_pairing>& long_insert_orig_pairs, 
                      const vec<HyperPairPlacement>& long_insert_hpairs,
                      const vec< vec<pp_closure> >& ppclosures,
                      const vecKmerPath& newUnipaths,
                      const KmerBaseBroker& newlocalKBB,
                      const int K, const int LONG_INSERT_WALK_K,
                      const int SD_MULT, const int MAX_PSEUDO,
                      const int search_limit,
                      const longlong answer_size_limit,
                      const int LONG_INSERT_MIN_READ_LENGTH,
                      const int LONG_INSERT_WALK_VERBOSITY, 
                      const Bool EVALUATE_INSERT_HYPER,
                        // NOTE(review): the deeper indentation suggests the next five
                        // arguments are extra inputs for EVALUATE_INSERT_HYPER -- confirm.
                        const vec<read_location>& readlocs,
                        const vec<int>& readlocs_index,
                        const String& data_dir,
                        const String& wdata_dir,
                        const vecbasevector& genome,
                      const int verbosity );


/**
   FuncDecl: MergeNeighborhood

   Declaration only.  'nhoodHBV' is the only non-const reference and therefore
   the only argument this function can modify; 'hypers' and 'newlocalKBB' are
   the read-only primary inputs.  Presumably combines 'hypers' into a single
   HyperBasevector for the neighborhood -- TODO confirm.

   The remaining arguments are Bool switches for individual steps (FIRST_MERGE,
   SECOND_MERGE, FIRST_DELOOP, EVALUATE_NHOOD_HYPER), PRINT_HYPER_* switches,
   and, per the inline comments, extra inputs grouped under the step that
   needs them.  The order in which the steps run is set by the implementation.
*/
void MergeNeighborhood( HyperBasevector& nhoodHBV, 
                        const vec<HyperKmerPath>& hypers,
                        const KmerBaseBroker& newlocalKBB,
                         const Bool PRINT_HYPER_BEFORE_FIRST_MERGE,
                        const Bool FIRST_MERGE,
                         const Bool PRINT_HYPER_AFTER_FIRST_MERGE,
                         const Bool PRINT_HYPER_BEFORE_SECOND_MERGE,
                        const Bool SECOND_MERGE,
                        // SECOND_MERGE params
                          const int MIN_OVERLAP,
                          const int MIN_PROPER_OVERLAP,
                         const Bool PRINT_HYPER_AFTER_SECOND_MERGE,
                        const Bool FIRST_DELOOP,
                        // FIRST_DELOOP add'l input
                          const vec< pair<int,Bool> >& secondaryCloud,
                          const vecbasevector& reads,
                          const vec<read_pairing>& pairs,
                          const vec<int>& pairs_index,
                          const Bool FIRST_DELOOP_VERBOSE,
                         const Bool PRINT_HYPER_AFTER_FIRST_DELOOP,
                        const Bool EVALUATE_NHOOD_HYPER,
                        // EVALUATE_NHOOD_HYPER add'l input
                          const String& data_dir,
                          const String& wdata_dir,
                          const vecbasevector& genome );

/**
   FuncDecl: WalkInserts

   Declaration only.  Presumably walks the inserts of sampled read pairs with
   'searcher' and accumulates results in 'hyperbases' -- TODO confirm against
   the implementation.

   Arguments the function can modify (non-const reference or pointer to
   non-const): searcher, trueSeq, kbb, insert_ids, hyperbases.
   'theSubList' is a pointer to const; whether it may be null is not visible
   from this declaration.  All other arguments are read-only or by value.

   Several trailing arguments (MIN_OVERLAP, MIN_PROPER_OVERLAP, FIRST_MERGE,
   FIRST_DELOOP, PRINT_HYPER_*, FIRST_DELOOP_VERBOSE, EVALUATE_NHOOD_HYPER,
   data_dir, wdata_dir) share names with parameters of MergeNeighborhood;
   presumably they are forwarded to it -- TODO confirm.
*/
void WalkInserts( const vec<int>& to_id, const vec< pair<int,Bool> >& use, 
     KmerPathMuxSearcher& searcher, int nps, const vec<Bool>& extended_far_enough, 
     const vec<int>& sleft_add, const vec<int>& pairs_sample, 
     const vec<read_pairing>& pairs, const vec<read_location>& readlocs, 
     const vec<int>& readlocs_index, const vecvec<placement>& locs, 
     int SD_MULT, const vec<int>& readLengths, int MAX_PSEUDO, int MAX_CLOSURES, 
     int verbosity, const vecKmerPath& pathsFw, const vecKmerPath& pathsRc, 
     const SubsumptionList* theSubList,
     Bool decompose_truth, Bool decompose_truth_brief, 
     SimTrueSequenceBroker& trueSeq, Bool USE_TRUTH, int v, KmerBaseBroker* kbb, 
     const vecbasevector& genome, vec<int>& insert_ids, int K, 
     int MIN_OVERLAP, int MIN_PROPER_OVERLAP, vec<HyperBasevector>& hyperbases,
     const vec< pair<int,Bool> >& P, const vec<read_pairing>& PAIRS,
     const vec<int>& PAIRS_INDEX, const vecbasevector& reads, int K_orig,
     const String& sub_dir, const String& wdata_dir, Bool FIRST_MERGE,
     Bool FIRST_DELOOP, const vecbasevector& sreads, Bool found_universal,
     Bool PRINT_HYPER_BEFORE_FIRST_MERGE, Bool PRINT_HYPER_AFTER_FIRST_MERGE,
     Bool PRINT_HYPER_BEFORE_SECOND_MERGE, Bool PRINT_HYPER_AFTER_SECOND_MERGE,
     Bool FIRST_DELOOP_VERBOSE, Bool EVALUATE_NHOOD_HYPER, const String& data_dir );

/**
   FuncDecl: PropagateStarts

   Declaration only.  Reads one pp_pair 'p', the HyperKmerPath 'h', and
   per-edge data (hstart_known, unique_edge, hstart).  Presumably derives
   start positions for the two sides of 'p' from the known edge starts --
   TODO confirm against the implementation.

   Non-const references (results or in/out): start_known, lstart, rstart.
*/
void PropagateStarts( const pp_pair& p, const HyperKmerPath& h,
     const vec<Bool>& hstart_known, const vec<Bool>& unique_edge, 
     const vec<int>& hstart, Bool& start_known, vec<int>& lstart, vec<int>& rstart );

/**
   FuncDecl: FindFalseReads

   Declaration only.  Every argument is read-only or by value and the return
   type is void, so nothing comes back through the signature; presumably
   findings are reported as printed output, with 'verbose' controlling detail
   -- TODO confirm against the implementation.
*/
void FindFalseReads( const vec<pp_pair>& ppp, const HyperKmerPath& h,
     const vecbasevector& genome, const KmerBaseBroker& kbb,
     int v, int NHOOD_RADIUS, const vecvec<placement>& locs,
     const vec<int>& genome_path_lengths, Bool verbose );

/**
   FuncDecl: TrimPairs

   Declaration only.  Presumably trims the pairs in 'ppp' relative to the
   neighborhood of seed 'v' -- TODO confirm against the implementation.

   Non-const references (may be modified): ppp, unique_edge, found_universal.
   All other arguments are read-only or by value; the roles of
   NHOOD_RADIUS_INTERNAL and TRIM_MULTIPLIER are set by the implementation.
*/
void TrimPairs( int v, const vec<int>& ulen, const HyperKmerPath& h, 
     vec<pp_pair>& ppp, const vec<int>& pppL, const vec<Bool>& hstart_known, 
     const vec<int>& hstart, vec<Bool>& unique_edge, 
     const int NHOOD_RADIUS_INTERNAL, const double TRIM_MULTIPLIER, 
     Bool& found_universal, Bool BASIC_DEBUG );

// cmp_align2: binary predicate over two alignment_plus objects, returning Bool.
// Presumably a "less than" comparator for sorting; the ordering criterion is
// defined by the implementation, which is not in this header.
Bool cmp_align2( const alignment_plus& ap1, const alignment_plus& ap2 );

/**
   FuncDecl: AddCopyNumberOne

   Declaration only.  'processed' is the only non-const reference and
   therefore the only argument that can be modified.  Presumably adds
   unipaths of predicted copy number one near seed 'v' to 'processed', using
   the graph 'Gplus' and bounded by NHOOD_RADIUS and MAX_DEV -- TODO confirm
   against the implementation.
*/
void AddCopyNumberOne( const int v, vec<ustart>& processed, 
     const vec<int>& predicted_copyno, const digraphE<sepdev>& Gplus, 
     const vec<int>& ulen, const int NHOOD_RADIUS, const double MAX_DEV );

/**
   FuncDecl: PrintTruthPlacement

   Declaration only.  All arguments are read-only and the return type is
   void; presumably the result is printed output relating 'h' to the true
   placements in 'locs' -- TODO confirm against the implementation.
*/
void PrintTruthPlacement( const int v, const vecvec<placement>& locs,
     const vecbasevector& genome, const int NHOOD_RADIUS_INTERNAL,
     const HyperKmerPath& h, const KmerBaseBroker& kbbnb,
     const vec<int>& edge_copyno );

/**
   FuncDecl: TracePairs

   Declaration only.  All arguments are read-only and the return type is
   void; presumably a diagnostic that prints information about the pairs in
   'ppp' -- TODO confirm against the implementation.
*/
void TracePairs( const vecbasevector& sreads, const vec<pp_pair>& ppp,
     const HyperKmerPath& h, const KmerBaseBroker& kbbnb,
     const vec<read_location>& slocs, const vec<read_pairing>& spairs );

// PrintPairs: all arguments are read-only and the return type is void.
// Presumably prints the pairs in 'ppp' together with the per-edge data
// (hstart_known, hstart, unique_edge) -- TODO confirm; the output format is
// defined by the implementation, which is not in this header.
void PrintPairs( const vec<pp_pair>& ppp, const vec<Bool>& hstart_known,
     const vec<int>& hstart, const vec<Bool>& unique_edge );

/**
   FuncDecl: CheckReferenceCoverage

   Declaration only.  All arguments are read-only and the return type is
   void, so nothing comes back through the signature.  Presumably compares
   the closures in 'ppclosures' with the reference around seed 'v' and
   reports by printing and/or writing under 'sub_dir' -- TODO confirm against
   the implementation.

   SHOW_PAIR_PLACEMENTS is an int, not a Bool; its meaning is set by the
   implementation.
*/
void CheckReferenceCoverage( const int v, const vec< vec<pp_closure> >& ppclosures,
     const HyperKmerPath& h, const KmerBaseBroker& kbbnb, const int NHOOD_RADIUS,
     const vecvec<placement>& locs, const vecbasevector& genome,
     const vec<int>& genome_path_lengths, const Bool BASIC_DEBUG,
     const vec<pp_pair>& ppp, const int SHOW_PAIR_PLACEMENTS, 
     const String& sub_dir );

/**
   FuncDecl: ProcessUniversalClosure

   Declaration only.  'hyperbases' is the only non-const reference and
   therefore the only argument that can be modified.  Presumably handles the
   case where a single "universal" closure was found -- TODO confirm against
   the implementation.

   EVALUATE_NHOOD_HYPER, wrun_dir and data_dir are presumably used for
   optional evaluation; confirm before relying on that.
*/
void ProcessUniversalClosure( const vec<pp_pair>& ppp, const HyperKmerPath& h,
     const vec<read_pairing>& spairs, const vec<Bool>& primary,
     const vecKmerPath& xpaths, const vecKmerPath& xpaths_rc,
     const KmerBaseBroker& kbbnb, const Bool EVALUATE_NHOOD_HYPER,
     const String& wrun_dir, const String& data_dir,      
     vec<HyperBasevector>& hyperbases );

/**
   FuncDecl: ReportTrueCoverageGaps

   Declaration only.  All arguments are read-only and the return type is
   void; presumably prints where the true read locations in 'readlocs' leave
   gaps in coverage of 'genome' -- TODO confirm.  How 'K' and
   'is_short_pair_read' affect the report is set by the implementation.
*/
void ReportTrueCoverageGaps( const vecbasevector& genome,
     const vec<read_location>& readlocs, const int K,      
     const vec<Bool>& is_short_pair_read );

/**
   FuncDecl: ReportCoverage

   Declaration only.  Takes the placements for one neighborhood ('locsv'),
   two read sets ('use' and 'P'), and true read locations with their index.

   Non-const references (results or in/out accumulators): sim_tried,
   sim_covered, sim_tried_sh, sim_covered_sh.  The first two share names with
   CheckCoverage's counters; presumably the "_sh" pair is the same statistic
   for a second read set -- TODO confirm against the implementation.
*/
void ReportCoverage( const serfvec<placement>& locsv, int K,
     const vec< pair<int,Bool> >& use, const vec< pair<int,Bool> >& P,
     const vec<read_location>& readlocs, const vec<int>& readlocs_index,
     const vec<int>& genome_path_lengths, int NHOOD_RADIUS_INTERNAL, int& sim_tried,
     int& sim_covered, int& sim_tried_sh, int& sim_covered_sh );

/**
   FuncDecl: EdgeCopyNumber

   Declaration only.  Everything before the "outputs" marker is read-only.
   The three outputs are edge_copyno, edge_copyno_p and unique_edge.
   Presumably they hold, for each edge of 'h', a copy-number estimate, its
   probability, and a uniqueness flag, mirroring the predicted_copyno /
   predicted_copyno_p inputs -- TODO confirm against the implementation.
*/
void EdgeCopyNumber( const HyperKmerPath& h, const vec<int>& fw_reads_orig,
     const vec<int>& rc_reads_orig, const vecKmerPath& zpaths,
     const vecKmerPath& zpaths_rc, const vec<read_location_short>& ulocs, 
     const vecvec<int>& ulocs_indexr, const vec<int>& predicted_copyno,
     const vec<double>& predicted_copyno_p, int EDGE_MIN, 
     const vec<int>& readLengths, const vec<int>& ulen,
     const vec<tagged_rpint>& edgedb,
     /* outputs: */ vec<int>& edge_copyno, vec<double>& edge_copyno_p,
     vec<Bool>& unique_edge );

// Type: KmerAdjSet
// The set of all <kmer adjacencies> in a given neighborhood (either in the genome,
// or in the reads localized to one neighborhood).
// Each element is an ordered pair of kmer ids, so duplicates collapse and (a,b)
// is distinct from (b,a).  Presumably the pair is (kmer, the kmer that follows
// it) -- TODO confirm against the code that fills the set.
typedef set< pair< kmer_id_t, kmer_id_t > > KmerAdjSet;

/**
   FuncDecl: GetLocalizedReadsKmerAdjacencies

   For all reads <localized> to a given <neighborhood>, gather the set of <kmer adjacencies>
   represented by these reads.  This set should approximate the set returned by
   <GetGenomicKmerAdjacencies()>, and we can measure how well the approximation works.
   This in turn gives us an idea of how well localization of reads to neighborhoods is working.

   Inputs are the neighborhood seed, K, the forward and reverse-complement read paths,
   and the (read id, orientation) pairs localized to the neighborhood.
   'readsToInclude' is a pointer rather than a reference; presumably it may be null to
   mean "no filtering" -- TODO confirm against the implementation.

   Output: 'localizedReadsKmerAdjs'.  Whether it is cleared first or appended to is
   not visible from this declaration.
*/
void GetLocalizedReadsKmerAdjacencies(// input
				      unipath_id_t neighborhoodSeed, nbases_t K,
				      const vecKmerPath& paths,
				      const vecKmerPath& paths_rc,
				      const vec< pair<read_id_t,orient_t> >& readsLocalizedToNeighborhood,
				      const vec<Bool> *readsToInclude,
			       
				      // output
				      KmerAdjSet& localizedReadsKmerAdjs );
				      
				      
/**
   FuncDecl: GetGenomicKmerAdjacencies

   For a given neighborhood, take its alignment to the reference, and get the set of
   <kmer adjacencies> on that strand of the reference.
*/
void GetGenomicKmerAdjacencies(// input
			       unipath_id_t seed, nbases_t K, const vecKmerPath& genome_paths, const vecKmerPath& genome_paths_rc,
			       const vecKmerPathIndex& genome_paths_idx,
			       const vecvec<placement>& unipathPlacementsOnGenome,
			       const vec<nkmers_t IdxBy(unipath_id_t)>& unipathLengthsInKmers,
			       nbases_t NHOOD_RADIUS_INTERNAL,
			       
			       // output
			       KmerAdjSet& localizedReadsKmerAdjs );



/**
   FuncDecl: ComputeKmerAdjacencyPredictionStats

   For a given neighborhood, gather stats on how well the reads localized to that
   neighborhood predict the <kmer adjacencies> actually occurring in the neighborhood.

   Inputs are the two adjacency sets to compare: the one from the localized reads (the
   prediction) and the one from the genomic neighborhood (the truth).
   Output: 'kmerAdjPredictionStats'.  Which statistics PredictionStats holds, and
   whether this call resets or accumulates into it, is not visible from this declaration.
*/
void ComputeKmerAdjacencyPredictionStats( // inputs:
					  const KmerAdjSet& adjacenciesInLocalizedReads,
					  const KmerAdjSet& adjacenciesInGenomicNeighborhood,
					  // outputs:
					  PredictionStats& kmerAdjPredictionStats );



/**
   FuncDecl: EvaluateKmerAdjPredictionForNeighborhood

   Print stats on how accurately we localize reads to neighborhoods, by seeing how well the
   set of kmer adjacencies in reads localized to each CN1 neighborhood matches up with the
   true set of kmer adjacencies in that neighborhood (from the <reference>).

   Every argument is read-only or by value and the return type is void, so nothing is
   returned through the signature.  Presumably the stats are printed and/or written
   under 'run_dir' -- TODO confirm against the implementation.

   'primaryReadCloud' and 'secondaryReadCloud' are two separate sets of
   (read id, orientation) pairs; presumably each is evaluated against the genomic
   adjacencies -- TODO confirm.  'readlocs' and 'readlocs_index' are presumably only
   consulted when USING_READLOCS is set -- TODO confirm.
*/
void EvaluateKmerAdjPredictionForNeighborhood( // input
					      unipath_id_t v,  // neighborhood seed
					      unsigned int seeds_processed,
					      nbases_t K,
					      const vecKmerPath& genome_paths, const vecKmerPath& genome_paths_rc,				      
					      const vecKmerPathIndex& genome_paths_idx,
					      const vecKmerPath& paths,
					      const vecKmerPath& paths_rc,
					      const vecvec<placement>& unipathPlacementsOnGenome,
					      const vec<nkmers_t>& unipathLengthsInKmers,
					      nbases_t NHOOD_RADIUS_INTERNAL,
					      const vec<int>& predicted_copyno,
					      const String& run_dir,
					      const vec<Bool>& is_short_pair_read,
					      Bool USING_READLOCS,
					      const vec<read_location>& readlocs,
					      const vec<int>& readlocs_index,
					      const vec< pair<read_id_t,orient_t> >& primaryReadCloud,
					      const vec< pair<read_id_t,orient_t> >& secondaryReadCloud );


#endif
// #ifndef LOCALIZE_READS_ANNEX_H
