/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

/**
   \file FilterReadsByAlignTarget.cc: Write to OUT_HEAD.fasta the
   reads from READS whose only plausible alignments from QLTOUT are to
   a 'good' target (from text file GOOD_TARGETS).  Or, if UNIQUE_ONLY
   is True, select the reads that have only one plausible alignment
   which is to a good target.  Write to OUT_HEAD.qltout the
   corresponding alignments, with query_ids arranged to reflect the
   filtering, and to OUT_HEAD.qual the correspondingly filtered qual
   files.  If RENUMBER_TARGETS is True, also renumber the target ids
   to reflect their positions in the *sorted* GOOD_TARGETS file.
*/

#ifndef FORCE_DEBUG
     #define NDEBUG
#endif

#include "MainTools.h"
#include "Basevector.h"
#include "FastaFileset.h"
#include "Feudal.h"
#include "math/Functions.h"
#include "lookup/LookAlign.h"
#include "lookup/LookAlignFinder.h"
#include "lookup/VecFromLookAlign.h"
#include "SerialFeudal.h"

// Usage text passed to CommandDoc() in main().  The literal is split along
// sentence boundaries; adjacent string literals are concatenated by the
// compiler, so the two-space gaps between sentences are part of the text.
const char* DOC =
    "Write to OUT_HEAD.fasta the reads from READS whose only plausible "
    "alignments from QLTOUT are to a 'good' target (from text file "
    "GOOD_TARGETS).  "
    "Or, if UNIQUE_ONLY is True, select the reads that have only one "
    "plausible alignment which is to a good target.  "
    "Write to OUT_HEAD.qltout the corresponding alignments, with query_ids "
    "arranged to reflect the filtering, and to OUT_HEAD.qual the "
    "correspondingly filtered qual files.  "
    "If RENUMBER_TARGETS is True, also renumber the target ids to reflect "
    "their positions in the *sorted* GOOD_TARGETS file.";

int main( int argc, char *argv[] ) 
{
  RunTime( );

  BeginCommandArguments;
  CommandDoc(DOC);
  CommandArgument_String(READS);
  CommandArgument_String_OrDefault(QLTOUT,"");
  CommandArgument_String_OrDefault(QUAL,"");
  CommandArgument_String(GOOD_TARGETS);
  CommandArgument_String(OUT_HEAD);
  CommandArgument_Int_OrDefault_Doc(ERR_DIFF,4, "Default uniqueness check is whether the "
				    "second-best alignment's # errors is more than ERR_DIFF "
				    "more than the best alignment's.");
  CommandArgument_Bool_OrDefault_Doc(RENUMBER_TARGETS,True,"Whether to renumber targets by their"
				     "position in the *sorted* GOOD_TARGETS list.");
  CommandArgument_Bool_OrDefault_Doc(UNIQUE_ONLY,True, "Whether to include reads that are ambiguous within the set of GOOD_TARGETS though unambiguously aligning to that set.");
  CommandArgument_Bool_OrDefault_Doc(ALLOW_ALIGN_ELSEWHERE,False, "Whether to allow reads which align both to the good target as well as other targets.");
  CommandArgument_Bool_OrDefault_Doc(COMPARE_BY_RATE,True,
				     "If True, the uniqueness check is whether the second-best "
				     "alignment's error rate is more than ERR_RATE_MULTIPLE times "
				     " the best alignment's error rate.");
  CommandArgument_Double_OrDefault(ERR_RATE_MULTIPLE,4.0);
  CommandArgument_Bool_OrDefault_Doc(WRITE_ORIG_IDS,False, "Whether to write a .orig_ids file that gives the read numbers of the selected reads in the original set.");
  EndCommandArguments;
 
  if (ALLOW_ALIGN_ELSEWHERE && UNIQUE_ONLY)
    FatalErr("Incompatible options set: ALLOW_ALIGN_ELSEWHERE && UNIQUE_ONLY");

  // Read in data.
  vecbasevector reads;
  LoadReads(reads, READS);
  const int nreads = reads.size();
  cout << "Number of reads = " << nreads << "\n\n";
  
  if (QLTOUT.empty())
    QLTOUT = READS.Before(".fast") + ".qltout";

  set<int> good;
  {
    Ifstream(in, GOOD_TARGETS);
    int g;
    while (in) {
      in >> g;
      if (in)
	good.insert(g);
    }
  }
  cout << "Read " << good.size() << " good target ids.\n";

  vec<Bool> passing(nreads, False);
  order_lookalign_ErrorRate sorter;
  Ofstream(qltout, OUT_HEAD + ".qltout");
  int passed = 0;
  look_align la;
  for (LookAlignFinder finder(QLTOUT); !finder.empty(); ++finder) {
    int r = finder.QueryId();
    vec<look_align_plus> & aligns = finder.Aligns();
    if (0==aligns.size())
      continue;
    sort(aligns.begin(), aligns.end(), sorter);
    // For ALLOW_ALIGN_ELSEWHERE, the first read need not align to the good
    // one as long as one of them does align.
    bool this_align_is_to_good_target = (1==good.count(aligns[0].target_id));
    if (!ALLOW_ALIGN_ELSEWHERE && !this_align_is_to_good_target)
      continue; 
    // For each aligns[i].target_id, we check if it aligns.  If it 
    // aligns to none of the good, then this is a bad_target.
    bool never_aligned = !(ALLOW_ALIGN_ELSEWHERE && this_align_is_to_good_target);
    bool skip_this_read = false;
    for (int i=1; i<aligns.isize(); ++i) {
      this_align_is_to_good_target = (1==good.count(aligns[i].target_id));
      if (ALLOW_ALIGN_ELSEWHERE && this_align_is_to_good_target)
	never_aligned = false;
      if (!ALLOW_ALIGN_ELSEWHERE) {
	// Reject ambiguous reads: if UNIQUE_ONLY, any ambiguity is
	// bad; otherwise, ambiguity between good and bad targets is
	// bad.
	if ((UNIQUE_ONLY || !this_align_is_to_good_target)
	    && IsAmbiguous(aligns[0], aligns[i], ERR_DIFF, ERR_RATE_MULTIPLE, COMPARE_BY_RATE)) {
	  skip_this_read=true;
	  break;
	}
      }
    }
    if (ALLOW_ALIGN_ELSEWHERE && never_aligned)
      continue;
    if (skip_this_read)
      continue;
    passing[r] = True;
    for (int i=0; i<aligns.isize(); ++i) {
      if (UNIQUE_ONLY && i>0) 
	break; // We only ever output one alignment in UNIQUE_ONLY case
      if (1==good.count(aligns[i].target_id)) {
	la  = aligns[i];
	la.query_id = passed;
	if (RENUMBER_TARGETS)
	  la.target_id = distance(good.begin(), good.find(aligns[i].target_id));
	la.PrintParseable(qltout);
	la.PrintReadableBrief(qltout);
      }
    }
    ++passed;
  }
  PRINT(passed);

  {
    Ofstream(out, OUT_HEAD+".fasta");
    for (int i=0; i != nreads; ++i) {
      if (passing[i]) 
	reads[i].Print(out, i);
    }
  }

  if (!QUAL.empty()) {
    Ofstream(qout, OUT_HEAD+".qual");
    vecqualvector q;
    FastFetchQuals(q, 0, QUAL);
    for (int i=0; i != nreads; ++i) {
      if (passing[i]) 
	Print(qout, q[i], ToString(i));
    }
  }

  if (WRITE_ORIG_IDS) {
    Ofstream(out, OUT_HEAD+".orig_ids");
    for (int i=0; i != nreads; ++i) {
      if (passing[i]) 
	out << i << "\n";
    }
  }
  
  return 0;
}
