
/**************************************************************************
 * This file is part of Celera Assembler, a software program that
 * assembles whole-genome shotgun reads into contigs and scaffolds.
 * Copyright (C) 1999-2004, The Venter Institute. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received (LICENSE.txt) a copy of the GNU General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *************************************************************************/

//  Revision-control identification string ("$Id: ... $" keyword form) naming this source file and
//  its revision.  It is not referenced by any code visible in this file.
static const char *rcsid = "$Id: AS_BAT_PopulateUnitig.C 4371 2013-08-01 17:19:47Z brianwalenz $";

#include "AS_BAT_Datatypes.H"
#include "AS_BAT_Unitig.H"
#include "AS_BAT_BestOverlapGraph.H"

#include "AS_BAT_PopulateUnitig.H"



//  Extend 'unitig' by walking best edges outward from the fragment at the back of its path.
//
//    unitig   - the unitig to grow; asserted to have non-zero length on entry.
//    bestnext - edge from the last fragment in the path to the first fragment to add.  A NULL
//               pointer, or an edge whose fragId() is zero, means there is nothing to add.
//
//  The walk ends when the current edge points at fragment 0, or at a fragment that
//  Unitig::fragIn() reports as already belonging to a unitig.  If a fragment cannot be placed, the
//  failure is logged and assert(0) is hit.
//
void
populateUnitig(Unitig           *unitig,
               BestEdgeOverlap  *bestnext) {

  assert(unitig->getLength() > 0);

  //  No edge at all, or an edge to nowhere: nothing to do.
  if (bestnext == NULL)
    return;
  if (bestnext->fragId() == 0)
    return;

  //  Work on a copy of the node at the tail of the path.  It is reused as scratch space for every
  //  fragment we add below.
  ufNode node = unitig->ufpath.back();

  //  Identity of the current tail fragment, and whether we are leaving it from its 3' end (true
  //  when the fragment is placed with bgn < end).
  int32 tailID   = node.ident;
  bool  tailIs3p = (node.position.bgn < node.position.end);

  while ((bestnext->fragId() != 0) &&
         (Unitig::fragIn(bestnext->fragId()) == 0)) {

    const bool nextIs3p = bestnext->frag3p();

    //  bestnext points from the unitig to the fragment being added.  Build the opposite edge, from
    //  the new fragment back to the tail.  When the two ends differ the hangs are negated; when
    //  they match the hangs are swapped instead (the innie/outtie case, which keeps the A fragment
    //  forward).
    BestEdgeOverlap  backEdge;

    if (tailIs3p != nextIs3p)
      backEdge.set(tailID, tailIs3p, -bestnext->ahang(), -bestnext->bhang());
    else
      backEdge.set(tailID, tailIs3p, bestnext->bhang(), bestnext->ahang());

    //  Place the new fragment relative to the tail.  Only the slot matching the end of the new
    //  fragment that the edge arrives on gets the back edge; the other slot is NULL.
    node.ident = bestnext->fragId();

    int32  bidx5 = -1;
    int32  bidx3 = -1;

    BestEdgeOverlap *edge5 = nextIs3p ? NULL      : &backEdge;
    BestEdgeOverlap *edge3 = nextIs3p ? &backEdge : NULL;

    const bool placed = unitig->placeFrag(node, bidx5, edge5,
                                          node, bidx3, edge3);

    if (placed == false) {
      writeLog("ERROR:  Failed to place frag %d into BOG path.\n", node.ident);
      assert(0);
    } else {
      unitig->addFrag(node, 0, logFileFlagSet(LOG_POPULATE_UNITIG));
    }

    //  The fragment just handled becomes the new tail; fetch the best edge off its far end.
    tailID   = node.ident;
    tailIs3p = (node.position.bgn < node.position.end);

    bestnext = OG->getBestEdgeOverlap(tailID, tailIs3p);
  }
}




//  Create a new unitig seeded with fragment 'fi', then grow it along best edges in both directions.
//
//    unitigs - collection that supplies (via newUnitig()) the unitig being built.
//    fi      - ID of the fragment to use as the seed.
//
//  No unitig is created when the fragment has zero length (deleted), is already in a unitig, or is
//  contained.  When OG->isSuspicious(fi) is true the new unitig is left holding only the seed.
//
void
populateUnitig(UnitigVector &unitigs,
               int32 fi) {

  if (FI->fragmentLength(fi) == 0)      //  Deleted fragment.
    return;
  if (Unitig::fragIn(fi) != 0)          //  Already placed in a unitig.
    return;
  if (OG->isContained(fi) == true)      //  Contained fragment.
    return;

  Unitig *tig = unitigs.newUnitig(logFileFlagSet(LOG_POPULATE_UNITIG));

  //  The seed goes in reversed (bgn = length, end = 0) for compatibility with older code.  We walk
  //  off its 5' end first, flip the unitig, then walk off the 3' end.

  ufNode  seed;

  seed.ident             = fi;
  seed.parent            = 0;
  seed.contained         = 0;
  seed.containment_depth = 0;
  seed.ahang             = 0;
  seed.bhang             = 0;
  seed.position.bgn      = FI->fragmentLength(fi);
  seed.position.end      = 0;

  tig->addFrag(seed, 0, logFileFlagSet(LOG_POPULATE_UNITIG));

  //  Best edges off each end of the seed.

  BestEdgeOverlap  *edge5 = OG->getBestEdgeOverlap(fi, false);
  BestEdgeOverlap  *edge3 = OG->getBestEdgeOverlap(fi, true);

  //  Best edges must be dovetail overlaps, so the hangs off the 5' end are never positive and the
  //  hangs off the 3' end are never negative.

  assert(edge5->ahang() <= 0);
  assert(edge5->bhang() <= 0);
  assert(edge3->ahang() >= 0);
  assert(edge3->bhang() >= 0);

  //  A fragment that is not covered by its two best overlaps is not used to start a walk.  That
  //  situation indicates either low coverage or a chimeric fragment.  With low coverage the best
  //  overlaps will be mutual and the same path is recovered from a neighbor; with a chimera they
  //  will not be mutual and the fragment is skipped.
  //
  //  The amount of the fragment covered by the two best overlaps is
  //
  //    (fragLen + edge5->bhang()) + (fragLen - edge3->ahang())
  //
  //  and if that is not significantly longer than the fragment itself, the fragment is not a good
  //  seed.  The live test is delegated to OG->isSuspicious(); the explicit computation is kept
  //  below, disabled.

  if (OG->isSuspicious(fi))
    return;

#if 0
  uint32  covered = FI->fragmentLength(fi) + edge5->bhang() + FI->fragmentLength(fi) - edge3->ahang();

  //  This breaks unitigs at 0x best-coverage regions.  There might be a contain that spans (joins)
  //  the two best overlaps to verify the fragment, but we can't easily tell right now.
  if (covered < FI->fragmentLength(fi) + AS_OVERLAP_MIN_LEN / 2) {
    writeLog("Stopping unitig construction of suspicious frag %d in unitig %d\n",
            tig->ufpath.back().ident, tig->id());
    return;
  }
#endif

  //  First walk: off the 5' end of the seed.

  if (logFileFlagSet(LOG_POPULATE_UNITIG))
    writeLog("Adding 5' edges off of frag %d in unitig %d\n",
            tig->ufpath.back().ident, tig->id());

  if (edge5->fragId() != 0)
    populateUnitig(tig, edge5);

  //  Flip the unitig so the seed's 3' end is now the end being extended.

  tig->reverseComplement(false);

  //  Second walk: off the 3' end of the seed.

  if (logFileFlagSet(LOG_POPULATE_UNITIG))
    writeLog("Adding 3' edges off of frag %d in unitig %d\n",
            tig->ufpath.back().ident, tig->id());

  if (edge3->fragId() != 0)
    populateUnitig(tig, edge3);

  //  A final reverse complement here is known to degrade the assembly, for reasons that are not
  //  understood, so it is left disabled.
  //
  //tig->reverseComplement(false);
}
