# python libraries
import os
import sys
import getopt
import shutil
import gc

import pysam
import random
import numpy as np
import re
import cPickle as pickle
import json
import logging
from collections import Counter, defaultdict
from itertools import izip
from pyfaidx import Fasta

from batch import Worker

from mlib.common import util

from mlib.bam.read import getReadQualityInfo
from mlib.bam.read import getEdits, getReadAuxInfo
from mlib.bam.output import genFilteredBam, \
  convertBamCoords, \
  colorSimBam, \
  filterDuplicates, \
  filterRids

from mlib.toolflow import picard
from mlib.toolflow import bowtie2
from mlib.toolflow import ref
from mlib.toolflow import samtools
from mlib.toolflow.fastq import fastqIter

from mlib.align.inference import InferenceEngine
from mlib.align.candcloud import CandCloud
from mlib.align.candread import CandAlignment
from mlib.align import analysis

# module-level logger, named after this module; every progress and
# 'TIMER START/END' message in this file is emitted through it
logger = logging.getLogger(__name__)

class Aligner(Worker):

  def __init__(self, config):
    """Initialise the aligner from a worker config.

    Reads `debug` from the config and `simulate`, `laneID` and `wellID`
    from `config.inputs`.  Debug mode is only kept for simulation runs,
    and simulation runs switch CandAlignment into its simulation mode.
    """
    super(Aligner, self).__init__(config)

    wantDebug = self.config.debug
    inputs = self.config.inputs
    self.simulate = inputs['simulate']
    self.laneID = inputs['laneID']
    self.wellID = inputs['wellID']

    # --debug is meaningless outside of simulation: warn and drop it
    if wantDebug and not self.simulate:
      logger.warn('WARNING --debug specified for non simulation, turning off')
      wantDebug = False
    self.debug = wantDebug

    if self.simulate:
      CandAlignment.setSimMode()

  @property
  def isSingleBarcodeRun(self):
    """True when getBarcodes() reports exactly one barcode group."""
    return len(self.getBarcodes()) == 1

  def getBarcodes(self):
    """Return the distinct barcodes of the input reads, in first-seen order.

    The barcode of a read is the part of its name before the single '$'
    separator; names that do not split into exactly two parts fall into
    the `None` group.

    The result is cached in 'barcodes-save.json' in the current working
    directory: when that file exists it is loaded and returned without
    touching the fastq, otherwise fq1 is scanned and the file is written.
    """
    barcodeSave_fname = 'barcodes-save.json'
    if os.path.isfile(barcodeSave_fname):
      return util.jsonLoadASCII(barcodeSave_fname)

    def getBarcode(qname):
      # only the "cannot be split into (barcode, rest)" failures are
      # treated as "no barcode"; anything else is a real error and propagates
      try:
        (barcode, _) = qname.split('$')
      except (ValueError, AttributeError):
        return None
      return barcode

    fq1_path = self.config.inputs['fq1_path']
    barcodes_list = []
    barcodes_set = set()
    for (qname, _) in fastqIter(fq1_path):
      b = getBarcode(qname)
      if b in barcodes_set:
        continue
      barcodes_list.append(b)
      barcodes_set.add(b)

    with open(barcodeSave_fname, 'w') as f:
      f.write(json.dumps(barcodes_list, indent=2))

    return barcodes_list

  def getSingleBarcode(self):
    """Return the one barcode of a single-barcode run.

    Asserts that getBarcodes() yields exactly one entry.
    """
    found = self.getBarcodes()
    assert len(found) == 1
    return found[0]
    
  def scratchPath(self, barcode):
    """Return the (relative) scratch directory name for `barcode`.

    Reads without a barcode (barcode is None) share 'scratch-allreads';
    every other barcode gets 'scratch-<barcode>'.
    """
    if barcode is None:
      return 'scratch-allreads'
    return 'scratch-{0}'.format(barcode)

  #=========================================================================
  # main
  #=========================================================================
  def run(self):
    """Run the whole pipeline and fill self.deliver_list.

    Stages:
      1. pass1 -- align every read once (genPass1Mappings)
      2. split the fastqs and the pass1 *bam per barcode (splitByBarcode)
      3. per barcode, from inside its scratch directory: pass2 multimapped
         candidates, candidate clouds, inference, postprocess
      4. merge the per-barcode fragments (getDeliverables)

    Both alignment passes are always invoked with debug=False here,
    whatever the 'debug' input says.
    """
    params_path = self.config.params_path
    fq1_path = self.config.inputs['fq1_path']
    fq2_path = self.config.inputs['fq2_path']
    # optional input: use the saved-state backdoor of the inference engine
    backdoor = False
    if 'backdoor' in self.config.inputs:
      backdoor = self.config.inputs['backdoor']

    params_map = util.loadPickle(params_path)

    # pass1: align all reads single time
    #----------------------
    logger.info('TIMER START: pass1')
    (
      pass1Bam_path,
      pass1BamBCSort_path,
    ) = self.genPass1Mappings(
      fq1_path,
      fq2_path,
      debug=False,
    )
    logger.info('TIMER END: pass1')

    # split resulting alignments by barcode
    #----------------------
    barcodes = self.getBarcodes()
    # create scratch dirs per barcode
    for barcode in barcodes:
      util.mkdir_p(self.scratchPath(barcode))
    self.splitByBarcode(
      fq1_path,
      fq2_path,
      pass1Bam_path,
      pass1BamBCSort_path,
    )

    # NOTE the rest of the pipeline is mapped across barcodes
    nonempty = False
    for barcode in barcodes:
      with util.cd(self.scratchPath(barcode)):
        if barcode is not None:
          logger.info('processing barcode:{0}'.format(barcode))

        # per-barcode inputs produced by splitByBarcode
        bfq1_path = 'reads_1.fq'
        bfq2_path = 'reads_2.fq'
        barcodePass1Bam_path = 'pass1_align.bam'

        # pass2: generate multimapped candidates
        #----------------------
        logger.info('TIMER START: pass2')
        success = self.genPass2Mappings(
          bfq1_path,
          bfq2_path,
          barcodePass1Bam_path,
          debug=False,
        )
        logger.info('TIMER END: pass2')
        # if no multimapped candidates, copy pass1 as final deliver
        if not success:
          rfaBam_fname = 'rfa.bam'
          shutil.copy(barcodePass1Bam_path, rfaBam_fname)
          picard.createBAMIndex(rfaBam_fname)
          # none of the pass2 *bams exist for this barcode, so the
          # candidate/inference/postprocess stages below have nothing to
          # read; the pass1 copy is this barcode's fragment
          continue

        # generate candidate clouds + alignments
        #----------------------
        pass2PreBam_path = 'pass2pre3_align.bam'
        logger.info('TIMER START: build-cands')
        (candCloud_list, fixedCloudInfo_list) = self.getCands(
          pass2PreBam_path,
        )
        logger.info('TIMER END: build-cands')
        nonempty |= (len(candCloud_list) > 0)

        # invoke RFA
        #----------------------
        logger.info('TIMER START: rfa-inference')
        aligner = InferenceEngine(
          params_map,
          candCloud_list,
        )
        # use saved state backdoor
        backdoor_fname = 'backdoor.p'
        if backdoor:
          logger.info('    - backdoor')
          aligner.run(backdoor=True)
          aligner.backdoor(backdoor_fname)
        else:
          aligner.run()
          aligner.saveBackdoor(backdoor_fname)
        logger.info('TIMER END: rfa-inference')

        # early exit if all barcodes were empty
        # NOTE(review): this test runs once per barcode, so it fires as soon
        # as the barcodes seen so far have no candidate clouds, not only when
        # every barcode is empty.  The copy also happens while the scratch
        # directory is (presumably) the working directory, so the delivered
        # names are relative to it.  Confirm whether this belongs after the
        # loop.
        if not nonempty:
          uid = '{0}_{1:03d}'.format(self.laneID, self.wellID)
          finalBam_fname = '{0}.bam'.format(uid)
          logger.info('early exit no candidate clouds, copy back of pass1 bam')
          shutil.copy(barcodePass1Bam_path, finalBam_fname)
          finalBamIndex_fname = picard.createBAMIndex(finalBam_fname)
          self.deliver_list.append(finalBam_fname)
          self.deliver_list.append(finalBamIndex_fname)
          return

        # postprocess to generate deliverables for this barcode
        #----------------------
        logger.info('TIMER START: post-process')
        self.postprocess(
          barcode,
          aligner,
          fixedCloudInfo_list,
        )
        logger.info('TIMER END: post-process')

    # generate final results
    # by merging each of the barcode outputs
    #----------------------
    self.deliver_list.extend(
      self.getDeliverables()
    )

    logger.info('done success')
    util.memusage('done')

  #-------------------------------------------------------------------------
  # generate pass1 alignment candidates
  #-------------------------------------------------------------------------
  def genPass1Mappings(
    self,
    fq1_path,
    fq2_path,
    debug=False,
  ):
    """Align all reads once against the full reference (pass1).

    fq1_path, fq2_path -- paired fastq inputs
    debug              -- when set on a simulation run, the pass1 *bam is
                          additionally passed through colorSimBam

    Returns (pass1Bam_path, pass1BamBCSort_path).  The queryname-sorted
    *bam is only written for runs with more than one barcode; for a
    single-barcode run its path is returned but the file is not created.
    """
    bowtieIndex_path = self.config.get('referenceBowtieIndex_path')

    # pass1: align reads single time
    #----------------------
    logger.info('pass1 alignments')
    pass1Bam_path = 'pass1_align.bam'
    pass1BamBCSort_path = 'pass1_align_bcsort.bam'
    # cheat switch: set to False to reuse *bams left by a previous run
    flag__alignPass1 = True
    if flag__alignPass1:
      logger.info('generate pass1 alignments')
      bowtie2.genAlignmentsMP(
        bowtieIndex_path,
        fq1_path,
        fq2_path,
        pass1Bam_path,
        alignFlags='-D 20 --non-deterministic',
        numPrePasses=1,
      )
      gc.collect()
      # the simulation flag lives on the instance; there is no local
      # `simulate` in this method
      if debug and self.simulate:
        colorSimBam(pass1Bam_path)
      # skip barcode sort step if only one barcode
      if not self.isSingleBarcodeRun:
        picard.sortSam(
          pass1Bam_path,
          pass1BamBCSort_path,
          order='queryname',
        )

    return (
      pass1Bam_path,
      pass1BamBCSort_path,
    )

  #-------------------------------------------------------------------------
  # split pass1 alignments + fastqs by barcode space
  #-------------------------------------------------------------------------
  def splitByBarcode(
    self,
    fq1_path,
    fq2_path,
    pass1Bam_path,
    pass1BamBCSort_path,
  ):
    """Fan the fastqs and the pass1 *bam out into per-barcode scratch dirs.

    Each scratch directory (see scratchPath) ends up with 'reads_1.fq',
    'reads_2.fq' and 'pass1_align.bam'.

    Single-barcode runs only create symlinks to the inputs (including a
    link to '<pass1 bam>.bai').  Otherwise the fastqs and the
    queryname-sorted pass1 *bam are streamed once each and cut wherever
    the barcode changes; both inputs must therefore be grouped by barcode,
    and a barcode showing up in two separate runs trips an assertion.
    The per-barcode *bam fragments are then sorted and indexed via picard.
    """
    # skip split step if only a single run
    if self.isSingleBarcodeRun:
      scratch_dir = self.scratchPath(self.getSingleBarcode())
      # a relative symlink target is resolved against the directory holding
      # the link, so every target is made absolute first
      os.symlink(
        os.path.abspath(fq1_path),
        os.path.join(scratch_dir, 'reads_1.fq'),
      )
      os.symlink(
        os.path.abspath(fq2_path),
        os.path.join(scratch_dir, 'reads_2.fq'),
      )
      pass1Bam_path = os.path.abspath(pass1Bam_path)
      os.symlink(pass1Bam_path, os.path.join(scratch_dir, 'pass1_align.bam'))
      pass1BamIdx_path = pass1Bam_path + '.bai'
      os.symlink(
        pass1BamIdx_path,
        os.path.join(scratch_dir, 'pass1_align.bam.bai'),
      )
      return

    def getBarcode(qname):
      # '<barcode>$<rest>'; names that do not split in two have no barcode
      try:
        (barcode, _) = qname.split('$')
      except (ValueError, AttributeError):
        return None
      return barcode

    # close current handle, open and return new one
    def newFastqOutput(f, path):
      if f is not None:
        f.close()
      return open(path, 'w')

    def barcodeFqPaths(barcode):
      return (
        os.path.join(self.scratchPath(barcode), 'reads_1.fq'),
        os.path.join(self.scratchPath(barcode), 'reads_2.fq'),
      )

    def barcodeUnsortBamPath(barcode):
      return os.path.join(self.scratchPath(barcode), 'pass1_align_unsort.bam')
    def barcodeBamPath(barcode):
      return os.path.join(self.scratchPath(barcode), 'pass1_align.bam')

    # split raw reads into groups by barcode if they are nonempty
    #----------------------
    logger.info('split fastq by barcodes')
    currbarcode = None
    f1 = None
    f2 = None
    seen_set = set()
    try:
      for ((qname1, fq1_txt), (qname2, fq2_txt)) in izip(
        fastqIter(fq1_path),
        fastqIter(fq2_path),
      ):
        assert qname1 == qname2
        nbarcode = getBarcode(qname1)

        # close previous fastq and open new one if new barcode group; the
        # `f1 is None` test covers a leading group whose barcode is None,
        # which compares equal to the initial currbarcode
        if f1 is None or nbarcode != currbarcode:
          # enforce ordered by barcode and each barcode only seen once
          assert nbarcode not in seen_set
          seen_set.add(nbarcode)
          logger.info('  - creating new *fastq frags for barcode: {0}'.format(
            nbarcode))
          (bfq1_path, bfq2_path) = barcodeFqPaths(nbarcode)
          f1 = newFastqOutput(f1, bfq1_path)
          f2 = newFastqOutput(f2, bfq2_path)

        currbarcode = nbarcode
        f1.write(fq1_txt)
        f2.write(fq2_txt)
    finally:
      # flush and release the last barcode's fastqs (also on failure)
      for fq_fhandle in (f1, f2):
        if fq_fhandle is not None:
          fq_fhandle.close()

    # split pass1 reads by barcode
    #----------------------
    logger.info('split pass1 bam by barcodes')
    currbarcode = None
    f = None
    seen_set = set()
    unsortBam_list = []
    base_fhandle = pysam.Samfile(pass1BamBCSort_path, 'rb')
    try:
      for read in base_fhandle:
        nbarcode = getBarcode(read.qname)
        if f is None or nbarcode != currbarcode:
          assert nbarcode not in seen_set
          seen_set.add(nbarcode)
          logger.info('  - creating new *bam frag for barcode: {0}'.format(
            nbarcode))
          bam_path = barcodeUnsortBamPath(nbarcode)
          if f is not None:
            f.close()
          # every fragment reuses the header of the input *bam
          f = pysam.Samfile(
            bam_path,
            'wb',
            template=base_fhandle,
          )
          unsortBam_list.append((nbarcode, bam_path))

        currbarcode = nbarcode
        f.write(read)
    finally:
      if f is not None:
        f.close()
      base_fhandle.close()

    # sort pass1 *bams
    #----------------------
    logger.info('sort pass1 *bam frags')
    for (barcode, unsortBam_path) in unsortBam_list:
      sortBam_path = barcodeBamPath(barcode)
      picard.sortSam(unsortBam_path, sortBam_path)
      picard.createBAMIndex(sortBam_path)

  #-------------------------------------------------------------------------
  # generate pass2 alignment candidates
  #-------------------------------------------------------------------------
  def genPass2Mappings(
    self,
    fq1_path,
    fq2_path,
    pass1Bam_path,
    debug=False,
  ):
    """Generate multimapped candidate alignments (pass2) for one barcode.

    Works in the current directory:
      1. build an abbreviated reference from the pass1 cloud regions
      2. realign the reads to it and convert back to genome coordinates,
         then filter duplicate alignments
      3. reconcile with pass1: pass1 alignments that pass2 lost or moved
         are preserved, and everything is merged into 'pass2_align.bam'

    Returns False (before writing any pass2 file) when pass1 yields no
    clouds, True otherwise.
    """
    simulate = self.config.inputs['simulate']
    bowtieIndex_path = self.config.get('referenceBowtieIndex_path')
    refFasta_path = self.config.get('referenceFasta_path')
    if self.config.technology == 'moleculo':
      MAX_EXTEND_LEN = 2000
    else:
      MAX_EXTEND_LEN = 10000

    # cheat switches
    #----------------------
    flag__buildref   = True
    flag__splitfqs   = True
    flag__alignPass2 = True
    flag__reconcile  = True

    # file names used throughout
    #----------------------
    remapCoordSave_fname = 'remap-coord-save.json'
    regionsSave_fname = 'regions-save.json'
    abbrevFasta_fname = 'abbrev_ref.fasta'
    abbrevIndex_fname = abbrevFasta_fname
    abbrevBam_fname = 'pass2_align-abbrev.bam'
    pass2Pre1Bam_path = 'pass2pre1_align.bam'
    pass2Pre2Bam_path = 'pass2pre2_align.bam'
    pass2Pre3Bam_path = 'pass2pre3_align.bam'
    pass2Bam_path = 'pass2_align.bam'
    pass1SaveBam_path = 'pass1_align_save.bam'

    # build abbreviated reference from pass1 cloud locations
    #----------------------
    # map of abbrev ref coordinates to hg19 coordinates
    # <contig-name> : [hg19 coordinates]
    remapCoord_map = {}

    if not flag__buildref:
      logger.info('loading remap coordinates and regions list')
      remapCoord_map = util.jsonLoadASCII(remapCoordSave_fname)
      regions_list   = util.jsonLoadASCII(regionsSave_fname)
    else:
      logger.info('creating abbreviated references')
      finder = self.config.cloudFinder_cls(
        pass1Bam_path,
        saveReads=False,
        multiMapMode=True,
      )
      # build list of coordinates
      regions_list = []
      for c in finder.getClouds():
        regions_list.append((c.chrName, c.minpos, c.maxpos))

      if len(regions_list) == 0:
        logger.warn('warning no candidate clouds from pass1')
        logger.warn('  - early exit from generating multimapped candidates')
        return False

      remapCoord_map = ref.buildAbbrevRef(
        regions_list,
        refFasta_path,
        abbrevFasta_fname,
        MAX_EXTEND_LEN=MAX_EXTEND_LEN,
      )
      bowtie2.createFMIndex(abbrevFasta_fname)

      # save remap coordinates and cloud region locations
      for (save_fname, payload) in (
        (remapCoordSave_fname, remapCoord_map),
        (regionsSave_fname, regions_list),
      ):
        with open(save_fname, 'w') as out:
          out.write(json.dumps(payload, indent=2))

    # pass2: realign back to new reference
    #----------------------
    logger.info('pass2 alignments')
    if flag__alignPass2:
      logger.info('realigning to abbreviated references')
      bowtie2.genAlignments(
        abbrevIndex_fname,
        fq1_path,
        fq2_path,
        abbrevBam_fname,
        alignFlags='-k 14 -D 30 --non-deterministic  --score-min L,-0.6,-0.226',
      )
      # expand read alignments back to genome coordinates
      convertBamCoords(
        abbrevBam_fname,
        remapCoord_map,
        pass1Bam_path,
        pass2Pre1Bam_path,
      )

      logger.info('filtering duplicates')
      # filter duplicates from bowtie2
      filterDuplicates(
        pass2Pre1Bam_path,
        pass2Pre2Bam_path,
      )

    # reconcile candidates from pass1 vs pass2
    #----------------------
    if flag__reconcile:
      # find the set of uniquely mapped reads in pass1
      # (mapped at all -> pass1RidMapped_set; mapq >= 20 -> pass1RidLoc_map)
      logger.info('determine uniquely mapped reads in pass1')
      pass1RidLoc_map = {}
      pass1RidMapped_set = set()
      pass1_fhandle = pysam.Samfile(pass1Bam_path)
      for aln in pass1_fhandle:
        if aln.is_unmapped:
          continue
        rid = (aln.qname, aln.is_read1)
        pass1RidMapped_set.add(rid)
        if aln.mapq >= 20:
          pass1RidLoc_map[rid] = (pass1_fhandle.getrname(aln.tid), aln.pos)
      pass1_fhandle.close()

      # find the subset of these missing from pass2
      # (a pass1 location is satisfied when pass2 has an alignment of the
      # same read at the same contig and position)
      logger.info('determine alignments from pass1 missing in pass2')
      pass2RidSat_set = set()
      pass2RidMapped_set = set()
      pass2pre_fhandle = pysam.Samfile(pass2Pre2Bam_path, 'rb')
      for aln in pass2pre_fhandle:
        if aln.is_unmapped:
          continue
        rid = (aln.qname, aln.is_read1)
        pass2RidMapped_set.add(rid)
        loc = (pass2pre_fhandle.getrname(aln.tid), aln.pos)
        if pass1RidLoc_map.get(rid) == loc:
          pass2RidSat_set.add(rid)
      pass2pre_fhandle.close()

      lostRid_set = pass1RidMapped_set - pass2RidMapped_set
      movedRid_set = set(pass1RidLoc_map.keys()) - pass2RidSat_set
      pass1SaveRid_set = lostRid_set | movedRid_set
      logger.info('preserving {0} alignments from pass1'.format(
        len(pass1SaveRid_set)))

      # create multimap *bam and subset to preserve from pass1
      filterRids(
        pass2Pre2Bam_path,
        pass2Pre3Bam_path,
        pass1SaveRid_set,
        complement=True,
      )
      filterRids(
        pass1Bam_path,
        pass1SaveBam_path,
        pass1SaveRid_set,
        complement=False,
        tag='p1save',
      )
      # create final pass2 including preserved alignments from pass1
      picard.mergeBams(
        [
          pass2Pre3Bam_path,
          pass1SaveBam_path,
        ],
        pass2Bam_path,
      )
      picard.createBAMIndex(pass2Bam_path)

      # tag and color all resolvable multimappings
      if debug and simulate:
        tagRid_set = set()
        colorSimBam(pass2Bam_path, tagRid_set)

    util.memusage('done creating pass2 bam')
    return True
  
  #-------------------------------------------------------------------------
  # generate input candidate clouds + candidate read alignments
  #-------------------------------------------------------------------------
  def getCands(
    self,
    pass2Bam_path,
    debug=False,
  ):
    """Build the candidate clouds and the fixed-cloud summaries.

    A read (qname, is_read1) that occurs in more than one cloud of
    `pass2Bam_path` is multimapped.  Clouds holding at least one
    multimapped read become CandCloud objects for inference; all other
    clouds are "fixed" and are summarised right away.

    pass2Bam_path -- *bam handed to the configured cloud finder
    debug         -- accepted for interface symmetry; not used here

    Returns (candCloud_list, fixedCloudInfo_list), where each fixed entry
    is (cid, (chrName, begin, end), rid_set, cloudPenalty, editSiteInfo_map).
    """
    refFasta_path = self.config.get('referenceFasta_path')

    # determine all multimapped reads cands
    #----------------------
    util.memusage('pre cloud finder')
    logger.info('determine multimapped rids')
    cf = self.config.cloudFinder_cls(pass2Bam_path, multiMapMode=True)
    ridMapCounter = Counter()
    for cloud in cf.getClouds():
      for read in cloud.getReads():
        ridMapCounter[(read.qname, read.is_read1)] += 1

    multimapRid_set = set(
      rid for (rid, numClouds) in ridMapCounter.items() if numClouds > 1
    )
    logger.info('  - {0} found'.format(len(multimapRid_set)))

    util.memusage('pre pass through clouds')

    ref_map = Fasta(refFasta_path)

    # determine corresponding candidate long reads and create CandClouds
    #----------------------
    logger.info('determine candidate clouds')

    candCloud_list = []
    fixedCloudInfo_list = []
    for (cid, cloud) in enumerate(cf.getClouds()):
      cloudRid_set = set(
        (r.qname, r.is_read1) for r in cloud.getReads()
      )
      mappings = [
        CandAlignment(
          cid,
          cloud.chrName,
          read,
          getReadAuxInfo(read),
          getEdits(read, cloud.chrName, ref_map),
        )
        for read in cloud.getReads()
      ]
      fixedMappings = [
        loc for loc in mappings if loc.rid not in multimapRid_set
      ]
      multimapMappings = [
        loc for loc in mappings if loc.rid in multimapRid_set
      ]

      # create candidate cloud if multimapped read is in this cloud
      if len(cloudRid_set & multimapRid_set) > 0:
        candCloud_list.append(
          CandCloud(
            cid,
            cloud.chrName,
            fixedMappings,
            multimapMappings,
          )
        )
        continue

      # compute quality of fixed clouds and save read membership
      assert len(multimapMappings) == 0
      fixedCloud = CandCloud(
        cid,
        cloud.chrName,
        fixedMappings,
        [],
      )
      cloudFeats = fixedCloud.getFeats()
      cloudPenalty = InferenceEngine.getCloudPenalty(
        cloud,
        [],
        cloudFeats,
      )

      # keep an edit site when exp(info[0]) > 0.5, or when info[3] == 0
      # and info[2] == 1 (postprocess unpacks info[0], info[2], info[3] as
      # logP_v, numAlt, numRef)
      fixedEditInfo_map = InferenceEngine.getFixedEditInfo(
        fixedCloud,
        cloudFeats,
      )
      editSiteInfo_map = {
        site: info
        for (site, info) in fixedEditInfo_map.items()
        if (np.exp(info[0]) > 0.5) or (info[3] == 0 and info[2] == 1)
      }

      fixedCloudInfo_list.append(
        (
          cid,
          (cloud.chrName, cloudFeats.begin, cloudFeats.end),
          cloudRid_set,
          cloudPenalty,
          editSiteInfo_map,
        )
      )

    util.memusage('post pass through clouds')
    logger.info('  - {0} candidate clouds found'.format(len(candCloud_list)))
    logger.info('  - {0} fixed clouds found'.format(len(fixedCloudInfo_list)))

    return (candCloud_list, fixedCloudInfo_list)

  #-------------------------------------------------------------------------
  # post process rfa alignments
  #-------------------------------------------------------------------------
  def postprocess(
    self,
    barcode,
    aligner,
    fixedCloudInfo_list,
  ):
    """Turn the converged inference state into this barcode's fragments.

    Files written to the current working directory:
      - rfa_pre.bam, rfa_postfilt_pre.bam, rfa_exclude.bam, rfa_unmap.bam
        (intermediates) and rfa.bam (their merge, indexed)
      - snvs.vcf
      - cloud-active.bed, cloud-active-exclude.bed
      - results.json (simulation runs only)

    barcode             -- barcode label, recorded in cloud tags and results
    aligner             -- the inference engine after run(); queried for
                           the final mappings and derived quantities
    fixedCloudInfo_list -- (cid, (chrName, begin, end), rid_set, cloudLogP,
                           editSiteInfo_map) tuples, as built by getCands
    """
    refFasta_path = self.config.get('referenceFasta_path')

    # compute queries on converged assignment
    #----------------------
    rfaMappings = aligner.finalMappings

    logger.info('get queries for final rfa mappings')
    (
      rfaEditSiteInfo_map,
      rfaMapq_map,
      rfaCloudQuality_map,
      rfaCloudMapq_map,
    ) = aligner.getComputedQueries(rfaMappings, uid='rfa')

    # determine set of clouds + reads to exclude based on mappability
    rfaExcludeCid_set = aligner.getExcludeCids(rfaMappings)
    rfaExcludeRid_set = analysis.getRidsFromCids(rfaExcludeCid_set, rfaMappings)
    cloudFeats_map = aligner.extractCloudFeats(rfaMappings)

    # generate read tags
    #----------------------
    # ds: size of the read's scope as recorded in aligner.readScope_map
    rfaTagRid_map = {}
    for loc in rfaMappings.values():
      domainSize = len(aligner.readScope_map[loc.rid])
      rfaTagRid_map[loc.rid] = 'ds:{0};'.format(domainSize)

    # cloud tags
    #----------------------
    cloudTag_fmt = (
      'ctype:{0};prc:{1:4.4f};pec:{2:4.4f};nc:{3};nf:{4};s:{5};'
      'exclude:{6};bcode:{7}'
    )
    rfaTagCid_map = {}

    # tag fixed clouds first
    for (cid, (chrName, begin, end), rid_set, cloudLogP, editSiteInfo_map) in fixedCloudInfo_list:
      size = end - begin
      assert size > 0
      # save set of edits across all clouds
      rfaEditSiteInfo_map.update(editSiteInfo_map)

      rfaTagCid_map[cid] = cloudTag_fmt.format(
        'fix',
        cloudLogP,
        0.0,
        0,
        len(rid_set),
        size,
        int(cid in rfaExcludeCid_set),
        barcode,
      )

    # tag candidate clouds
    for cid in aligner.allCid_set:
      rfaTagCid_map[cid] = cloudTag_fmt.format(
        'cand',
        rfaCloudQuality_map[cid],
        rfaCloudMapq_map[cid],
        cloudFeats_map[cid].numCand,
        cloudFeats_map[cid].numFixed,
        cloudFeats_map[cid].size,
        int(cid in rfaExcludeCid_set),
        barcode,
      )

    # save regions of interest such as cloud and read positions
    logger.info(' - saving regions of interest')
    regionsInfo_map = analysis.getRegions(
      aligner,
      rfaMappings,
      rfaMapq_map,
      fixedCloudInfo_list,
    )
    util.memusage('  - done postprocess')

    # save info for valid edits
    #----------------------
    rfaEditInfo_map = {}

    for (editSite, info) in rfaEditSiteInfo_map.items():
      (logP_v, logP_vp, numAlt, numRef, altAllele, cid) = info
      # skip handling of indels for now: only single-base substitutions
      # pass.  The length test matters because `x in 'acgt'` is a substring
      # test, which '' and multi-base strings such as 'ac' or 'cgt' satisfy.
      if len(altAllele) != 1 or altAllele.lower() not in 'acgt':
        continue
      # include only high confidence sites or ones where only a single
      # read displaying alternate allele (for very shallow sequencing)
      if (
        (np.exp(logP_v) < 0.5) and
        not (numRef == 0 and numAlt == 1)
      ):
        continue

      rfaEditInfo_map[editSite] = info

    # get results (simulation only)
    #----------------------
    pass1Bam_path = 'pass1_align.bam'
    pass2Bam_path = 'pass2_align.bam'
    if self.simulate:
      logger.info('TIMER START: get-results')
      oracleMappings = aligner.getOracleMappings()
      # only the oracle mapq map is needed from the oracle queries
      (
        _,
        oracleMapq_map,
        _,
        _,
      ) = aligner.getComputedQueries(oracleMappings, uid="oracle")
      results_map = analysis.getResults(
        aligner,
        pass1Bam_path,
        pass2Bam_path,
        rfaMapq_map,
        oracleMapq_map,
      )
      results_map['barcode'] = barcode
      results_fname = 'results.json'
      with open(results_fname, 'w') as f:
        f.write(json.dumps(results_map, indent=2))
      logger.info('TIMER END: get-results')

    # create final bams
    #----------------------
    def forceUnmapped(read, pairUnmapped):
      # rewrite an excluded alignment in place as an unmapped record
      read.is_unmapped = True
      read.mapq = 0
      if pairUnmapped and read.is_read1:
        read.flag = read.flag | 0x8
        read.flag = read.flag & 0xfd
      # 0xc5 keeps only bits 0x1, 0x4, 0x40 and 0x80
      # NOTE(review): that mask also clears the 0x8 bit set just above --
      # confirm whether the mate-unmapped flag is meant to survive
      read.flag = read.flag & 0xc5
      read.cigar = None
      read.tlen = 0
      read.bin = 0
      read.pos = -1
      read.rname = -1
      # replace all tags, marking the read as excluded
      read.tags = [
        ('YF', 'NS'),
        ('YT', 'UP'),
        ('AB', 'excluded'),
      ]
      return read

    logger.info('creating deliver *bams')
    # rfa_pre.bam keeps every realigned read (no exclusion); it is written
    # here but not merged into rfa.bam below
    rfaPreBam_fname = 'rfa_pre.bam'
    genFilteredBam(
      pass2Bam_path,
      rfaPreBam_fname,
      rfaMappings,
      rfaMapq_map,
      tagRid_map=rfaTagRid_map,
    )
    logger.info('generating realign pre bam (excluding excluded clouds)')
    logger.info('  - {0} reads excluded'.format(len(rfaExcludeRid_set)))
    rfaPostFiltPreBam_fname = 'rfa_postfilt_pre.bam'
    genFilteredBam(
      pass2Bam_path,
      rfaPostFiltPreBam_fname,
      rfaMappings,
      rfaMapq_map,
      excludeRid_set=rfaExcludeRid_set,
      tagRid_map=rfaTagRid_map,
    )

    util.memusage('post generate realign bam')

    # extract pass2 unmapped and reads to exclude
    #-------
    logger.info('extract excluded reads + pass2 unmapped reads')
    rfaExcludeBam_fname = 'rfa_exclude.bam'
    rfaUnmapBam_fname = 'rfa_unmap.bam'
    base_fhandle = pysam.Samfile(pass2Bam_path, 'rb')
    try:
      exclude_fhandle = pysam.Samfile(
        rfaExcludeBam_fname,
        'wb',
        template=base_fhandle,
      )
      try:
        unmap_fhandle = pysam.Samfile(
          rfaUnmapBam_fname,
          'wb',
          template=base_fhandle,
        )
        try:
          for read in base_fhandle.fetch(until_eof=True):
            rid = (read.qname, read.is_read1)
            # use only primary alignments
            if read.is_secondary:
              continue

            # collect unmapped reads
            if read.is_unmapped:
              unmap_fhandle.write(read)

            # collect excluded reads and force to be unmapped
            elif rid in rfaExcludeRid_set:
              pairRid = (read.qname, not read.is_read1)
              pairUnmapped = (
                read.mate_is_unmapped or
                pairRid in rfaExcludeRid_set
              )
              read = forceUnmapped(read, pairUnmapped)
              exclude_fhandle.write(read)
        finally:
          unmap_fhandle.close()
      finally:
        exclude_fhandle.close()
    finally:
      base_fhandle.close()

    # merge *bam files into final form
    #-------
    logger.info('merge realignment *bams and create index')
    rfaBam_fname = 'rfa.bam'
    picard.mergeBams(
      [
        rfaPostFiltPreBam_fname,
        rfaUnmapBam_fname,
        rfaExcludeBam_fname,
      ],
      rfaBam_fname,
    )
    picard.createBAMIndex(rfaBam_fname)

    # create vcf for all confident variants
    #----------------------
    logger.info('create vcf fragment for all confident edit sits')
    vcf_fname = 'snvs.vcf'

    # sort edit sites to be in same order as bam
    # FIXME this is pretty hacky
    chrInfo_list = samtools.getIndexStats(pass2Bam_path, format='list')
    order_map = {}
    for (i, (chrName, _)) in enumerate(chrInfo_list):
      order_map[chrName] = i

    def editSiteOrder(item):
      # item is ((chrName, pos), info)
      (chrName, pos) = item[0]
      return (order_map[chrName], pos)

    ref_map = Fasta(refFasta_path)
    with open(vcf_fname, 'w') as f:
      for (editSite, info) in sorted(rfaEditInfo_map.items(), key=editSiteOrder):

        (chrName, pos) = editSite
        (logP_v, logP_vp, numAlt, numRef, altAllele, cid) = info
        tag = 'pr:{0};na:{1};nr:{2};'.format(
          np.exp(logP_v),
          numAlt,
          numRef,
        )
        # append corresponding cloud tag
        tag += rfaTagCid_map[cid]

        f.write(('%s\t' * 7 + '%s\n') % (
          # chrom
          chrName,
          # pos (1-offset)
          pos + 1,
          # id
          '.',
          # ref
          str(ref_map[chrName][pos]).lower(),
          # alt
          altAllele,
          # qual
          '.',
          # filter
          'PASS',
          # info
          tag,
        ))

    # create *bed files for position info of reads and clouds
    #----------------------
    logger.info('saving regions of interest')

    def tagCloud(val_list):
      # each region is expected to carry exactly one cloud id
      assert len(val_list) == 1
      return rfaTagCid_map[val_list[0]]

    for key in ('cloud-active', 'cloud-active-exclude'):
      util.writeBedFile(
        regionsInfo_map[key],
        '{0}.bed'.format(key),
        score_func=tagCloud
      )

    return

  #-------------------------------------------------------------------------
  # create RFA deliverables
  #-------------------------------------------------------------------------
  def getDeliverables(self):
    """Merge the per-barcode fragments into the final deliverables.

    Looks in every barcode scratch directory for 'rfa.bam', 'snvs.vcf',
    the two cloud *bed files and 'results.json', then
      - merges (several barcodes) or moves (one barcode) the *bams into
        '<uid>.bam' and indexes it
      - concatenates the *vcf / *bed fragments into '<uid>_<name>'
      - for simulation runs, writes the list of all results dicts to
        '<uid>_results.json'
    where <uid> is '<laneID>_<wellID, zero padded to 3 digits>'.

    Returns the list of delivered paths.
    """
    uid = '{0}_{1:03d}'.format(self.laneID, self.wellID)
    delivered = []

    # fragment names, in collection order
    bam_key = 'rfa.bam'
    results_key = 'results.json'
    concat_keys = [
      'snvs.vcf',
      'cloud-active.bed',
      'cloud-active-exclude.bed',
    ]
    wanted = [bam_key] + concat_keys + [results_key]

    # collect barcode fragments
    #----------------------
    barcodes = self.getBarcodes()
    fragPaths_map = defaultdict(list)
    for barcode in barcodes:
      scratch = self.scratchPath(barcode)
      for name in wanted:
        candidate = os.path.join(scratch, name)
        if os.path.isfile(candidate):
          fragPaths_map[name].append(candidate)

    # merge to get final *bam
    #----------------------
    logger.info('merge barcode *bams to get final deliver bam')
    finalBam_path = '{0}.bam'.format(uid)
    if len(barcodes) > 1:
      picard.mergeBams(fragPaths_map[bam_key], finalBam_path)
    else:
      # a lone barcode needs no merge: its fragment becomes the final *bam
      assert len(barcodes) == 1
      shutil.move(fragPaths_map[bam_key][0], finalBam_path)
    finalBamIndex_path = picard.createBAMIndex(finalBam_path)
    delivered.extend([finalBam_path, finalBamIndex_path])

    # concatenate *vcf and *bed files
    #----------------------
    for name in concat_keys:
      logger.info('merge barcode *{0} to get final deliver'.format(name))
      merged_fname = '{0}_{1}'.format(uid, name)
      util.concatFiles(
        fragPaths_map[name],
        merged_fname,
      )
      delivered.append(merged_fname)

    # merge results dicts
    #----------------------
    if self.simulate:
      allResults_list = [
        util.jsonLoadASCII(frag_path)
        for frag_path in fragPaths_map[results_key]
      ]
      finalResults_fname = '{0}_{1}'.format(uid, results_key)
      with open(finalResults_fname, 'w') as out:
        out.write(json.dumps(allResults_list , indent=2))
      delivered.append(finalResults_fname)

    return delivered

