import sys
import os
import pysam
import subprocess
import shutil
import getopt
import shlex
from itertools import izip
from collections import defaultdict, Counter

def getTotalCounts(bam_fname):
  """Return the record count that ``samtools view -c`` reports for a file.

  Args:
    bam_fname: path of the alignment file. It is handed to samtools as a
      single argument, so paths containing spaces or quotes work.

  Returns:
    The count printed by samtools, as an int.

  Raises:
    ValueError: if samtools did not print an integer (for example because
      it failed); the message carries the exit status and stderr text.
    OSError: if the samtools executable cannot be started.
  """
  pp = subprocess.Popen(
    ['samtools', 'view', '-c', bam_fname],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    # text-mode pipes, so the output is a str on both Python 2 and 3
    universal_newlines=True,
  )
  # communicate() drains both pipes while waiting for the child; calling
  # wait() first and reading afterwards can deadlock once a pipe fills up.
  (out, err) = pp.communicate()
  try:
    return int(out)
  except ValueError:
    raise ValueError(
      'samtools view -c {0!r} gave no count (exit status {1}): {2}'.format(
        bam_fname,
        pp.returncode,
        err.strip(),
      )
    )

def getIndexStats(bam_fname,format='dict'):
  """Collect the per-contig rows printed by ``samtools idxstats``.

  Every output row becomes ``(name, (length, mapped, unmapped))`` and a
  final ``('sum', (...))`` entry holds the column totals over all rows.

  Args:
    bam_fname: path of the indexed alignment file. It is handed to
      samtools as a single argument, so paths containing spaces work.
    format: 'dict' to get a mapping from name to the 3-tuple, 'list' to
      get the ``(name, tuple)`` pairs in output order with 'sum' last.

  Returns:
    A dict or a list as selected by `format`, or None when the stderr
    text of samtools contains 'fail'.

  Raises:
    AssertionError: if `format` is neither 'dict' nor 'list'.
    OSError: if the samtools executable cannot be started.
  """
  assert format in ['dict', 'list']
  pp = subprocess.Popen(
    ['samtools', 'idxstats', bam_fname],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    # text-mode pipes, so the output is a str on both Python 2 and 3
    universal_newlines=True,
  )
  # communicate() drains both pipes while waiting for the child; calling
  # wait() first and reading afterwards can deadlock once a pipe fills up.
  (out, err) = pp.communicate()

  if 'fail' in err:
    return None

  sumChrLen = 0
  sumMapped = 0
  sumUnmapped = 0
  info_list = []
  for line in out.splitlines():
    # tolerate stray empty lines instead of failing on a missing column
    if not line.strip():
      continue
    words = line.split('\t')
    chrName = words[0]
    # columns: contig length, num mapped to this contig, num unmapped
    chrLen = int(words[1])
    numMapped = int(words[2])
    numUnmapped = int(words[3])
    info_list.append((chrName, (chrLen, numMapped, numUnmapped)))
    sumChrLen += chrLen
    sumMapped += numMapped
    sumUnmapped += numUnmapped

  info_list.append(('sum', (sumChrLen, sumMapped, sumUnmapped)))
  if format == 'dict':
    return dict(info_list)
  else:
    return info_list

def pileup(
  srcFasta_path,
  regionBed_path,
  inBam_path,
  out_path,
):
  """Run ``samtools mpileup -s -B`` and write its stdout to `out_path`.

  The command line is echoed to stdout before it runs. If samtools exits
  with a non-zero status, a failure message and the captured stderr are
  printed and the process exits with status 1.

  Args:
    srcFasta_path: path passed to samtools after ``-f``.
    regionBed_path: path passed to samtools after ``-l``.
    inBam_path: path of the input alignment file.
    out_path: file that receives the pileup text (overwritten).

  Raises:
    SystemExit: with code 1 when samtools reports failure.
    OSError: if the samtools executable cannot be started.
  """
  cmd = 'samtools mpileup -s -B -f {0} -l {1} {2}'.format(
    srcFasta_path,
    regionBed_path,
    inBam_path,
  )
  print(cmd)
  # Hand over an argument list rather than re-splitting the display string,
  # so that paths containing spaces reach samtools intact.
  argv = [
    'samtools', 'mpileup', '-s', '-B',
    '-f', srcFasta_path,
    '-l', regionBed_path,
    inBam_path,
  ]
  with open(out_path, 'w') as f:
    pp = subprocess.Popen(
      argv,
      stdout=f,
      stderr=subprocess.PIPE,
      # text-mode pipe, so stderr is a str on both Python 2 and 3
      universal_newlines=True,
    )
    # communicate() keeps draining stderr while waiting; a bare wait() can
    # hang once samtools has filled the stderr pipe.
    (_, err) = pp.communicate()
  if pp.returncode != 0:
    print('samtools mpileup section failed')
    # stdout went straight to out_path, so pp.stdout is None and there is
    # nothing to read back here; only stderr was captured.
    print('stderr {0}'.format(err))
    sys.exit(1)

# Characters accepted as explicit (mismatching) base calls in a pileup column.
_PILEUP_BASES = frozenset('acgtnACGTN')


def _iterPileupMapqs(mapqs):
  """Yield one integer per character of `mapqs` (character code minus 33)."""
  for char in mapqs:
    yield ord(char) - 33


def _iterPileupBases(matches, refBase):
  """Yield one lower-case base per read from a pileup read-bases string.

  '.' and ',' yield the reference base, '*' yields 'n' and an explicit
  base letter yields itself. Read-end markers ('$'), read-start markers
  ('^' plus the single character after it) and indel descriptions
  ('+'/'-', a length, then that many characters) yield nothing.
  """
  refLower = refBase.lower()
  size = len(matches)
  i = 0
  while i < size:
    char = matches[i]
    i += 1
    if char == '$':
      continue
    elif char == '^':
      # the character right after '^' belongs to the marker, not to a base
      i += 1
    elif char in '.,':
      yield refLower
    elif char == '*':
      yield 'n'
    elif char in '-+':
      digitStart = i
      while i < size and matches[i].isdigit():
        i += 1
      # jump over the inserted/deleted sequence announced by the length
      i += int(matches[digitStart:i])
    else:
      assert char in _PILEUP_BASES, (
        'unexpected pileup character {0!r}'.format(char)
      )
      yield char.lower()


def parsePileup(
  pileup_path,
):
  """Parse a pileup text file into per-position base / MAPQ counters.

  Each line is split on whitespace and is expected to carry, in order:
  name, 1-based position, reference base, read count, read bases, base
  qualities and mapping qualities (the last column is what the ``-s``
  flag used by pileup() asks samtools for -- TODO confirm for other
  producers).

  Args:
    pileup_path: path of the pileup file to read.

  Returns:
    A dict keyed by ``(chrName, pos)`` with a 0-based `pos`. Each value is
    a dict with the keys 'refBase' (as written in the file),
    'baseMQCounter' (base -> Counter of mapq), 'baseCounter' (base ->
    count) and 'allMQCounter' (mapq -> count). Bases are lower-case.
    Positions with a read count of 0 get empty counters.

  Raises:
    IndexError: if a covered position has fewer than seven columns.
    ValueError: if the position or read count is not an integer.
    AssertionError: if the read-bases column holds an unsupported
      character (for example '<' or '>').
  """
  info_map = {}
  with open(pileup_path) as f:
    # stream the file; pileups can be far larger than comfortably fits in
    # memory as a list of lines
    for line in f:
      # FIXME may not be tab-separated, hence the whitespace split
      words = line.split()
      if not words:
        continue
      chrName = words[0]
      # 0-based index
      pos = int(words[1]) - 1
      refBase = words[2]
      depth = int(words[3])

      baseMQCounter = defaultdict(Counter)
      baseCounter = Counter()
      allMQCounter = Counter()
      # A zero-depth line only holds placeholder columns; parsing them
      # would count a read that does not exist.
      if depth > 0:
        # zip stops at the shorter of the two streams, like izip did
        for (base, mapq) in zip(
          _iterPileupBases(words[4], refBase),
          _iterPileupMapqs(words[6]),
        ):
          baseMQCounter[base][mapq] += 1
          baseCounter[base] += 1
          allMQCounter[mapq] += 1

      info_map[(chrName, pos)] = {
        'refBase' : refBase,
        'baseMQCounter' : baseMQCounter,
        'baseCounter' : baseCounter,
        'allMQCounter' : allMQCounter,
      }

  return info_map

