#!/usr/bin/env python

"""
Usage: ./markov-model file.fasta
"""

import sys
import collections
import itertools
import Bio.SeqIO

def usage():
    """Write the usage message to stderr and return exit status 1.

    The message is the module docstring.  Under ``python -OO`` docstrings
    are stripped and the module ``__doc__`` is ``None``, so a built-in copy
    of the usage line is used instead of failing with ``AttributeError``.

    Returns:
        1, suitable for passing straight to ``sys.exit``.
    """
    text = __doc__ or 'Usage: ./markov-model file.fasta'
    sys.stderr.write(text.strip() + '\n\n')
    return 1


def main():
    """Print nucleotide and dinucleotide frequencies for a FASTA file.

    Reads every record of the FASTA file named by ``sys.argv[1]`` and writes
    tab-separated ``symbol<TAB>frequency`` lines to stdout: first the four
    bases A, C, G, T, then the 16 ordered base pairs (the transitions
    between adjacent positions).

    Base frequencies are relative to the total number of symbols read and
    transition frequencies to the total number of adjacent pairs, so any
    symbol other than uppercase A/C/G/T still counts toward the
    denominators but gets no output row of its own.

    Returns:
        0 on success; 1 if the command line is wrong or the file contains
        no sequence data.
    """
    if len(sys.argv) != 2:
        return usage()

    bases = collections.Counter()
    transitions = collections.Counter()
    total_length = 0
    num_transitions = 0

    # Tally record by record rather than holding every sequence in memory.
    for rec in Bio.SeqIO.parse(sys.argv[1], 'fasta'):
        seq = str(rec.seq)
        bases.update(seq)
        transitions.update(seq[i:i + 2] for i in range(len(seq) - 1))
        total_length += len(seq)
        # A sequence of N symbols has N - 1 transitions; clamp at zero so
        # an empty record contributes none instead of subtracting one.
        num_transitions += max(len(seq) - 1, 0)

    if total_length == 0:
        # Nothing to normalise by; report it instead of dividing by zero.
        sys.stderr.write(
            'No sequence data found in {}\n'.format(sys.argv[1]))
        return 1

    # print() with one parenthesised argument behaves the same on
    # Python 2 and Python 3.
    for base in 'ACGT':
        print('{}\t{}'.format(base, bases[base] / float(total_length)))

    for pair in itertools.product('ACGT', repeat=2):
        transition = ''.join(pair)
        if num_transitions:
            frequency = transitions[transition] / float(num_transitions)
        else:
            # Every sequence had a single symbol: no transitions observed.
            frequency = 0.0
        print('{}\t{}'.format(transition, frequency))

    return 0


# Run main() only when executed as a script, and hand its return value to
# the interpreter as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
