#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
import numpy as np
import matplotlib
matplotlib.use('Agg')  # do not require X window
import matplotlib.pyplot as plt

def error(string, error_type=1):
    """Report a fatal problem on stderr and terminate the script.

    Writes ``ERROR: <string>`` followed by a newline to standard error and
    then exits the interpreter with ``error_type`` as the exit status.
    """
    print(f'ERROR: {string}', file=sys.stderr)
    sys.exit(error_type)


def log(string, newline_before=False):
    """Write a ``LOG: <string>`` line to standard error.

    When ``newline_before`` is true, an empty line is emitted first so the
    message is visually separated from whatever was written before it.
    """
    lead = '\n' if newline_before else ''
    sys.stderr.write(lead + f'LOG: {string}\n')

#####


log('Reading in stats ...')

# Every command-line argument is treated as the path of one stats file.
files = sys.argv[1:]

# Parsed values, each keyed by the file path exactly as given on the
# command line.
hopo_pcts = {}  # path -> float taken from the ':: After deletion:' line
bases = {}      # path -> {base: float} for A, T, G, C
lengths = {}    # path -> {length: float} for 2, 3, 4, 5

for file in files:
    with open(file) as infh:
        for line in infh:
            if ':: After deletion:' in line:
                # the value is the last tab-separated field of the line
                hopo_pcts[file] = float(line.strip().rsplit('\t', 1)[-1])

            # A ':: Bases:' header (re)starts the per-base table of this
            # file, so if the header occurs more than once the last wins.
            if ':: Bases:' in line:
                bases[file] = {}
            for base in ['A', 'T', 'G', 'C']:
                if f':: {base}: ' in line:
                    # setdefault: a value line that is not preceded by a
                    # header must not fail with a KeyError
                    bases.setdefault(file, {})[base] = float(
                        line.strip().rsplit(' ', 1)[-1])

            # Same scheme for the per-length table.
            if ':: Lengths:' in line:
                lengths[file] = {}
            for leng in [2, 3, 4, 5]:
                if f':: {leng}: ' in line:
                    lengths.setdefault(file, {})[leng] = float(
                        line.strip().rsplit(' ', 1)[-1])

    # Fail with a message naming the file and the missing part instead of
    # a bare KeyError here or later while the plot data is assembled.
    if file not in hopo_pcts:
        error(f"{file}: no ':: After deletion:' line found")
    missing_bases = [base for base in ['A', 'T', 'G', 'C']
                     if base not in bases.get(file, {})]
    if missing_bases:
        error(f"{file}: no value found for base(s) {', '.join(missing_bases)}")
    if file not in lengths:
        error(f"{file}: no ':: Lengths:' section found")

    # fill in 0 if length is missing
    for leng in [2, 3, 4, 5]:
        lengths[file].setdefault(leng, .0)


log('Plotting ...')

# Tick labels: the file name without directory and without its last
# extension, with 'new1' -> 'WT', 'new2' -> 'SL2', 'human' -> 'HSA'
# substituted in that order.
labs = []
for path in files:
    stem = path.rsplit('/', 1)[-1].rsplit('.', 1)[0]
    for old, new in (('new1', 'WT'), ('new2', 'SL2'), ('human', 'HSA')):
        stem = stem.replace(old, new)
    labs.append(stem)
print(labs)

# Re-arrange the per-file dictionaries into lists ordered like `files`.
pdata = [hopo_pcts[name] for name in files]
ldata = {}
for size in (2, 3, 4, 5):
    ldata[size] = [lengths[name][size] for name in files]
bdata = {}
for base in ('A', 'T', 'G', 'C'):
    bdata[base] = [bases[name][base] for name in files]


# Bar x positions 0, 1, 3, 4, 6, 7, ...: every third slot is skipped, which
# leaves a gap after each pair of bars.
posis = []
for slot in range(len(files) * 3 // 2):
    if slot % 3 == 2:
        continue
    posis.append(slot)

# The first two colours of the active colour cycle, repeated for the bars.
prop_cycle = plt.rcParams['axes.prop_cycle']
colors = prop_cycle.by_key()['color'][:2] * len(files)


# One figure with three side-by-side panels that share the bar positions.
fig, axes = plt.subplots(figsize=(12, 6), ncols=3)
fig.suptitle('Statistics for deletions of size 1')
pct_ax, len_ax, base_ax = axes

# --- Panel 1: one bar per file with its ':: After deletion:' value ---
bp1 = pct_ax.bar(posis, pdata, tick_label=labs, width=0.8, color=colors)
pct_ax.set_ylim((0, 1))
pct_ax.set_ylabel('portion')
# rotate the tick labels that `tick_label` installed above
for tick_text in pct_ax.get_xticklabels():
    tick_text.set_rotation(90)
pct_ax.set_title('occured in homopolymer')
# the first two bars serve as legend handles for the two alternating colours
pct_ax.legend(bp1.patches[:2], ('WT', 'SL2'), framealpha=1)
pct_ax.grid(alpha=0.4)


# --- Panel 2: stacked bars of the per-length values ---
len_ax.set_prop_cycle(color=['c', 'm', 'orange', 'k', 'grey'])
bot = np.zeros(len(files))  # running top of the stack for every bar
for size in (2, 3, 4, 5):
    len_ax.bar(posis, ldata[size], bottom=bot, width=0.8, label=str(size))
    bot = bot + ldata[size]
# whatever is left up to 1 is drawn as the '6+' segment
len_ax.bar(posis, 1 - bot, bottom=bot, width=0.8, label='6+')
len_ax.set_ylim((0, 1))
len_ax.set_xticks(posis)
len_ax.set_xticklabels(labs, rotation=90)
len_ax.set_title('homopolymer length')
len_ax.legend(framealpha=1)
len_ax.grid(alpha=0.4)


# --- Panel 3: stacked bars of the per-base values ---
base_ax.set_prop_cycle(color=['r', 'g', 'b', 'y'])
bot = np.zeros(len(files))
for base in ('A', 'T', 'G', 'C'):
    # 'T' is shown as 'U' in the legend
    shown = 'U' if base == 'T' else base
    base_ax.bar(posis, bdata[base], bottom=bot, width=0.8, label=shown)
    bot = bot + bdata[base]
base_ax.set_ylim((0, 1))
base_ax.set_xticks(posis)
base_ax.set_xticklabels(labs, rotation=90)
base_ax.set_title('homopolymer base')
base_ax.legend(framealpha=1)
base_ax.grid(alpha=0.4)



fig.savefig('deletions_homopolymer.pdf', bbox_inches='tight')
