#!/usr/bin/env python
import numpy as np
import matplotlib
font = {'size'   : 10}
matplotlib.rc('font', **font)
import matplotlib.pyplot as plt
from GGR import utils
from sys import argv
import pandas as pd
import subprocess

# Positional command-line arguments (all required):
#   1. sites_up   - comma-separated paths to tab-separated tables; the column
#                   named by FDR is summed to get the "up" count per factor
#   2. sites_down - same as sites_up, for the "down" counts
#   3. peaks      - comma-separated paths to peak files; the number of lines
#                   in each is used as the total number of sites per factor
#   4. names      - comma-separated labels, paired positionally with the
#                   three lists above
#   5. FDR        - name of the column to select from each table (kept as a
#                   string because it is used as a column label)
#   6. outplot    - path the figure is saved to
#   7. out_df     - path the tab-separated summary table is written to
if len(argv) < 8:
    raise SystemExit(
        "usage: %s SITES_UP SITES_DOWN PEAKS NAMES FDR OUTPLOT OUT_DF\n"
        "SITES_UP, SITES_DOWN, PEAKS and NAMES are comma-separated lists "
        "of equal length" % argv[0]
    )

sites_up = argv[1].split(",")
sites_down = argv[2].split(",")
peaks = argv[3].split(",")
names = argv[4].split(",")
FDR = argv[5]
outplot = argv[6]
out_df = argv[7]

# The lists are paired with zip() further down, which silently drops trailing
# entries when the lengths differ; fail loudly here instead.
if not (len(sites_up) == len(sites_down) == len(peaks) == len(names)):
    raise SystemExit(
        "error: SITES_UP, SITES_DOWN, PEAKS and NAMES must have the same "
        "number of entries (got %d, %d, %d and %d)"
        % (len(sites_up), len(sites_down), len(peaks), len(names))
    )

# sites_up = "/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/BCL3.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/CEBPB.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/cJun.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/CTCF.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/EP300.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/FOSL2.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/GR.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/H3K27ac.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/H3K4me1.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/H3K4me2.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/H3K4me3.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/H3K9me3.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/HES2.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/JunB.num_sig_peaks_up_novel_per_timepoint_by_FDR.txt".split(",")
# sites_down = "/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/BCL3.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/CEBPB.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/cJun.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/CTCF.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/EP300.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/FOSL2.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/GR.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/H3K27ac.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/H3K4me1.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/H3K4me2.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/H3K4me3.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/H3K9me3.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/HES2.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt,/data/reddylab/projects/GGR/results/chip_seq/differential_binding/iter0/delta_peaks/JunB.num_sig_peaks_down_novel_per_timepoint_by_FDR.txt".split(",")
# peaks = "/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/BCL3.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/CEBPB.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/cJun.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/CTCF.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/EP300.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/FOSL2.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/GR.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/H3K27ac.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/H3K4me1.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/H3K4me2.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/H3K4me3.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/H3K9me3.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/HES2.union.selected_reps.bed,/data/reddylab/projects/GGR/data/chip_seq/peaks/iter0_union/JunB.union.selected_reps.bed".split(",")
# names = "BCL3,CEBPB,cJun,CTCF,EP300,FOSL2,GR,H3K27ac,H3K4me1,H3K4me2,H3K4me3,H3K9me3,HES2,JunB".split(",")
# FDR = "0.1"
# outplot = "/data/reddylab/projects/GGR/results/integrative/data_resource_manuscript/barplot_proportion_of_discoveries_over_time.png"

def _count_lines(path):
    """Return the number of lines in the file at *path*.

    Counted in-process rather than through ``wc -l`` in a shell so that paths
    containing spaces or shell metacharacters are handled safely. Unlike
    ``wc -l``, a final line without a trailing newline is counted.
    """
    with open(path, 'rb') as handle:
        return sum(1 for _ in handle)


# Raw counts per factor: the sum of the FDR-named column of each table.
# The input lists are read here before the names sites_up / sites_down are
# rebound to the per-factor proportions below.
counts_up = {name: pd.read_csv(site, index_col=0, sep='\t')[FDR].sum()
             for site, name in zip(sites_up, names)}
counts_down = {name: pd.read_csv(site, index_col=0, sep='\t')[FDR].sum()
               for site, name in zip(sites_down, names)}

# Total number of sites per factor = number of lines in its peak file.
sites_total = {name: _count_lines(peak) for peak, name in zip(peaks, names)}

# Proportions of the total. `.items()` works on both Python 2 and 3.
sites_up = {k: v / float(sites_total[k]) for k, v in counts_up.items()}
sites_down = {k: v / float(sites_total[k]) for k, v in counts_down.items()}
sites_up_or_down = {k: sites_up[k] + sites_down[k] for k in sites_up}

# Factors ordered from the largest to the smallest combined proportion.
sorted_keys = sorted(sites_up_or_down, key=lambda k: sites_up_or_down[k])[::-1]

y_down = [sites_down[k] for k in sorted_keys]
y_up = [sites_up[k] for k in sorted_keys]

# Absolute counts come straight from the raw sums. Recomputing them as
# int(proportion * total) can truncate one too low because of floating-point
# rounding (e.g. int(1 / 49.0 * 49) == 0).
sites_up_or_down_df = pd.DataFrame({
    'total': [int(sites_total[k]) for k in sorted_keys],
    'total_up': [int(counts_up[k]) for k in sorted_keys],
    'total_down': [int(counts_down[k]) for k in sorted_keys],
    'total_dynamic': [int(counts_up[k] + counts_down[k]) for k in sorted_keys],
    'perc_up': ["%0.1f" % (100 * sites_up[k]) for k in sorted_keys],
    'perc_down': ["%0.1f" % (100 * sites_down[k]) for k in sorted_keys],
    'perc_dynamic': ["%0.1f" % (100 * sites_up_or_down[k]) for k in sorted_keys],
})
sites_up_or_down_df.index = sorted_keys

# y_abs = list(sites_up_or_down_df['total_dynamic'])
y_up_abs = list(sites_up_or_down_df['total_up'])
y_down_abs = list(sites_up_or_down_df['total_down'])

# Two stacked bar charts sharing one x axis: proportions on the top row,
# absolute counts on the bottom row. "Down" bars (blue) sit at the base and
# "up" bars (orange) are stacked on top of them.
x = np.arange(len(sorted_keys))

# Bar colours as RGB fractions.
orange = (230/255.0,159/255.0,0)
blue = (0,114/255.0,178/255.0)

fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(3,2))
ax_proportion, ax_absolute = axes

# Keyword arguments common to every bar series; zorder=100 keeps the bars
# above the zero line, which is drawn with zorder=1.
bar_kwargs = dict(width=0.75, linewidth=1, zorder=100)
down_abs = np.array(y_down_abs)
up_abs = np.array(y_up_abs)

ax_proportion.bar(x, y_down, color=blue, **bar_kwargs)
ax_proportion.bar(x, y_up, bottom=y_down, color=orange, **bar_kwargs)

ax_absolute.bar(x, down_abs, color=blue, **bar_kwargs)
ax_absolute.bar(x, up_abs, bottom=down_abs, color=orange, **bar_kwargs)

# NOTE(review): the +0.75 offset equals the bar width, i.e. the right edge of
# an edge-aligned bar; with centre-aligned bars the ticks would sit to the
# right of each bar - confirm against the matplotlib version in use.
tick_positions = x+0.75
x_limits = (x.min()-0.5, x.max()+1)

for ax in axes:
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('none')
    # Hide every spine except the left one.
    for side in ("right", "top", "bottom"):
        ax.spines[side].set_color('none')
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(labels=sorted_keys, ha="right", rotation=45)
    ax.axhline(0, color='black', lw=1, zorder=1)
    ax.set_xlim(x_limits)

# Proportion panel: fixed 0-1 range with quarter ticks and no tick labels.
ax_proportion.set_ylim((0, 1))
ax_proportion.set_yticks([0,0.25,0.5,0.75,1.])
ax_proportion.set_yticklabels([])

# Absolute panel: upper limit is the tallest stack rounded up to the next
# multiple of 10000, again with quarter ticks and no tick labels.
ylim = np.ceil((down_abs + up_abs).max()/10000.) * 10000

ax_absolute.set_ylim((0, ylim))
ax_absolute.set_yticks([0, ylim/4., ylim/2., ylim*3/4., ylim])
ax_absolute.set_yticklabels([])

plt.subplots_adjust(hspace=0.02)

plt.savefig(outplot)

# Write the per-factor summary table, keeping the factor names as the index.
sites_up_or_down_df.to_csv(out_df, sep='\t', index=True)