#!/usr/bin/env python
import pandas as pd
from GGR import utils
import json
from sys import argv

# Summarise, for several FDR thresholds, which rows of an FDR table fall
# below the threshold in each column, and which of those rows are newly
# below the threshold compared with all earlier columns.
#
# Command-line arguments:
#   argv[1]  input tab-separated table; its first column is used as the index
#   argv[2]  output JSON: {FDR: {column: [index labels], 'all': [index labels]}}
#   argv[3]  output JSON: {FDR: {column: [index labels first seen in that column]}}
#   argv[4]  output TSV: number of labels per column (plus 'all') for each FDR
#   argv[5]  output TSV: number of newly seen labels per column for each FDR
FDR_df = argv[1]
sig_sites_per_timepoint_by_FDR_out = argv[2]
sig_sites_gained_per_timepoint_by_FDR_out = argv[3]
num_sig_sites_per_timepoint_by_FDR_out = argv[4]
num_sig_sites_gained_per_timepoint_by_FDR_out = argv[5]

# Thresholds evaluated; defined once so both passes below stay in agreement.
FDR_THRESHOLDS = [0.01, 0.05, 0.10, 0.20]

FDR_df = pd.read_csv(FDR_df, index_col=0, sep='\t')
FDR_dict_sig_sites = {}
FDR_dict_sig_sites_gained_per_timepoint = {}

for FDR in FDR_THRESHOLDS:
    FDR_dict_sig_sites[FDR] = {}
    FDR_dict_sig_sites_gained_per_timepoint[FDR] = {}
    # Union of every label already found below the threshold in an earlier column.
    all_sig_sites = set()
    # NOTE(review): "gained" is computed in the column order of the input
    # file, not in utils.sort_by_timepoint order -- presumably the columns
    # are timepoints already in chronological order; confirm against the caller.
    for t in FDR_df.columns:
        # Strict comparison: a value equal to the threshold is not significant.
        sig_sites_per_timepoint = set(FDR_df[FDR_df[t] < FDR].index)

        FDR_dict_sig_sites[FDR][t] = sorted(sig_sites_per_timepoint)
        FDR_dict_sig_sites_gained_per_timepoint[FDR][t] = sorted(
            sig_sites_per_timepoint - all_sig_sites)

        all_sig_sites.update(sig_sites_per_timepoint)
    FDR_dict_sig_sites[FDR]['all'] = sorted(all_sig_sites)

num_sig_sites_per_timepoint_df = pd.DataFrame()
num_sig_sites_gained_per_timepoint_df = pd.DataFrame()

for FDR in FDR_THRESHOLDS:
    sig_sites = FDR_dict_sig_sites[FDR]
    sig_keys = sorted(sig_sites)
    num_sig_sites_per_timepoint_df[FDR] = [len(sig_sites[k]) for k in sig_keys]
    num_sig_sites_per_timepoint_df.index = sig_keys

    gained_sites = FDR_dict_sig_sites_gained_per_timepoint[FDR]
    # list() so the ordering can be reused for both the values and the index.
    gained_keys = list(utils.sort_by_timepoint(gained_sites))
    num_sig_sites_gained_per_timepoint_df[FDR] = [
        len(gained_sites[k]) for k in gained_keys]
    num_sig_sites_gained_per_timepoint_df.index = gained_keys

# Reorder the rows with utils.sort_by_timepoint and put the 'all' row last.
# Every threshold shares the same keys, so the last threshold's dict is used
# explicitly instead of relying on the loop variable leaking out of the loop.
ordered_rows = utils.sort_by_timepoint(
    [k for k in FDR_dict_sig_sites[FDR_THRESHOLDS[-1]] if k != 'all']) + ['all']
# DataFrame.ix was removed in pandas 1.0; the index holds labels, so .loc is
# the equivalent label-based selection.
num_sig_sites_per_timepoint_df = num_sig_sites_per_timepoint_df.loc[ordered_rows]

# Sanity check: the per-column "gained" counts partition the union, so they
# must add up to the 'all' count for every threshold.
assert (list(num_sig_sites_gained_per_timepoint_df.sum())
        == list(num_sig_sites_per_timepoint_df.loc['all'])), \
    "gained-per-timepoint counts do not sum to the 'all' counts"

# json.dump writes the float FDR keys as JSON strings.
with open(sig_sites_per_timepoint_by_FDR_out, 'w') as f:
    json.dump(FDR_dict_sig_sites, f)

with open(sig_sites_gained_per_timepoint_by_FDR_out, 'w') as f:
    json.dump(FDR_dict_sig_sites_gained_per_timepoint, f)

num_sig_sites_per_timepoint_df.to_csv(num_sig_sites_per_timepoint_by_FDR_out, sep='\t', index=True)
num_sig_sites_gained_per_timepoint_df.to_csv(num_sig_sites_gained_per_timepoint_by_FDR_out, sep='\t', index=True)