#!/usr/bin/env python
import pandas as pd
from GGR import utils
import json
from sys import argv
from collections import defaultdict
import numpy as np

# Positional command-line arguments, bound in order from argv[1] to argv[11]:
#   1     path to the tab-separated logFC table
#   2-3   input JSON files of significant sites (all / gained)
#   4-7   output JSON files (up, down, up-gained, down-gained)
#   8-11  output count tables (up, down, up-gained, down-gained)
# Each argument is looked up by index so that a missing argument still
# surfaces as an IndexError.
(
    logFC_df,
    sig_sites_per_timepoint_by_FDR_in,
    sig_sites_gained_per_timepoint_by_FDR_in,
    sig_sites_up_per_timepoint_by_FDR_out,
    sig_sites_down_per_timepoint_by_FDR_out,
    sig_sites_up_gained_per_timepoint_by_FDR_out,
    sig_sites_down_gained_per_timepoint_by_FDR_out,
    num_sig_sites_up_per_timepoint_by_FDR_out,
    num_sig_sites_down_per_timepoint_by_FDR_out,
    num_sig_sites_up_gained_per_timepoint_by_FDR_out,
    num_sig_sites_down_gained_per_timepoint_by_FDR_out,
) = (argv[position] for position in range(1, 12))

# Example values for the three input arguments (argv[1] to argv[3]):
# logFC_df = 'edgeR.sva.logFC.vs.t00.txt'
# sig_sites_per_timepoint_by_FDR_in = 'sig_sites_per_timepoint_by_FDR.json'
# sig_sites_gained_per_timepoint_by_FDR_in = 'sig_sites_gained_per_timepoint_by_FDR.json'

def _read_json(path):
    """Return the parsed contents of the JSON file at *path*."""
    with open(path, 'r') as handle:
        return json.load(handle)


# Both inputs are indexed below as mapping[FDR][timepoint] -> collection of sites.
sig_sites_per_timepoint_by_FDR = _read_json(sig_sites_per_timepoint_by_FDR_in)
sig_sites_gained_per_timepoint_by_FDR = _read_json(sig_sites_gained_per_timepoint_by_FDR_in)

# From here on logFC_df is the loaded table rather than its path: the first
# column becomes the row index and the remaining columns are looked up by
# timepoint label.
logFC_df = pd.read_csv(logFC_df, sep='\t', index_col=0)

def _split_sites_by_direction(sites_by_timepoint, logFC_table):
    """Split each timepoint's sites into up- and down-regulated lists.

    sites_by_timepoint maps a timepoint label (also a column of logFC_table)
    to a collection of site identifiers (row labels of logFC_table).

    Returns a pair ``(up, down)`` of dicts keyed by timepoint. ``up`` holds
    the sites whose logFC at that timepoint is > 0 and ``down`` those whose
    logFC is < 0, each in sorted order. Sites with logFC exactly 0 (or NaN)
    appear in neither list.
    """
    up_by_timepoint = {}
    down_by_timepoint = {}
    # Iterate over this mapping's own timepoints, so the result does not
    # depend on which timepoints some other mapping happens to contain.
    for t in sorted(sites_by_timepoint):
        sites = sorted(sites_by_timepoint[t])
        # Label-based lookup; `.ix` is no longer available in current pandas.
        # NOTE: on current pandas a site missing from the table raises
        # KeyError here instead of being silently dropped.
        logFC = np.asarray(logFC_table.loc[sites, t])
        # Filter the original Python values (not a numpy copy) so the lists
        # keep JSON-serializable element types. `sites` is already sorted,
        # so the filtered lists are sorted too.
        up_by_timepoint[t] = [s for s, fc in zip(sites, logFC) if fc > 0]
        down_by_timepoint[t] = [s for s, fc in zip(sites, logFC) if fc < 0]
    return up_by_timepoint, down_by_timepoint


# Results are keyed as mapping[FDR][timepoint] -> sorted list of sites.
sig_sites_up_per_timepoint_by_FDR = {}
sig_sites_down_per_timepoint_by_FDR = {}
sig_sites_up_gained_per_timepoint_by_FDR = {}
sig_sites_down_gained_per_timepoint_by_FDR = {}

for FDR in ['0.01', '0.05', '0.1', '0.2']:

    (sig_sites_up_per_timepoint_by_FDR[FDR],
     sig_sites_down_per_timepoint_by_FDR[FDR]) = _split_sites_by_direction(
        sig_sites_per_timepoint_by_FDR[FDR], logFC_df)

    (sig_sites_up_gained_per_timepoint_by_FDR[FDR],
     sig_sites_down_gained_per_timepoint_by_FDR[FDR]) = _split_sites_by_direction(
        sig_sites_gained_per_timepoint_by_FDR[FDR], logFC_df)

def _count_sites_per_timepoint(sites_by_FDR):
    """Build a table of site counts: one row per timepoint, one column per FDR.

    sites_by_FDR is indexed as sites_by_FDR[FDR][timepoint] -> list of sites.
    Rows follow the order returned by utils.sort_by_timepoint (presumably
    chronological; see GGR.utils). Every FDR is expected to cover the same
    number of timepoints, since the columns share one index.
    """
    counts = pd.DataFrame()
    for FDR in ['0.01', '0.05', '0.1', '0.2']:
        # Order the timepoints once and reuse that order for both the
        # column values and the row labels.
        timepoints = utils.sort_by_timepoint(sites_by_FDR[FDR])
        counts[FDR] = [len(sites_by_FDR[FDR][t]) for t in timepoints]
        counts.index = timepoints
    return counts


num_sig_sites_up_per_timepoint_df = _count_sites_per_timepoint(
    sig_sites_up_per_timepoint_by_FDR)
num_sig_sites_down_per_timepoint_df = _count_sites_per_timepoint(
    sig_sites_down_per_timepoint_by_FDR)
num_sig_sites_up_gained_per_timepoint_df = _count_sites_per_timepoint(
    sig_sites_up_gained_per_timepoint_by_FDR)
num_sig_sites_down_gained_per_timepoint_df = _count_sites_per_timepoint(
    sig_sites_down_gained_per_timepoint_by_FDR)

# The output paths were already read from argv at the top of the script,
# so they are not re-read here.

# Write the direction-split site lists, one JSON file per category.
for out_path, sites_by_FDR in [
    (sig_sites_up_per_timepoint_by_FDR_out,
     sig_sites_up_per_timepoint_by_FDR),
    (sig_sites_down_per_timepoint_by_FDR_out,
     sig_sites_down_per_timepoint_by_FDR),
    (sig_sites_up_gained_per_timepoint_by_FDR_out,
     sig_sites_up_gained_per_timepoint_by_FDR),
    (sig_sites_down_gained_per_timepoint_by_FDR_out,
     sig_sites_down_gained_per_timepoint_by_FDR),
]:
    with open(out_path, 'w') as f:
        json.dump(sites_by_FDR, f)

# Write the per-timepoint count tables as tab-separated text, keeping the
# timepoint labels as the first column.
for out_path, counts_df in [
    (num_sig_sites_up_per_timepoint_by_FDR_out,
     num_sig_sites_up_per_timepoint_df),
    (num_sig_sites_down_per_timepoint_by_FDR_out,
     num_sig_sites_down_per_timepoint_df),
    (num_sig_sites_up_gained_per_timepoint_by_FDR_out,
     num_sig_sites_up_gained_per_timepoint_df),
    (num_sig_sites_down_gained_per_timepoint_by_FDR_out,
     num_sig_sites_down_gained_per_timepoint_df),
]:
    counts_df.to_csv(out_path, sep='\t', index=True)
