#!/usr/bin/env python
from sys import argv
import pandas as pd
import numpy as np

# Fixed seed so that the random pick among equally valid candidates is
# reproducible from run to run.
SEED = 1234

# Example invocation:
#   script.py DNase.in.enhancers.mean.txt DNase.in.distal_non_p300_DHSs.mean.txt \
#       enhancers.indices_matched.txt distal_non_p300_DHSs.indices_matched.txt
USAGE = (
    "usage: {prog} <df.tsv> <df_to_match.tsv> "
    "<out_indices_df> <out_indices_df_to_match>"
)


def match_by_mean(df, df_to_match):
    """Pair rows of *df* with rows of *df_to_match* of similar row mean.

    Rows of *df* are visited from the highest row mean to the lowest. For
    each one, the candidates are the rows of *df_to_match* whose mean lies
    strictly between this row's mean and the previous (next higher) row's
    mean; the first window is open-ended at the top. If there is at least
    one candidate, the *df* row is kept and one candidate is drawn with
    ``np.random.choice`` (NumPy's global random state). Otherwise the row
    is dropped.

    Because the windows are disjoint, a *df_to_match* row is picked at most
    once, and a *df* row whose mean ties with the previous row's mean always
    gets an empty window.

    Args:
        df: DataFrame whose rows are to be matched.
        df_to_match: DataFrame providing the candidate rows.

    Returns:
        A ``(kept, kept_to_match)`` tuple of two equally long lists of row
        labels, in the order the matches were made.
    """
    df_mean = df.mean(axis=1).sort_values(ascending=False)
    # The candidate means are deliberately NOT sorted. The original script
    # assigned the sorted means to the wrong variable, so the sort never took
    # effect; the boolean masks below do not need it, and keeping file order
    # keeps the seeded picks identical to earlier runs.
    df_mean_to_match = df_to_match.mean(axis=1)

    labels = np.array(df_mean.index)
    vals = np.array(df_mean)
    labels_to_match = np.array(df_mean_to_match.index)
    vals_to_match = np.array(df_mean_to_match)

    kept = []
    kept_to_match = []
    last_val = np.inf
    for label, val in zip(labels, vals):
        # Open interval (val, last_val): strict on both sides.
        in_window = (val < vals_to_match) & (vals_to_match < last_val)
        candidates = labels_to_match[in_window]
        if candidates.size > 0:
            kept.append(label)
            kept_to_match.append(np.random.choice(candidates))
        last_val = val
    return kept, kept_to_match


def write_indices(path, labels):
    """Write *labels* to *path*, sorted, one per line.

    Labels are converted with ``str`` so non-string row labels (e.g. an
    integer index column) can be written. An empty *labels* produces an
    empty file rather than a file holding a single blank line.
    """
    with open(path, "w") as f:
        f.writelines("{}\n".format(label) for label in sorted(labels))


def main(args=None):
    """Run the matching for the four command-line arguments.

    Args:
        args: ``[df, df_to_match, out_indices_df, out_indices_df_to_match]``
            paths. Defaults to ``argv[1:]``. Arguments beyond the fourth
            are ignored.

    Raises:
        SystemExit: with a usage message if fewer than four arguments are
            given.
    """
    args = argv[1:] if args is None else list(args)
    if len(args) < 4:
        raise SystemExit(USAGE.format(prog=argv[0]))
    df_path, df_to_match_path, out_indices_df, out_indices_df_to_match = args[:4]

    np.random.seed(SEED)

    # Both inputs are tab-separated with the row label in the first column.
    df = pd.read_csv(df_path, sep="\t", index_col=0)
    df_to_match = pd.read_csv(df_to_match_path, sep="\t", index_col=0)

    kept, kept_to_match = match_by_mean(df, df_to_match)

    # Each list is sorted independently on output, so line i of one file
    # does not correspond to line i of the other.
    write_indices(out_indices_df, kept)
    write_indices(out_indices_df_to_match, kept_to_match)


if __name__ == "__main__":
    main()
