#!/usr/bin/env python
from sys import argv
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu
from GGR import utils
from sys import argv

# Compare mean binding between named peak sets with pairwise
# Mann-Whitney U tests.
#
# Command-line arguments:
#   argv[1]  tab-separated table; its first column becomes the row index
#   argv[2]  comma-separated list of peak-set files
#   argv[3]  comma-separated list of names, one per peak-set file
if len(argv) < 4:
    raise SystemExit(
        "usage: %s <binding.tsv> <set1,set2,...> <name1,name2,...>" % argv[0]
    )

binding = argv[1]
peak_sets = argv[2].split(",")
names = argv[3].split(",")

# zip() would silently drop the unmatched tail, so refuse mismatched lists.
if len(peak_sets) != len(names):
    raise ValueError(
        "got %d peak set file(s) but %d name(s)" % (len(peak_sets), len(names))
    )

binding = pd.read_csv(binding, index_col=0, sep="\t")

# name -> collection of row labels loaded from the matching peak-set file.
sets_d = {}
for peak_set, name in zip(peak_sets, names):
    sets_d[name] = utils.read_file_return_set(peak_set)

# name -> 1-D array holding, for every row selected by that set, the mean
# across the table's columns.
# `.loc` replaces the removed `DataFrame.ix` indexer; on current pandas a label
# that is absent from the table raises KeyError instead of yielding a NaN row.
binding_by_set = {}
for name in sets_d:
    binding_by_set[name] = np.array(
        binding.loc[list(sets_d[name])].mean(axis=1)
    )

# Every ordered pair of distinct sets is tested, so each unordered pair is
# reported twice (once in each direction), matching the historical output.
for k1 in binding_by_set:
    for k2 in binding_by_set:
        if k1 == k2:
            continue
        _, p = mannwhitneyu(binding_by_set[k1], binding_by_set[k2])
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("%s vs. %s, MWU, p = %0.2e" % (k1, k2, p))
        