#!/usr/bin/env python
from sys import argv
import pandas as pd

def collapse_replicates(mat):
    """Sum columns that share the same prefix before the first underscore.

    Each column label is split on "_" and the first piece is used as the
    group name (a label without an underscore forms its own group).  All
    columns belonging to a group are summed row-wise.

    Parameters
    ----------
    mat : pandas.DataFrame
        Input matrix with string column labels.  It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index as ``mat`` and one column per
        group, ordered by sorted group name.
    """
    # Group the original column labels once, instead of re-scanning (and
    # growing) the frame's columns for every group.
    groups = {}
    for col in mat.columns:
        groups.setdefault(col.split("_")[0], []).append(col)

    ordered = sorted(groups)
    sums = {prefix: mat[groups[prefix]].sum(axis=1) for prefix in ordered}
    # Pass index/columns explicitly so the row index survives even when there
    # are no data columns, and the column order is always the sorted one.
    return pd.DataFrame(sums, index=mat.index, columns=ordered)


def main(args=None):
    """Command-line entry point: ``script <in_matrix.tsv> <out_matrix.tsv>``.

    Reads a tab-separated matrix whose first column is the row index,
    collapses its columns with :func:`collapse_replicates`, and writes the
    result as a tab-separated file.

    Parameters
    ----------
    args : list of str, optional
        Argument vector including the program name; defaults to ``sys.argv``.
        Arguments after the first two positional ones are ignored.

    Raises
    ------
    SystemExit
        If fewer than two positional arguments are supplied.
    """
    args = argv if args is None else args
    if len(args) < 3:
        prog = args[0] if args else "script"
        raise SystemExit(
            "usage: {} <in_matrix.tsv> <out_matrix.tsv>".format(prog)
        )
    in_path, out_path = args[1], args[2]

    mat = pd.read_csv(in_path, sep='\t', index_col=0)
    collapse_replicates(mat).to_csv(out_path, sep='\t')


if __name__ == "__main__":
    main()