#!/usr/bin/env python
import pandas as pd
import argparse
import json

# Help text shown by --help; kept at column 0 because RawTextHelpFormatter
# prints it verbatim.
DESCRIPTION = """

Use a --lib_sizes json of library sizes to convert counts in --df to counts per million
(CPM) and save to --out_df.

"""


def build_parser():
    """Return the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=DESCRIPTION,
    )

    ##################################################
    # required args:

    parser.add_argument("-i","--df", help="required, path counts matrix", required=True)
    parser.add_argument("--lib_sizes", help="required, json linking sample -> lib_size", required=True)
    parser.add_argument("-o","--out_df", help="required, path to output cpm matrix", required=True)

    ##################################################
    return parser


def counts_to_cpm(mat, lib_sizes):
    """Scale every column of a counts matrix to counts per million.

    Each column ``c`` becomes ``mat[c] * 10**6 / float(lib_sizes[c])``.

    Args:
        mat: pandas DataFrame whose columns are sample names.
        lib_sizes: mapping from sample name to library size; each value
            only has to be accepted by ``float()``.

    Returns:
        A new DataFrame with the same index and columns as ``mat``; the
        input frame is left unmodified.

    Raises:
        KeyError: if one or more columns of ``mat`` have no entry in
            ``lib_sizes``. The message lists every missing sample.

    Note:
        A library size of zero is not rejected; the division then follows
        pandas' float semantics (inf/NaN in that column).
    """
    # Validate up front so the caller sees all missing samples at once
    # instead of a bare KeyError for whichever column is hit first.
    missing = [c for c in mat.columns if c not in lib_sizes]
    if missing:
        raise KeyError(
            "no library size for sample(s): " + ", ".join(str(c) for c in missing)
        )

    # Indexed by mat.columns so the division lines up column by column.
    sizes = pd.Series(
        [float(lib_sizes[c]) for c in mat.columns],
        index=mat.columns,
        dtype=float,
    )

    # Multiply first, then divide: same operation order as the per-column
    # formula above, so results are numerically identical to it.
    return mat.mul(10**6).div(sizes, axis="columns")


def main(argv=None):
    """Read the counts matrix and library sizes, write the CPM matrix.

    Args:
        argv: optional list of command-line arguments; defaults to
            ``sys.argv[1:]`` when None (argparse behaviour).
    """
    args = build_parser().parse_args(argv)

    mat = pd.read_csv(args.df, sep='\t', index_col=0)

    # JSON is read as UTF-8 explicitly rather than with the platform default.
    with open(args.lib_sizes, "r", encoding="utf-8") as f:
        lib_sizes = json.load(f)

    counts_to_cpm(mat, lib_sizes).to_csv(args.out_df, sep='\t')


if __name__ == "__main__":
    main()