#!/usr/bin/env python
import pandas as pd
import numpy as np
import argparse

# Text shown at the top of ``--help``; RawTextHelpFormatter prints it verbatim.
DESCRIPTION = """

take_log_dataframe.py

Given a tab-separated dataframe with a header (by default), compute the log
of each cell.

"""


def build_parser():
    """Return the command-line parser for this script.

    Required arguments are placed in their own group so that ``--help``
    lists them before the optional ones.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=DESCRIPTION,
    )

    # argparse prints its built-in optional group first.  Pop it, register the
    # "required arguments" group, and re-append the optional group afterwards
    # so that the help output shows required arguments first.
    optional = parser._action_groups.pop()
    required = parser.add_argument_group('required arguments')

    ##################################################
    # required args:

    required.add_argument("-i", "--df",
                          help="path to input dataframe ",
                          required=True)
    required.add_argument("-o", "--out",
                          help="path to output dataframe ",
                          required=True)

    ##################################################
    # optional args:

    optional.add_argument("--no_header",
                          help="if dataframe lacks a header",
                          action='store_true')
    optional.add_argument("--log_base",
                          help="base of log",
                          type=float, default=np.e)
    optional.add_argument("--prior_count_for_log",
                          help="prior count to avoid undefined values, i.e., log(0), (default: %(default)s)",
                          type=float, default=1.)

    ##################################################
    parser._action_groups.append(optional)
    return parser


def _validate_log_base(log_base):
    """Raise ValueError unless ``log_base`` is a usable logarithm base.

    A base of 1 makes ``log(base)`` zero and a base <= 0 makes it -inf or
    NaN; dividing by either silently corrupts the output instead of failing.
    ``not log_base > 0`` is used so that NaN is rejected as well.
    """
    if not log_base > 0 or log_base == 1:
        raise ValueError(
            "log base must be positive and different from 1 (got %r)" % (log_base,)
        )


def log_transform(df, log_base=np.e, prior_count=1.0):
    """Return ``log(df + prior_count)`` in base ``log_base``, element-wise.

    Args:
        df: numeric pandas DataFrame (or array-like supporting ``+``).
        log_base: base of the logarithm; must be > 0 and != 1.
        prior_count: constant added to every cell before taking the log.

    Returns:
        An object of the same shape as ``df`` (index and columns preserved
        for a DataFrame) holding the transformed values.

    Raises:
        ValueError: if ``log_base`` is not a valid logarithm base.
    """
    _validate_log_base(log_base)
    # Change of base: log_b(x) = ln(x) / ln(b).
    return np.log(df + prior_count) / np.log(log_base)


def main(argv=None):
    """Read the input table, log-transform every cell and write the result.

    Args:
        argv: list of command-line arguments; ``None`` means ``sys.argv[1:]``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Reject an unusable base before doing any I/O, with a normal usage error.
    try:
        _validate_log_base(args.log_base)
    except ValueError as exc:
        parser.error("--log_base: %s" % exc)

    # sep=r"\s+" is the documented replacement for the deprecated
    # delim_whitespace=True; the first column is used as the row index.
    in_df = pd.read_csv(
        args.df,
        sep=r"\s+",
        index_col=0,
        header=None if args.no_header else 'infer',
    )

    out_df = log_transform(in_df, args.log_base, args.prior_count_for_log)

    # The output is always tab-separated; a header row is written only when
    # the input had one.
    out_df.to_csv(args.out, sep='\t', index=True, header=not args.no_header)


if __name__ == "__main__":
    main()
