#!/usr/bin/env python
import pandas as pd
import argparse

# Text shown at the top of --help; RawTextHelpFormatter keeps its line breaks.
_description = """

standardize_df.py

Given a tab-separated dataframe with a header (by default),
standardize each column (or row if --by_row).

"""
parser = argparse.ArgumentParser(
    description=_description,
    formatter_class=argparse.RawTextHelpFormatter,
)

# Detach the parser's last built-in argument group and re-append it once the
# 'required arguments' group exists, so the required group comes before it in
# the parser's group list.
optional = parser._action_groups.pop()
required = parser.add_argument_group('required arguments')

# Required arguments: (short flag, long flag, help text).
for short_flag, long_flag, help_text in (
    ("-i", "--df", "path to input dataframe "),
    ("-o", "--out", "path to output dataframe "),
):
    required.add_argument(short_flag, long_flag, help=help_text, required=True)

# Optional boolean switches: (flag, help text). All default to False.
for switch, help_text in (
    ("--no_header", "if dataframe lacks a header"),
    ("--by_row", "standardize by row instead of column"),
    ("--minmax_scaler", "scale within (0-1)"),
    ("--quantile_normalize", "quantile normalize"),
):
    optional.add_argument(switch, help=help_text, action='store_true')

parser._action_groups.append(optional)
args = parser.parse_args()

# Read the input table: any run of whitespace separates fields and the first
# column becomes the row index. With --no_header the first line is read as
# data (header=None); otherwise pandas infers the header row.
# sep=r"\s+" is the documented replacement for the deprecated
# delim_whitespace=True option.
header = None if args.no_header else 'infer'
in_df = pd.read_csv(args.df, sep=r"\s+", index_col=0, header=header)

# Choose the transform. Flags are checked in this order: --minmax_scaler,
# then --quantile_normalize, otherwise sklearn's `scale`. Each branch imports
# only the scikit-learn function it needs and defines two callables:
#   _scale_column(values) -> transformed 1-D array for a single column
#   _scale_rows(frame)    -> transformed 2-D array, computed along axis=1
if args.minmax_scaler:
    from sklearn.preprocessing import minmax_scale

    def _scale_column(values):
        return minmax_scale(values)

    def _scale_rows(frame):
        return minmax_scale(frame, axis=1)

elif args.quantile_normalize:
    from sklearn.preprocessing import quantile_transform

    def _scale_column(values):
        # Reshape the column to a single-feature 2-D array for the call,
        # then flatten the result back to 1-D.
        as_matrix = values.reshape(-1, 1)
        transformed = quantile_transform(as_matrix,
                                         output_distribution="normal",
                                         random_state=1234)
        return transformed.flatten()

    def _scale_rows(frame):
        return quantile_transform(frame, axis=1,
                                  output_distribution="normal",
                                  random_state=1234)

else:
    from sklearn.preprocessing import scale

    def _scale_column(values):
        return scale(values)

    def _scale_rows(frame):
        return scale(frame, axis=1)

# Apply the chosen transform, keeping the original index and column labels.
if args.by_row:
    row_scaled = _scale_rows(in_df)
    in_df = pd.DataFrame(row_scaled, index=in_df.index, columns=in_df.columns)
else:
    # Columns are transformed one at a time and written back in place.
    for column_name in in_df.columns:
        in_df[column_name] = _scale_column(in_df[column_name].values)

# Write the result tab-separated, always including the row index; the column
# header line is written unless --no_header was given.
header = not args.no_header
in_df.to_csv(args.out, sep='\t', index=True, header=header)
