#!/usr/bin/env python
import pandas as pd
import argparse

# Text shown at the top of --help; RawTextHelpFormatter keeps its line breaks.
_DESCRIPTION = """

subset_df_by_index.py

Given a dataframe and an index, subset dataframe by index. 
Also can sort by index if --order_by_index.
By default, dataframe is not assumed to contain header
"""

parser = argparse.ArgumentParser(
    formatter_class=argparse.RawTextHelpFormatter,
    description=_DESCRIPTION,
)

# Pop the parser's last action group and re-append it once the
# 'required arguments' group exists (presumably so the required section
# is listed first in --help output).
optional = parser._action_groups.pop()
required = parser.add_argument_group('required arguments')

# ------------------------------------------------
# required args

required.add_argument(
    "-i", "--df",
    required=True,
    help="required, dataframe",
)
required.add_argument(
    "--index",
    required=True,
    help="""required, indices where each line indexes a 
value in the first column (index) of the dataframe
""",
)
required.add_argument(
    "-o", "--out",
    dest="out_df",
    required=True,
    help="required, output dataframe",
)

# ------------------------------------------------
# optional args (both are boolean flags, False unless given)

optional.add_argument(
    "--header",
    action='store_true',
    help="if file contains header, then indicate with flag",
)
optional.add_argument(
    "--order_by_index",
    action='store_true',
    help="""if it is desired that the dataframe output be ordered
in the same order as the provided index, then indicate with flag
""",
)

# ------------------------------------------------
parser._action_groups.append(optional)
args = parser.parse_args()

# Load the tab-separated table; its first column becomes the row index.
# Without --header no line is treated as column names.
header = 'infer' if args.header else None
df = pd.read_csv(args.df, sep='\t', index_col=0, header=header)

# One label per line, with surrounding whitespace (incl. newline) stripped.
# NOTE(review): labels read here are always str; if pandas parses the first
# column of the table as numbers, nothing will match -- confirm inputs.
with open(args.index, 'r') as f:
    index = [line.strip() for line in f]

# `.ix` was removed in pandas 1.0; `.loc` is the label-based replacement.
if args.order_by_index:
    # Output follows the order of the index file; labels that are absent
    # from the table are skipped rather than raising KeyError.
    df_index = set(df.index)
    df = df.loc[[x for x in index if x in df_index]]
else:
    # Keep the table's own row order. A boolean mask selects each matching
    # row exactly once, whereas a label list would return rows with a
    # repeated index label several times over.
    index = set(index)
    df = df.loc[df.index.isin(index)]

# Column names are written only when the input was declared to have them.
df.to_csv(args.out_df, sep='\t', na_rep="NA", header=args.header)
