#!/usr/bin/env python
import pandas as pd
import argparse

def build_parser():
    """Build the command-line parser for subset_df_by_col.py.

    Returns:
        argparse.ArgumentParser: parser exposing ``--df``, ``--columns``,
        ``--out`` (all required) plus the ``--no_header`` and
        ``--order_by_cols`` flags.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description="""

subset_df_by_col.py

Given a dataframe and an index of columns, subset dataframe by column. 
Also can sort by columns if --order_by_cols.
By default, dataframe is assumed to contain header.

""")

    # Temporarily remove argparse's built-in optional group so that the
    # "required arguments" group is listed first in --help; the optional
    # group is re-appended once all arguments are registered.
    optional = parser._action_groups.pop()
    required = parser.add_argument_group('required arguments')

    ##################################################
    # required args

    required.add_argument("-i", "--df", type=str,
                          help="required, dataframe", required=True)
    required.add_argument("--columns", type=str,
                          help="""required, columns where each line is
a separate column
""", required=True)
    required.add_argument("-o", "--out", type=str,
                          help="required, output dataframe",
                          action='store', required=True)

    ##################################################
    # optional args

    # The flag means the input has NO header row (it switches pandas to
    # header=None), so the help text says exactly that.
    optional.add_argument("--no_header",
                          help="if file does not contain a header, then indicate with flag",
                          action='store_true')
    optional.add_argument("--order_by_cols",
                          help="""if it is desired that the dataframe output be ordered
in the same order as the provided index, then indicate with flag
""", action='store_true')

    ##################################################
    parser._action_groups.append(optional)
    return parser


def _as_positional_label(label):
    """Return ``int(label)`` when possible, otherwise ``label`` unchanged."""
    try:
        return int(label)
    except ValueError:
        return label


def read_column_labels(path, as_int=False):
    """Read column labels from a text file, one label per line.

    Surrounding whitespace is stripped and blank lines are skipped.

    Args:
        path: path of the file listing the wanted columns.
        as_int: when true, labels that look like integers are converted to
            ``int``. A header-less table read by pandas has integer column
            labels, which string labels would never match.

    Returns:
        list: the labels in file order.
    """
    with open(path, 'r') as handle:
        labels = [line.strip() for line in handle]
    labels = [label for label in labels if label]
    if as_int:
        labels = [_as_positional_label(label) for label in labels]
    return labels


def select_columns(df, columns, order_by_cols=False):
    """Return ``df`` restricted to the requested columns.

    Args:
        df: input ``pandas.DataFrame``.
        columns: list of wanted column labels.
        order_by_cols: when true, output columns follow the order of
            ``columns`` (a label missing from ``df`` raises ``KeyError``).
            Otherwise the dataframe's own column order is kept and labels
            not present in ``df`` are ignored.

    Returns:
        pandas.DataFrame: the column subset.
    """
    if order_by_cols:
        return df[columns]
    # Build the set once so each membership test is O(1).
    wanted = set(columns)
    return df[[col for col in df.columns if col in wanted]]


def main(argv=None):
    """Run the command-line tool.

    Reads a tab-separated table (first column used as the row index),
    subsets its columns and writes the result as a tab-separated file.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    header = None if args.no_header else 'infer'
    df = pd.read_csv(args.df, sep='\t', index_col=0, header=header)

    columns = read_column_labels(args.columns, as_int=args.no_header)
    df = select_columns(df, columns, order_by_cols=args.order_by_cols)

    # Only write a header row when the input had one.
    df.to_csv(args.out, sep='\t', header=not args.no_header)


if __name__ == "__main__":
    main()