#!/usr/bin/env python
import pandas as pd
from GGR import utils
from sys import argv

# Split the "gene" records of a GTF file into 4-column BED files
# (chrom, chromStart, chromEnd, gene_id): one file per gene type listed in
# the gene_id -> gene_type table, plus one combined file with every gene.
#
# Command-line arguments:
#   argv[1]  path to the GTF file to read
#   argv[2]  path to a tab-separated table with `gene_id` and `gene_type` columns
#   argv[3]  output prefix; files are written to '<prefix><gene_type>.bed'
#            and '<prefix>.bed' (all genes)
gtf = argv[1]
gene_type_table_path = argv[2]
out_prefix = argv[3]

gene_id_to_gene_type_df = pd.read_csv(gene_type_table_path, sep='\t')
gene_id_to_gene_type = dict(zip(gene_id_to_gene_type_df.gene_id,
                                gene_id_to_gene_type_df.gene_type))

# The mapping is only used to decide which per-type output files to create.
gene_types = sorted(set(gene_id_to_gene_type.values()))

fh_by_gene_type = {}
try:
    for gene_type in gene_types:
        fh_by_gene_type[gene_type] = open('%s%s.bed' % (out_prefix, gene_type), 'w')
    fh_by_gene_type['all'] = open('%s.bed' % (out_prefix), 'w')

    with open(gtf, 'r') as f:
        for i, line in enumerate(f):
            # Progress indicator; the parenthesised form prints the same
            # thing under both Python 2 and Python 3.
            if i % 10000 == 0:
                print(i)
            # Skip header/comment lines only. Testing for '#' anywhere in the
            # line would also drop records whose attributes contain a '#'.
            if line.startswith('#'):
                continue
            parsed = utils.gencode_gtf_line_parser(line)
            if parsed['annotation_type'] != 'gene':
                continue
            bed_line = '\t'.join([
                parsed['chrom'],
                str(parsed['chromStart']),
                str(parsed['chromEnd']),
                parsed['gene_id'],
            ]) + '\n'
            # A gene type that is absent from the table raises KeyError here.
            fh_by_gene_type[parsed['gene_type']].write(bed_line)
            fh_by_gene_type['all'].write(bed_line)
finally:
    # Close every handle that was opened, including 'all', even when
    # parsing fails part-way through.
    for fh in fh_by_gene_type.values():
        fh.close()