#!/usr/bin/python

import glob
import gzip
from Bio import SeqIO
from argparse import ArgumentParser

def read_taxid_file(taxid_file):
    """Load an accession-to-taxid mapping from a tab-delimited file.

    Every non-blank line is stripped of surrounding whitespace and split on
    tabs; the first field becomes the key (accession) and the second field
    the value (taxid). Any further fields are ignored. When an accession
    occurs more than once, the last occurrence wins.

    Args:
        taxid_file: Path of the tab-delimited mapping file.

    Returns:
        A dict mapping the first column to the second column (both str).

    Raises:
        IndexError: If a non-blank line has fewer than two tab-separated
            fields.
    """
    accessions = {}
    with open(taxid_file, 'rt') as handle:
        for line in handle:
            stripped = line.strip()
            # Blank lines (e.g. an extra newline at the end of the file)
            # carry no mapping; skip them instead of failing on the
            # missing second column.
            if not stripped:
                continue
            fields = stripped.split("\t")
            accessions[fields[0]] = fields[1]

    return accessions

def create_conversion_table(taxid_file, seq_path, output_file):
    """Write a sequence-ID to taxid table for a folder of gzipped FASTA files.

    For every accession in the taxid map, each file in ``seq_path`` whose
    name starts with that accession and ends in ``.fna.gz`` is parsed as
    FASTA, and one line ``<record id>\\t<taxid>`` is written to
    ``output_file`` per sequence record.

    Args:
        taxid_file: Path of the tab-delimited accession-to-taxid file
            (read with ``read_taxid_file``).
        seq_path: Directory containing the ``*.fna.gz`` sequence files.
        output_file: Path of the tab-delimited table to create; an existing
            file is overwritten.
    """
    acc_map = read_taxid_file(taxid_file)
    # Escape glob metacharacters so a directory (or accession) containing
    # '[', '?' or '*' is matched literally rather than treated as a pattern.
    seq_dir = glob.escape(seq_path)
    with open(output_file, 'wt') as f_out:
        for acc, taxid in acc_map.items():
            pattern = seq_dir + "/" + glob.escape(acc) + "*.fna.gz"
            # glob returns matches in arbitrary order; sort them so the
            # output is reproducible between runs.
            for filename in sorted(glob.glob(pattern)):
                with gzip.open(filename, 'rt') as f_in:
                    for record in SeqIO.parse(f_in, "fasta"):
                        f_out.write(record.id + "\t" + taxid + "\n")

def main():
    """Parse the command-line arguments and build the conversion table.

    All three options (``--taxid_map``, ``--seq_path``, ``--output``) are
    required; their values are passed straight to
    ``create_conversion_table``.
    """
    # The first positional parameter of ArgumentParser is ``prog``; passing
    # "" there blanks the program name in usage and error messages. Give a
    # description by keyword and let argparse derive the program name.
    parser = ArgumentParser(
        description="Create a tab delimited sequence ID to taxid conversion "
                    "file for centrifuge-build from a folder of gzipped "
                    "fasta files.")
    parser.add_argument('-t', '--taxid_map', required=True,
                        help='file mapping ref seq accessions to taxids')
    parser.add_argument('-s', '--seq_path', required=True,
                        help="Folder of input fasta sequences")
    parser.add_argument('-o', '--output', required=True,
                        help="tab delimited conversion file for centrifuge-build")
    args = parser.parse_args()

    create_conversion_table(args.taxid_map, args.seq_path, args.output)

# Run the command-line interface only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    main()
