#!/usr/bin/python3

import gzip
from Bio import SeqIO
from Bio.Seq import Seq
from argparse import ArgumentParser

def read_conversion_table(tsv_file):
    """Load a tab-separated conversion table into a dictionary.

    Each non-blank line is stripped of surrounding whitespace and split on
    tabs; the first column becomes the key and the second column the value.
    Any further columns are ignored. If a key occurs more than once, the
    last occurrence wins.

    Args:
        tsv_file: Path of the tab-separated text file to read.

    Returns:
        A dict mapping first-column strings to second-column strings.

    Raises:
        ValueError: If a non-blank line has fewer than two columns.
    """
    seqids = {}
    with open(tsv_file, 'rt') as file:
        for line_number, line in enumerate(file, start=1):
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline at the end of the
                # file) carry no mapping and must not abort the whole run.
                continue
            columns = stripped.split("\t")
            if len(columns) < 2:
                raise ValueError(
                    "{}:{}: expected at least two tab-separated columns, got: {!r}".format(
                        tsv_file, line_number, stripped))
            seqids[columns[0]] = columns[1]

    return seqids

def add_kraken_header_to_seqids(tsv_file, input_file, output_file):
    """Write a FASTA file whose sequence ids carry a kraken taxid tag.

    Every record of ``input_file`` whose id appears in the conversion table
    is written to ``output_file`` with the id rewritten to
    ``<id>|kraken:taxid|<taxid>``. Records whose id is not in the table are
    not written. The output header consists of the new id only.

    Args:
        tsv_file: Path of the tab-separated conversion table
            (sequence id in the first column, taxon id in the second).
        input_file: Path of the FASTA file to read.
        output_file: Path of the FASTA file to create or overwrite.
    """
    conversion_table = read_conversion_table(tsv_file)
    with open(output_file, 'wt') as f_out, open(input_file, 'rt') as f_in:
        for record in SeqIO.parse(f_in, "fasta"):
            taxid = conversion_table.get(record.id)
            if taxid is None:
                # No taxon known for this sequence: leave it out of the output.
                continue
            new_id = record.id + "|kraken:taxid|" + str(taxid)
            # An explicit empty description is required: SeqRecord otherwise
            # defaults to "<unknown description>", which the FASTA writer
            # would append to every header line.
            new_record = SeqIO.SeqRecord(record.seq, id=new_id, description="")
            written = SeqIO.write(new_record, f_out, 'fasta')
            if written != 1:
                print('Error while writing sequence:  ' + new_record.id)

def main():
    """Parse the command-line arguments and run the header conversion.

    All three options (conversion table, input fasta, output fasta) are
    required; argparse exits with a usage message if any is missing.
    """
    # Do not pass "" as the first positional argument: that is ``prog`` and
    # an empty value blanks the program name in usage and error messages.
    parser = ArgumentParser(
        description="Add kraken2 taxon id tags to the sequence ids of a fasta file")
    parser.add_argument('-t', '--conversion_table', required=True,
                        help='centrifuge conversion table mapping seq ids to taxon ids')
    parser.add_argument('-f', '--fasta_file', required=True,
                        help="fasta sequence file containing all sequences for building the kraken2 db")
    parser.add_argument('-o', '--output', required=True,
                        help="fasta file with added taxon ids needed by kraken2")
    args = parser.parse_args()

    add_kraken_header_to_seqids(args.conversion_table, args.fasta_file, args.output)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
