import sys

# Thresholds a blastx hit must satisfy to be counted (see _parse_blastx).
# pident_threshold = 80  # alternative, less strict identity cutoff
pident_threshold = 90
length_threshold = 25
evalue_threshold = 1e-07


def _parse_database(database_filename):
    """Return a dict mapping each gene name to its subtype.

    The file's first line is treated as a header and skipped.  Every other
    non-blank line is tab-separated: column 0 is the subtype and column 1 is
    a bracketed, ", "-separated list of quoted gene names, for example
    ``['geneA', 'geneB']``.  If a gene is listed under several subtypes the
    last one read wins.
    """
    gene_subtype_map = {}
    with open(database_filename, "r") as database_file:
        database_file.readline()  # skip the header line
        for line in database_file:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line has no second column.
                continue
            items = line.split("\t")
            subtype = items[0]
            # Drop the surrounding brackets, then the quotes around each name.
            for subitem in items[1][1:-1].split(", "):
                gene = subitem.strip()[1:-1]
                gene_subtype_map[gene] = subtype
    return gene_subtype_map


def _parse_readnumber(readnumber_filename):
    """Return the total read count stored on the first line of the file."""
    with open(readnumber_filename, "r") as readnumber_file:
        # int() replaces the Python 2-only long(); on Python 2 int() already
        # promotes to long when the value does not fit a machine integer.
        return int(readnumber_file.readline())


def _parse_blastx(blastx_filename):
    """Return a dict mapping each target gene to the barcodes that hit it.

    Each non-blank line is tab-separated; column 0 is the query id, column 1
    the target gene, column 2 the percent identity, column 3 the alignment
    length and column 10 the e-value.  The barcode is the second
    ":"-separated field of the query id, cut at its first ",".  A hit is kept
    only if it satisfies all three module-level thresholds.
    """
    gene_barcode_map = {}
    with open(blastx_filename, "r") as blastx_file:
        for line in blastx_file:
            if not line.strip():
                # Blank lines carry no query id and would raise IndexError.
                continue
            items = line.split("\t")
            barcode = items[0].strip().split(":")[1].split(",")[0]
            target_gene = items[1]
            pident = float(items[2])
            length = int(items[3])
            evalue = float(items[10])
            if (pident >= pident_threshold
                    and length >= length_threshold
                    and evalue <= evalue_threshold):
                gene_barcode_map.setdefault(target_gene, []).append(barcode)
    return gene_barcode_map


def main(argv=None):
    """Print one line per gene that received at least one accepted hit.

    ``argv`` defaults to ``sys.argv`` and is read as
    ``[program, database_file, readnumber_file, blastx_file]``.

    Each output line is tab-separated: gene name, subtype, number of hits
    normalised to parts per million of the total read count (``%g``
    formatted), and the comma-joined barcodes of the hits.

    Raises:
        IndexError: fewer than three file arguments were supplied.
        KeyError: a hit's target gene does not appear in the database file.
        ZeroDivisionError: the read-number file contains 0.
    """
    if argv is None:
        argv = sys.argv

    gene_subtype_map = _parse_database(argv[1])
    readnumber = _parse_readnumber(argv[2])
    gene_barcode_map = _parse_blastx(argv[3])

    # normalize to ppm
    ppm_multiplier = 1e06 / readnumber

    # Per-antibiotic totals; only consumed by the disabled summary below.
    antibiotic_count_map = {}
    for gene_name in gene_barcode_map:
        barcodes = gene_barcode_map[gene_name]
        subtype = gene_subtype_map[gene_name]

        # The antibiotic is the part of the subtype before the first "__".
        antibiotic = subtype.split("__")[0]
        # antibiotic = subtype  # alternative: count per full subtype

        antibiotic_count_map[antibiotic] = (
            antibiotic_count_map.get(antibiotic, 0) + len(barcodes))

        output_str = "\t".join([
            gene_name,
            subtype,
            "%g" % (len(barcodes) * ppm_multiplier),
            ",".join(barcodes),
        ])
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(output_str)

    # Disabled per-antibiotic ppm summary:
    # for antibiotic in sorted(antibiotic_count_map):
    #     print("%s %s" % (antibiotic,
    #                      antibiotic_count_map[antibiotic] * 1000000.0 / readnumber))


if __name__ == "__main__":
    main()
