import sys

def _iter_records(lines):
    """Yield ``(gene1, gene2, score, score_text)`` for each non-blank line.

    Each line is tab-separated: the first two columns are gene identifiers
    and the last column is a numeric score.  ``score_text`` is the score
    exactly as it appears in the input so it can be echoed back unchanged.

    Raises:
        IndexError: if a non-blank line has fewer than two columns.
        ValueError: if the last column is not a valid float.
    """
    for raw in lines:
        line = raw.strip()
        if not line:
            # Blank lines (e.g. a trailing empty line) carry no record.
            continue
        fields = line.split("\t")
        yield fields[0], fields[1], float(fields[-1]), fields[-1]


def _collect_candidates(pairs, records):
    """Index hits that could beat at least one pair involving a gene.

    Args:
        pairs: list of ``(gene1, gene2, score)`` non-best-match pairs.
        records: iterable of ``(gene1, gene2, score, score_text)`` hits.

    Returns:
        dict mapping a paired gene to the list of hit records (in input
        order) that mention the gene and score strictly above the lowest
        score of any pair the gene belongs to.  The exact per-pair
        threshold is applied later, when each pair is reported.
    """
    # A gene may occur in several pairs; keeping the lowest pair score as
    # the pre-filter guarantees no hit needed by any of its pairs is lost.
    floor = {}
    for gene1, gene2, score in pairs:
        for gene in (gene1, gene2):
            if gene not in floor or score < floor[gene]:
                floor[gene] = score

    candidates = {}
    for record in records:
        gene1, gene2, score = record[0], record[1], record[2]
        # A self-hit must be indexed once, not once per column.
        genes = (gene1,) if gene1 == gene2 else (gene1, gene2)
        for gene in genes:
            if gene in floor and score > floor[gene]:
                candidates.setdefault(gene, []).append(record)
    return candidates


def _unique_partners(names, gene):
    """Return ``names`` de-duplicated in first-seen order, without ``gene``."""
    seen = {gene}
    partners = []
    for name in names:
        if name not in seen:
            seen.add(name)
            partners.append(name)
    return partners


def _build_pair_report(pair1, pair2, pair_score, candidates):
    """Build the report for one non-best-match pair.

    Args:
        pair1, pair2: the two gene identifiers of the pair.
        pair_score: the score of this pair (float).
        candidates: mapping produced by ``_collect_candidates``.

    Returns:
        ``(detail_lines, group_line, cross_species)`` where ``detail_lines``
        is the ``--Start--`` ... ``--Stop--`` block, ``group_line`` is the
        tab-separated summary row, and ``cross_species`` is True when some
        better hit joins two different species (the text before ``|`` in
        the gene identifier) and does not involve the other pair member.
    """
    detail = [
        "--Start--\n",
        "#0 " + pair1 + "\t" + pair2 + "\t" + str(pair_score) + "\n",
    ]
    cross_species = False
    partner_columns = []
    # NOTE: "#1"/"#2" are emitted without a space after the tag (unlike
    # "#0 "); this is kept unchanged so existing consumers keep working.
    for tag, gene, other in (("#1", pair1, pair2), ("#2", pair2, pair1)):
        names = []
        for hit1, hit2, hit_score, score_text in candidates.get(gene, ()):
            # Only hits strictly better than THIS pair's score count.
            if hit_score > pair_score:
                detail.append(tag + hit1 + "\t" + hit2 + "\t" + score_text + "\n")
                names.append(hit1)
                names.append(hit2)
                different_species = hit1.split("|")[0] != hit2.split("|")[0]
                if different_species and hit1 != other and hit2 != other:
                    cross_species = True
        partner_columns.append("\t".join(_unique_partners(names, gene)))
    detail.append("--Stop--\n")
    group_line = pair1 + "\t" + pair2 + "\t" + "\t".join(partner_columns) + "\n"
    return detail, group_line, cross_species


def main(argv=None):
    """Report, for every non-best-match pair, the hits that score higher.

    ``argv[1]`` is the non-best-match pair file and ``argv[2]`` the file of
    all hits (both tab-separated: gene1, gene2, ..., score).  Four files are
    written next to ``argv[1]``:

    * ``.within_species.better_matches`` / ``.groups`` for pairs whose
      better hits are not flagged as cross-species, and
    * ``.between_species.better_matches`` / ``.groups`` for pairs with at
      least one cross-species better hit.

    Args:
        argv: argument vector; defaults to ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit("usage: <script> <non_best_match_file> <all_matches_file>")
    nbm_path, all_path = argv[1], argv[2]

    with open(nbm_path, "r") as handle:
        pairs = [
            (gene1, gene2, score)
            for gene1, gene2, score, _ in _iter_records(handle)
        ]

    # The generator is fully consumed inside the ``with`` block.
    with open(all_path, "r") as handle:
        candidates = _collect_candidates(pairs, _iter_records(handle))

    outlines = []
    outlines_groups = []
    outlines_cs = []
    outlines_cs_groups = []
    for pair1, pair2, pair_score in pairs:
        detail, group_line, cross_species = _build_pair_report(
            pair1, pair2, pair_score, candidates
        )
        if cross_species:
            outlines_cs.extend(detail)
            outlines_cs_groups.append(group_line)
        else:
            outlines.extend(detail)
            outlines_groups.append(group_line)

    outputs = (
        (".within_species.better_matches", outlines),
        (".within_species.better_matches.groups", outlines_groups),
        (".between_species.better_matches", outlines_cs),
        (".between_species.better_matches.groups", outlines_cs_groups),
    )
    for suffix, lines in outputs:
        with open(nbm_path + suffix, "w") as output:
            output.write("".join(lines))


if __name__ == "__main__":
    main()


