#Pull lines in classification file based on isoform ids
#to run script: python3 Pull.Classification.Lines.by.IsoformID.py <file with isoform ids 1 per line> <full classification file> <output filtered classification file>
#

import sys

#read in isoform ids
#returns list of isoform ids
def read_isoforms():
    """Read the isoform ids listed in the file named by ``sys.argv[1]``.

    The file is expected to hold one isoform id per line. Surrounding
    whitespace (spaces, tabs, newline) is removed from each id and blank
    lines are skipped, so stray trailing whitespace in the id file does not
    prevent an id from matching the whitespace-split keys of the
    classification file.

    Returns:
        list of isoform id strings, in file order (duplicates preserved).
    """
    isoforms_file = sys.argv[1]
    isoform_list = []
    with open(isoforms_file, 'r') as isoforms:
        for line in isoforms:
            isoform_id = line.strip()
            # A blank line carries no id; an empty string could never match.
            if isoform_id:
                isoform_list.append(isoform_id)
    return isoform_list


#read in classification file
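#returns dictionary with key == isoform id (first whitespace-separated field) and value == full classification line; only lines starting with "PB" are kept, so header lines are skipped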
def read_class():
    """Index the classification file named by ``sys.argv[2]`` by isoform id.

    Only lines beginning with ``"PB"`` are considered; any other line (such
    as a header) is ignored. The key is the first whitespace-separated field
    of the line and the value is the complete, unmodified line including its
    line ending. If an id occurs more than once, the last line wins.

    Returns:
        dict mapping isoform id -> classification line.
    """
    with open(sys.argv[2], 'r') as handle:
        return {
            record.split(None, 1)[0]: record
            for record in handle
            if record.startswith("PB")
        }

#filter classification file
#returns final dictionary with key == isoform id and value == one-element list containing the classification line
def filter():
    """Select the classification lines for the requested isoform ids.

    Calls ``read_isoforms()`` and then ``read_class()``, and keeps every
    requested id that is present in the classification dictionary. Ids
    without a classification line are dropped silently.

    Note: this function's name shadows the builtin ``filter`` within this
    module.

    Returns:
        dict mapping isoform id -> one-element list holding that isoform's
        classification line, ordered by first appearance in the id list.
    """
    wanted_ids = read_isoforms()
    lines_by_isoform = read_class()
    return {
        iso_id: [lines_by_isoform[iso_id]]
        for iso_id in wanted_ids
        if iso_id in lines_by_isoform
    }


#write new classification file
def write():
    """Write the filtered classification lines to the file named by ``sys.argv[3]``.

    The lines come from ``filter()``, which maps each isoform id to a
    one-element list holding its classification line; lines are written
    unchanged, in the dictionary's iteration order.

    The output file is opened in ``'w'`` mode so that it is created fresh on
    every run. Append mode would add a second copy of every line when the
    script is re-run with the same output path.
    """
    filtered_dict = filter()
    output = sys.argv[3]
    with open(output, 'w') as out:
        # Each value is a one-element list; element 0 is the line itself,
        # which still carries its original line ending.
        out.writelines(entry[0] for entry in filtered_dict.values())

#script entry point: run the read -> filter -> write pipeline using the paths in sys.argv[1], sys.argv[2] and sys.argv[3]
#NOTE: there is no __main__ guard, so this call also executes if the file is imported as a module
write()
