#sort GTF file based on list of isoform IDs
#to run script: python3 Sort.GTF.by.isoforms.py <isoform list file> <gtf file> <output gtf file>
#Author: Alice Naftaly, Jan 2021

import sys

#read isoform IDs
#returns list of isoform ids
def read_IDs():
    """Read the isoform IDs that define the output order.

    The path of the ID file is taken from ``sys.argv[1]``; the file is read
    as one isoform ID per line.

    Returns:
        list: isoform IDs (str) in the order they appear in the file.
        Surrounding whitespace is removed and blank lines are skipped.
    """
    id_file = sys.argv[1]
    with open(id_file, 'r') as IDs:
        # strip() instead of strip("\n"): a trailing space or tab left on an
        # ID would otherwise make it silently fail to match any GTF entry.
        stripped = (line.strip() for line in IDs)
        # Blank lines carry no ID, so they are not reported as empty IDs.
        return [iso for iso in stripped if iso]

#read in GTF file
#returns gtf dictionary with key == isoform id and value == list of all GTF lines for that isoform
def read_gtf():
    """Group the lines of a GTF file by isoform (transcript) ID.

    The path of the GTF file is taken from ``sys.argv[2]``. Blank lines and
    lines starting with ``#`` are skipped. Every other line must have at
    least nine tab-separated columns; the isoform ID is read from the ninth
    (attribute) column.

    Returns:
        dict: isoform ID (str) -> list of that isoform's GTF lines (str) in
        file order. Every stored line ends with a newline.

    Raises:
        ValueError: if a data line has fewer than nine columns or no isoform
            ID can be read from its attribute column.
    """
    gtf_file = sys.argv[2]
    gtf_dict = {}
    with open(gtf_file, 'r') as gtf:
        for line_number, line in enumerate(gtf, start=1):
            # Header/comment lines and blank lines have no attribute column.
            if not line.strip() or line.startswith("#"):
                continue
            fields = line.rstrip("\n").split("\t")
            if len(fields) < 9:
                raise ValueError(
                    "%s line %d: expected 9 tab-separated columns, found %d"
                    % (gtf_file, line_number, len(fields))
                )
            iso_id = _isoform_id_from_attributes(fields[8])
            if iso_id is None:
                raise ValueError(
                    "%s line %d: no isoform ID in attribute column"
                    % (gtf_file, line_number)
                )
            # A final line without a newline would be glued to the next
            # line when the stored lines are written back out.
            if not line.endswith("\n"):
                line += "\n"
            gtf_dict.setdefault(iso_id, []).append(line)
    return gtf_dict


def _isoform_id_from_attributes(attributes):
    """Return the isoform ID found in a GTF attribute column, or None.

    The value of the ``transcript_id`` attribute is used wherever it appears
    in the column. If there is no such attribute, the value of the first
    attribute is used instead.
    """
    for attribute in attributes.split(";"):
        key, _, value = attribute.strip().partition(" ")
        if key == "transcript_id":
            return value.strip().strip('"')
    # No transcript_id key: fall back to the value of the first attribute.
    tokens = attributes.split()
    if len(tokens) < 2:
        return None
    return tokens[1].strip(";").strip('"')

#sort gtf dict by isoform
#returns dictionary with sorted isoforms
def sort_isoforms():
    """Arrange the GTF entries in the order given by the isoform ID list.

    Reads the GTF dictionary first and the ID list second. IDs that have no
    entry in the GTF are ignored, and GTF isoforms that are not in the ID
    list are left out of the result.

    Returns:
        dict: isoform ID -> list of GTF lines, with keys inserted in ID-list
        order.
    """
    lines_by_isoform = read_gtf()
    wanted_order = read_IDs()
    return {
        iso_id: lines_by_isoform[iso_id]
        for iso_id in wanted_order
        if iso_id in lines_by_isoform
    }

#write output
def write():
    """Write the isoform-ordered GTF lines to the output file.

    The output path is taken from ``sys.argv[3]``. The file is created if
    missing and overwritten if it already exists.
    """
    sorted_gtf = sort_isoforms()
    output = sys.argv[3]
    # 'w' rather than 'a': appending to an existing file (for example when
    # the script is rerun) would leave duplicated, no-longer-sorted output.
    with open(output, 'w') as out:
        for iso_lines in sorted_gtf.values():
            # writelines handles one line or many, so no special case is
            # needed for single-line isoforms.
            out.writelines(iso_lines)
# Run only when executed as a script, so the functions above can be loaded
# without reading or writing any files.
if __name__ == "__main__":
    # All three positional arguments are required; report a usage message
    # instead of failing later with an IndexError on sys.argv.
    if len(sys.argv) < 4:
        sys.exit(
            "usage: python3 Sort.GTF.by.isoforms.py "
            "<isoform list file> <gtf file> <output gtf file>"
        )
    write()
