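#Replaces the transcript_id of each GTF record (an Ensembl ID or a novel ID) with the value of its isoform_id attribute and drops the isoform_id attribute; records without an isoform_id have their isoform_gene_id attribute removed instead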
#to run script: python3 AlterTranscriptID.py <input gtf> <output gtf>
#Author: Alice Naftaly, Nov 2020

import sys

#
def _rewrite_attributes(attr_text):
    """Return a GTF attribute column with transcript_id set to the isoform ID.

    ``attr_text`` is the ninth GTF column without its line ending; attributes
    are expected to be separated by ``"; "``.

    * If an attribute starting with ``isoform_id`` is present, every attribute
      starting with ``transcript_id`` is replaced by ``transcript_id <value>``
      (``<value>`` being the second space-separated token of the first
      ``isoform_id`` attribute) and the ``isoform_id`` attributes are dropped.
    * Otherwise, attributes starting with ``isoform_gene_id`` are dropped.

    All other attributes are kept in their original order.
    """
    # Set the closing ";" aside before splitting. Left attached to the last
    # attribute it would leak into the new transcript_id value (yielding ";;")
    # or vanish whenever the last attribute is replaced or dropped.
    has_terminator = attr_text.endswith(";")
    if has_terminator:
        attr_text = attr_text[:-1]
    attrs = attr_text.split("; ")

    # Only the first isoform_id is used; scanning once also avoids emitting
    # the attribute list several times when isoform_id is repeated.
    isoform_id = None
    for attr in attrs:
        if attr.startswith("isoform_id"):
            isoform_id = attr.split(" ")[1]
            break

    if isoform_id is None:
        kept = [a for a in attrs if not a.startswith("isoform_gene_id")]
    else:
        kept = []
        for attr in attrs:
            if attr.startswith("transcript_id"):
                kept.append("transcript_id " + isoform_id)
            elif not attr.startswith("isoform_id"):
                kept.append(attr)

    rewritten = "; ".join(kept)
    if has_terminator and kept:
        rewritten += ";"
    return rewritten


def _rewrite_line(line):
    """Return ``line`` with its attribute column rewritten.

    Comment lines (starting with ``#``) and lines with fewer than nine
    tab-separated fields (e.g. blank lines) are returned unchanged instead of
    raising ``IndexError``. Record lines are rebuilt from their first nine
    fields and always end with a newline.
    """
    fields = line.split("\t")
    if line.startswith("#") or len(fields) < 9:
        return line
    attributes = _rewrite_attributes(fields[8].strip("\n"))
    return "\t".join(fields[:8] + [attributes]) + "\n"


def read_gtf():
    """Rewrite the GTF named by ``sys.argv[1]`` into the file ``sys.argv[2]``.

    Each input line is passed through ``_rewrite_line`` and written out in the
    same order. The output file is opened in append mode, so an existing file
    is extended rather than overwritten.

    Raises:
        IndexError: if fewer than two command-line arguments were supplied,
            or if an ``isoform_id`` attribute carries no value.
    """
    gtf_file = sys.argv[1]
    output = sys.argv[2]
    with open(gtf_file, 'r') as gtf, open(output, 'a') as out:
        for line in gtf:
            out.write(_rewrite_line(line))
# Run the conversion only when this file is executed as a script, so that
# importing the module does not read sys.argv or touch any files.
if __name__ == "__main__":
    read_gtf()
