#script to pull TPM from Kallisto output
#will need to read in isoforms for male or female sex specific files and read in kallisto output for individual samples
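#to run script: python3 Pull.Kallisto.TPM.sex.specific.py <sex specific isoform file> <liver kallisto output> <brain kallisto output> <pronephros kallisto output> <gonad kallisto output> <final output>
#note: the kallisto files must be given in this order (liver, brain, pronephros, gonad); <final output> is not currently read by this script, which only prints isoform counts to stdout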

import sys

#to read in sex specific isoforms file
#file will have 1 column with 1 isoform per line
#returns list of isoform IDs
def read_sex_specific_isoforms():
    """Load the sex-specific isoform IDs named by the first CLI argument.

    The file path is taken from ``sys.argv[1]``. Each line of the file
    becomes one list entry with only its newline character(s) removed; any
    other whitespace is kept, and blank lines yield empty strings.

    Returns:
        list of str, in file order.
    """
    path = sys.argv[1]
    with open(path, 'r') as handle:
        # Only "\n" is stripped so IDs are otherwise kept verbatim.
        return [row.strip("\n") for row in handle]

#read in kallisto output
#will do this 4 times for 4 samples from 1 sex
#file type =
#target_id       length  eff_length      est_counts      tpm
#need target id and tpm
def read_liver_kallisto():
    """Parse the liver Kallisto table given as the second CLI argument.

    The path comes from ``sys.argv[2]``. Lines beginning with ``target_id``
    (the header) are skipped; every other line is split on whitespace, with
    the first field used as the isoform ID and the fifth field parsed as a
    float TPM. If an ID repeats, the last value read is kept.

    Returns:
        dict mapping isoform ID (str) to TPM (float).
    """
    path = sys.argv[2]
    with open(path, 'r') as handle:
        rows = (
            record.split()
            for record in handle
            if not record.startswith("target_id")
        )
        # Column 0 is target_id, column 4 is tpm.
        return {fields[0]: float(fields[4]) for fields in rows}

def read_brain_kallisto():
    """Parse the brain Kallisto table given as the third CLI argument.

    The path comes from ``sys.argv[3]``. Lines beginning with ``target_id``
    (the header) are skipped; every other line is split on whitespace, with
    the first field used as the isoform ID and the fifth field parsed as a
    float TPM. If an ID repeats, the last value read is kept.

    Returns:
        dict mapping isoform ID (str) to TPM (float).
    """
    path = sys.argv[3]
    with open(path, 'r') as handle:
        rows = (
            record.split()
            for record in handle
            if not record.startswith("target_id")
        )
        # Column 0 is target_id, column 4 is tpm.
        return {fields[0]: float(fields[4]) for fields in rows}


def read_pronephros_kallisto():
    """Parse the pronephros Kallisto table given as the fourth CLI argument.

    The path comes from ``sys.argv[4]``. Lines beginning with ``target_id``
    (the header) are skipped; every other line is split on whitespace, with
    the first field used as the isoform ID and the fifth field parsed as a
    float TPM. If an ID repeats, the last value read is kept.

    Returns:
        dict mapping isoform ID (str) to TPM (float).
    """
    path = sys.argv[4]
    with open(path, 'r') as handle:
        rows = (
            record.split()
            for record in handle
            if not record.startswith("target_id")
        )
        # Column 0 is target_id, column 4 is tpm.
        return {fields[0]: float(fields[4]) for fields in rows}


def read_gonad_kallisto():
    """Parse the gonad Kallisto table given as the fifth CLI argument.

    The path comes from ``sys.argv[5]``. Lines beginning with ``target_id``
    (the header) are skipped; every other line is split on whitespace, with
    the first field used as the isoform ID and the fifth field parsed as a
    float TPM. If an ID repeats, the last value read is kept.

    Returns:
        dict mapping isoform ID (str) to TPM (float).
    """
    path = sys.argv[5]
    with open(path, 'r') as handle:
        rows = (
            record.split()
            for record in handle
            if not record.startswith("target_id")
        )
        # Column 0 is target_id, column 4 is tpm.
        return {fields[0]: float(fields[4]) for fields in rows}


#need to sort isoforms to only include sex-specific isoforms
def sort_kallisto_tpm():
    """Restrict each tissue's TPM table to the sex-specific isoforms.

    Loads the isoform list and the liver, brain, pronephros and gonad TPM
    tables through their reader functions, keeps only the entries whose
    isoform ID appears in the list, and prints five counts, one per line:
    the number of listed isoforms, then the number of entries kept for
    liver, brain, pronephros and gonad.

    Returns:
        None. The counts are written to stdout.
    """
    sex_specific_isoforms = read_sex_specific_isoforms()
    liver_all_tpms = read_liver_kallisto()
    brain_all_tpms = read_brain_kallisto()
    pronephros_all_tpms = read_pronephros_kallisto()
    gonad_all_tpms = read_gonad_kallisto()

    liver_tpms = {
        isoform: liver_all_tpms[isoform]
        for isoform in sex_specific_isoforms
        if isoform in liver_all_tpms
    }
    brain_tpms = {
        isoform: brain_all_tpms[isoform]
        for isoform in sex_specific_isoforms
        if isoform in brain_all_tpms
    }
    pronephros_tpms = {
        isoform: pronephros_all_tpms[isoform]
        for isoform in sex_specific_isoforms
        if isoform in pronephros_all_tpms
    }
    # Membership must be tested against the full gonad table; testing the
    # (initially empty) filtered dict would never keep any gonad isoform.
    gonad_tpms = {
        isoform: gonad_all_tpms[isoform]
        for isoform in sex_specific_isoforms
        if isoform in gonad_all_tpms
    }

    print(len(sex_specific_isoforms))
    print(len(liver_tpms))
    print(len(brain_tpms))
    print(len(pronephros_tpms))
    print(len(gonad_tpms))

if __name__ == "__main__":
    # Run only when executed as a script, so importing this module does not
    # read sys.argv or open any input files.
    sort_kallisto_tpm()
