# -*- coding: utf-8 -*- 

import pandas as pd
import glob
from Bio import SeqIO

OutFile = "transition_table.tsv"

# Species grouped by transition key ("transA" ... "transI").
trans_dic = {
    "transA": [
        "Fukomys_mechowii2",
        "Fukomys_micklemi",
        "ensembl_Fukomys_damarensis",
        "ensembl_Heterocephalus_glaber_male",
    ],
    "transB": ["ensembl_Chinchilla_lanigera", "Octodon_degus"],
    "transC": ["Dipodomys_ordii", "Dipodomys_spectabilis2"],
    "transD": ["Chaetodipus_baileyi"],
    "transE": ["Cricetulus_griseus_crigri", "ensembl_Mesocricetus_auratus"],
    "transF": ["Peromyscus_eremicus"],
    "transG": ["Meriones_unguiculatus"],
    "transH": ["Eospalax_fontanierii_baileyi", "Nannospalax_galili"],
    "transI": ["Jaculus_jaculus"],
}

# Flat list of every species listed under any transition, in dictionary order.
# These are the species later labelled "ConvSP".
ConvSp = [species for members in trans_dic.values() for species in members]



#Read alignments and get a list of species per gene
gene_info = []

for file_fa in glob.glob("*.fasta"):
    #print(file_fa)
    gene_name = file_fa.split(".")[0]
    #print(gene_name)
    sp_name = [records.id for records in SeqIO.parse(file_fa, "fasta")]
    #print(sp_name)
    nb_sp =len(sp_name)
    list_sp_ex=["Tamias_sibiricus","Fukomys_mechowii2","Fukomys_micklemi","ensembl_Fukomys_damarensis","ensembl_Heterocephalus_glaber_male","ensembl_Chinchilla_lanigera","ensembl_Cavia_porcellus","Dipodomys_spectabilis2","Chaetodipus_baileyi","Heteromys_desmarestianus","ensembl_Mesocricetus_auratus","Myodes_glareolus","Peromyscus_eremicus","Peromyscus_leucopus","Meriones_unguiculatus","ensembl_Rattus_norvegicus","ensembl_Mus_caroli","ensembl_Mus_musculus","ensembl_Mus_pahari","Eospalax_fontanierii","Abrothrix_longipilis","Abrothrix_olivaceus"]
    nb_sp_ex=0
    for i  in sp_name: 
        if i in list_sp_ex:
            nb_sp_ex+=1
		
		#print(nb_sp)    
        
    status_sp=[]
    for sp in sp_name :
        if sp in ConvSp :
            status_sp.append( "ConvSP")
        else:
            status_sp.append( "NonConvSP")
        count_spConv = status_sp.count("ConvSP")
        count_spNonConv = status_sp.count("NonConvSP")
        #print(count_spConv)
	
    gene_info.append([gene_name,nb_sp,count_spConv,count_spNonConv,nb_sp_ex])
gene_info_df = pd.DataFrame(gene_info,columns=["gene_name","nb_sp","count_spConv","count_spNonConv","nb_sp_expre"])
print(gene_info_df.head)

#gene_info_df.reindex(gene_info_df.gene_name)
#print(gene_info_df.head)


#Create table for each transition

sp_transA = trans_dic["transA"]
sp_transB = trans_dic["transB"]
sp_transC = trans_dic["transC"]
sp_transD = trans_dic["transD"]
sp_transE = trans_dic["transE"]
sp_transF = trans_dic["transF"]
sp_transG = trans_dic["transG"]
sp_transH = trans_dic["transH"]
sp_transI = trans_dic["transI"]

transitions=[]
res_tA=[]
for file_fa in glob.glob("*.fasta"):
	gene_name_2 = file_fa.split(".")[0]
	sp_name = [records.id for records in SeqIO.parse(file_fa, "fasta")]
	res_tA = 0
	res_tB = 0
	res_tC = 0
	res_tD = 0
	res_tE = 0
	res_tF = 0
	res_tG = 0
	res_tH = 0
	res_tI = 0
	for sp in sp_name :
		if sp in sp_transA :
			res_tA = 1
		elif sp in sp_transB :
			res_tB = 1
		elif sp in sp_transC :
			res_tC = 1
		elif sp in sp_transD :
			res_tD = 1
		elif sp in sp_transE :
			res_tE = 1
		elif sp in sp_transF :
			res_tF = 1
		elif sp in sp_transG :
			res_tG = 1
		elif sp in sp_transH :
			res_tH = 1
		elif sp in sp_transI :
			res_tI = 1
	nb_transitions = sum([res_tA,res_tB,res_tC,res_tD,res_tE,res_tF,res_tG,res_tH,res_tI])
	transitions.append([gene_name_2,nb_transitions,res_tA,res_tB,res_tC,res_tD,res_tE,res_tF,res_tG,res_tH,res_tI])
trans_df = pd.DataFrame(transitions,columns=["gene_name_2","nb_transitions","res_tA","res_tB","res_tC","res_tD","res_tE","res_tF","res_tG","res_tH","res_tI"])
#trans_df.reindex(trans_df.gene_name_2)
print(trans_df.head)

# Combine the per-gene species counts and the per-gene transition flags.
# The two tables are placed side by side by row position, so row i of each
# table must describe the same gene. Check that explicitly: a mismatch would
# otherwise silently attach transition flags to the wrong gene.
if list(gene_info_df["gene_name"]) != list(trans_df["gene_name_2"]):
    raise ValueError(
        "gene_info_df and trans_df do not list the same genes in the same order; "
        "cannot concatenate them by position"
    )

list_table = [gene_info_df, trans_df]
final_table_int = pd.concat(list_table, axis=1, join='outer', sort=False)
# gene_name_2 duplicates gene_name once the rows are known to line up.
final_table = final_table_int.drop(columns=['gene_name_2'])
# head must be *called*; printing the bound method dumps the whole frame repr.
print(final_table.head())

# Tab-separated output without the positional index column.
final_table.to_csv(OutFile, sep="\t", index=False)
