#combine iupred results in one table 

#prep ORF table
#======================================================================

# Tag every row of an ORF table with its feature type ("ORF") and the
# haplotype it was read for. rep() over nrow() keeps zero-row tables valid.
label_orf_rows <- function(tab, haplotype) {
  n_rows <- nrow(tab)
  tab$type <- rep("ORF", n_rows)
  tab$haplo <- rep(haplotype, n_rows)
  tab
}

# SpC
haploi <- "SA03"
data_orf1 <- label_orf_rows(
  read.table("../../09_iupred/01_iupred_long/orf_SA03_s20.txt"),
  haploi
)

# SpB
haploi <- "SD06"
data_orf2 <- label_orf_rows(
  read.table("../../09_iupred/01_iupred_long/orf_SD06_s20.txt"),
  haploi
)

# SpA
haploi <- "SD01"
data_orf3 <- label_orf_rows(
  read.table("../../09_iupred/01_iupred_long/orf_SD01_s20.txt"),
  haploi
)

# Y128
haploi <- "Y128"
data_orf4 <- label_orf_rows(
  read.table("../../09_iupred/01_iupred_long/orf_Y128_s20.txt"),
  haploi
)

# Stack the four haplotype tables and show how many rows each one contributes.
data_orf <- rbind(data_orf1, data_orf2, data_orf3, data_orf4)
table(data_orf$haplo)


#prep gene table
#======================================================================

# Tag every row of a gene table with its feature type ("Gene") and the
# haplotype it was read for. rep() over nrow() keeps zero-row tables valid.
label_gene_rows <- function(tab, haplotype) {
  n_rows <- nrow(tab)
  tab$type <- rep("Gene", n_rows)
  tab$haplo <- rep(haplotype, n_rows)
  tab
}

# The gene files are named A03 / D06 / D01 / YPS128, but their rows are
# labelled with the same haplotype codes as the ORF tables.

# SpC
haploi <- "SA03"
data_g1 <- label_gene_rows(
  read.table("../../09_iupred/01_iupred_long/gene_A03_s0.txt"),
  haploi
)

# SpB
haploi <- "SD06"
data_g2 <- label_gene_rows(
  read.table("../../09_iupred/01_iupred_long/gene_D06_s0.txt"),
  haploi
)

# SpA
haploi <- "SD01"
data_g3 <- label_gene_rows(
  read.table("../../09_iupred/01_iupred_long/gene_D01_s0.txt"),
  haploi
)

# Y128
haploi <- "Y128"
data_g4 <- label_gene_rows(
  read.table("../../09_iupred/01_iupred_long/gene_YPS128_s0.txt"),
  haploi
)

# Stack the four haplotype tables, keep columns 2 to 7 only, and reuse the
# ORF column names so that both tables share the same layout.
data_gene <- rbind(data_g1, data_g2, data_g3, data_g4)
data_gene <- data_gene[, 2:7]
colnames(data_gene) <- colnames(data_orf)
table(data_gene$haplo)


# Stack the ORF and gene tables; data_gene carries the same column names as
# data_orf, so the two bind row-wise.
data <- rbind(data_orf, data_gene)

# Table ready for analysis: estimate the mean disorder score per ORF or gene.

# Keep only the entries whose V2 value (exported below as size_aa) reaches
# the minimum size.
minsize <- 20
data_size <- data[data$V2 >= minsize, ]

# Column 4 stores the per-entry scores as a single ";"-separated string.
# Split all rows at once and average each list of scores. Unlike a
# `1:nrow` loop, this yields a zero-length vector when no row passes the size
# filter (instead of iterating over c(1, 0)), and it avoids growing the
# result with c() on every iteration.
score_strings <- as.character(data_size[[4]])
meanscore <- vapply(
  strsplit(score_strings, ";"),
  function(scores) mean(as.numeric(scores)),
  numeric(1),
  USE.NAMES = FALSE
)

orf_size <- data_size$V2
orf_name <- data_size$V1
orf_type <- data_size$type
haplo <- data_size$haplo

# Table with all the information: one row per retained ORF or gene.
table_info <- data.frame(
  name = orf_name,
  type = orf_type,
  size_aa = orf_size,
  mean_dis = meanscore,
  haplo = haplo
)

# Write the table with the disorder values as a tab-separated file with a
# header line, no quoting and no row names.
write.table(
  table_info,
  "../tables_out/09iupred/table_iupred_long.txt",
  col.names = TRUE,
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
