# Script to make a presence/absence table of each annotated ORF per haplotype
# and add an ORF number as a unique ID based on position conservation


# Read the ORF annotation table. No header is given to read.table(), so the
# columns are named V1, V2, ... Only V1, V2, V5, V6 and V7 are used below.
data <- read.table("../../03synt_intergenic_orf/01_ORF_annotation/all_SID_align.orf")
head(data)

# Positional key of each ORF: V1;V5;V6;V7 joined with ";".
# NOTE(review): V1/V5/V6 look like SID/start/stop (see the length formula
# below); V7 is presumably the strand or frame -- confirm against the
# annotation format.
data$posorf <- paste0(data$V1, ";", data$V5, ";", data$V6, ";", data$V7)
data$len <- data$V6 - data$V5 + 1


# ORF x haplotype (V2) contingency table. Each cell is the number of input
# rows carrying that key in that haplotype (0 = absent).
torf <- table(data$posorf, data$V2)

# Prefix every distinct key with a running ORF number. Rows of `torf` follow
# the sorted factor levels of the key, so the numbering follows that order.
# seq_len() avoids the 1:0 pitfall of `1:dim(torf)[1]`.
rownames(torf) <- paste0("ORF", "_", seq_len(nrow(torf)), ";", rownames(torf))

# Long format: one row per (ORF, haplotype) pair with its count.
dataorf <- data.frame(torf)
colnames(dataorf) <- c("infos", "haplo", "pres")

# Split "ORF_n;V1;V5;V6;V7" back into five separate columns. strsplit() drops
# a trailing empty field, and a ";" inside V1 would add one; either case would
# silently misalign a fixed 5-column matrix, so the field count is checked.
info_fields <- strsplit(as.character(dataorf$infos), ";", fixed = TRUE)
if (any(lengths(info_fields) != 5L)) {
  stop(
    "ORF key does not split into exactly 5 ';'-separated fields ",
    "(empty last field or ';' inside a field?)",
    call. = FALSE
  )
}
tinfos <- matrix(as.character(unlist(info_fields)), ncol = 5, byrow = TRUE)


# Output columns: infos, haplo, pres, then the five split fields.
# Written tab-separated, without quotes, row names or a header line.
datafinal <- cbind(dataorf, tinfos)
write.table(datafinal, "../../03synt_intergenic_orf/02_ORF_tables/table_orf_sid_aligned_notRM",
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
