# Merge read-count tables for differential expression analysis


# Full outer join of two data frames on their row names.
#
# x, y: data frames whose row names are the join keys.
#
# Returns a data frame holding the columns of both inputs, with one row per
# row name present in either input; cells without a match are NA. The
# temporary "Row.names" key column created by merge() is moved back into the
# row names, so the result can be fed to MyMerge() again (e.g. via Reduce()).
MyMerge <- function(x, y) {
  # all = TRUE keeps unmatched rows from both sides. TRUE is spelled out
  # because the T shorthand is an ordinary variable that can be reassigned.
  df <- merge(x, y, by = "row.names", all = TRUE)
  rownames(df) <- df$Row.names
  df$Row.names <- NULL
  df
}

#####################################################################
#GENES 
#####################################################################
#merge genes files 
# Input count tables. The ORF section further down reads these same paths
# again, so the variable names file1..file4 must stay as they are.
file1 <- "../../05diff_exp/02_read_counts_full/Y128_genes_stat1_spar_60nt.count"
file2 <- "../../05diff_exp/02_read_counts_full/SD01_genes_stat1_spar_60nt.count"
file3 <- "../../05diff_exp/02_read_counts_full/SD06_genes_stat1_spar_60nt.count"
file4 <- "../../05diff_exp/02_read_counts_full/SA03_genes_stat1_spar_60nt.count"

# Read one count table and return its four count columns (columns 10 to 13),
# with row names taken from the 5th ";"-separated field of column V9 (the
# gene name) and column names set to sample_names.
#
# path:         path to a headerless table readable by read.table()
# sample_names: character vector with the 4 labels for the count columns
#
# Stops if any V9 entry does not split into exactly 5 fields: reshaping the
# flattened fields into a 5-column matrix would otherwise shift names across
# rows without any warning.
read_gene_counts <- function(path, sample_names) {
  raw <- read.table(path)
  fields <- strsplit(as.character(raw$V9), ";")
  if (any(lengths(fields) != 5L)) {
    stop("expected 5 ';'-separated fields in column V9 of ", path,
         call. = FALSE)
  }
  counts <- raw[, 10:13]
  rownames(counts) <- vapply(fields, function(f) f[5L], character(1))
  colnames(counts) <- sample_names
  counts
}

# Read each file with counts and index its rows by gene name
data1f <- read_gene_counts(
  file1, c("RPF_Cer1", "RPF_Cer3", "TOT_Cer1", "TOT_Cer3")
)
data2f <- read_gene_counts(
  file2, c("RPF_AA1", "RPF_AA3", "TOT_AA1", "TOT_AA3")
)
data3f <- read_gene_counts(
  file3, c("RPF_BB1", "RPF_BB3", "TOT_BB1", "TOT_BB3")
)
# NOTE(review): replicate suffix is 2 here but 3 for the other tables - confirm
data4f <- read_gene_counts(
  file4, c("RPF_CC1", "RPF_CC2", "TOT_CC1", "TOT_CC2")
)

# Merge all tables on gene name (full outer join, missing counts become NA)
dat_all <- Reduce(MyMerge, list(data1f, data2f, data3f, data4f))

# Write the merged table
outname <- "../../05diff_exp/03_table_counts_deseq2/counts_genes.txt"
write.table(dat_all, outname, sep = "\t", quote = FALSE)
# Second copy in the summary ("bilan") folder
outname <- "../tables_out/04counts/counts_genes.txt"
write.table(dat_all, outname, sep = "\t", quote = FALSE)


#####################################################################
#ORF 
#####################################################################

# ORF-level counts: the same four input tables as the genes section, but rows
# are indexed by the first ";"-separated field of column V9 (the ORF id,
# written as "id_orf=<name>").
# NOTE(review): the original comment here read "select annotations with no
# ambiguities", but no filtering happens at this step - confirm the intent.

# Read one count table and return its four count columns (columns 10 to 13),
# with row names taken from the 1st ";"-separated field of column V9 and
# column names set to sample_names.
#
# path:         path to a headerless table readable by read.table()
# sample_names: character vector with the 4 labels for the count columns
#
# Stops if any V9 entry does not split into exactly 5 fields: reshaping the
# flattened fields into a 5-column matrix would otherwise shift ids across
# rows without any warning.
read_orf_counts <- function(path, sample_names) {
  raw <- read.table(path)
  fields <- strsplit(as.character(raw$V9), ";")
  if (any(lengths(fields) != 5L)) {
    stop("expected 5 ';'-separated fields in column V9 of ", path,
         call. = FALSE)
  }
  counts <- raw[, 10:13]
  rownames(counts) <- vapply(fields, function(f) f[1L], character(1))
  colnames(counts) <- sample_names
  counts
}

data1f <- read_orf_counts(
  file1, c("RPF_Cer1", "RPF_Cer3", "TOT_Cer1", "TOT_Cer3")
)
data2f <- read_orf_counts(
  file2, c("RPF_AA1", "RPF_AA3", "TOT_AA1", "TOT_AA3")
)
data3f <- read_orf_counts(
  file3, c("RPF_BB1", "RPF_BB3", "TOT_BB1", "TOT_BB3")
)
# NOTE(review): replicate suffix is 2 here but 3 for the other tables - confirm
data4f <- read_orf_counts(
  file4, c("RPF_CC1", "RPF_CC2", "TOT_CC1", "TOT_CC2")
)

# Merge all tables on ORF id (full outer join, missing counts become NA)
dat_orf <- Reduce(MyMerge, list(data1f, data2f, data3f, data4f))

# Count columns to export, captured by name before the helper column "names"
# is added, so the export does not depend on fixed column positions.
count_cols <- colnames(dat_orf)

cons <- read.table(
  "../tables_out/02conservation/conservation_table_spar.txt",
  header = TRUE
)
consred <- data.frame(name = cons$orf, cons = cons$group)

# Attach the conservation group to each ORF. merge() does an inner join here,
# so ORFs that are absent from the conservation table are dropped.
dat_orf$names <- sub("id_orf=", "", rownames(dat_orf))
dat_orf2 <- merge(dat_orf, consred, by.x = "names", by.y = "name")
# Row labels have the form "<orf name>;<conservation group>"
rownames(dat_orf2) <- paste0(dat_orf2$names, ";", dat_orf2$cons)

# Write counts for all annotated ORFs
outname <- "../../05diff_exp/03_table_counts_deseq2/counts_orfs_statall.txt"
write.table(dat_orf2[, count_cols], outname, sep = "\t", quote = FALSE)

# Extract significant ORFs ###################################################
data_stat <- read.table(
  "../tables_out/03plastid/table_all_stat.txt",
  header = TRUE
)
sig_names <- unique(
  data_stat$name[data_stat$sig == "1" & data_stat$type == "orf"]
)

# Keep only the ORFs flagged as significant, and only the count columns
dat_orfsig <- dat_orf2[dat_orf2$names %in% sig_names, count_cols]

# Write significant ORFs
outname <- "../../05diff_exp/03_table_counts_deseq2/counts_orfs_sigphas.txt"
write.table(dat_orfsig, outname, sep = "\t", quote = FALSE)
# Second copy in the summary ("bilan") folder
outname <- "../tables_out/04counts/counts_orfs_sigphas.txt"
write.table(dat_orfsig, outname, sep = "\t", quote = FALSE)

