# Merge per-strain count tables for differential expression analysis


# Outer-join two data frames on their row names.
#
# Args:
#   x, y: data frames whose row names are the join key.
#
# Returns:
#   A data frame holding the columns of `x` followed by the columns of `y`,
#   with one row per row name found in either input (cells with no match are
#   NA). The join key is restored as the row names rather than kept as a
#   column, so the result can be passed straight back in (e.g. via Reduce()).
MyMerge <- function(x, y) {
  # `all = TRUE` is spelled out instead of the reassignable shorthand `T`,
  # which would silently turn this into an inner join if `T` were shadowed.
  merged <- merge(x, y, by = "row.names", all = TRUE)
  # merge() returns the key in a "Row.names" column; move it back to row names.
  rownames(merged) <- merged$Row.names
  merged$Row.names <- NULL
  merged
}

#####################################################################
#GENES 
#####################################################################
# Merge gene count files
# Directory holding the per-strain count files.
count_dir <- "../../05diff_exp_start50/02_read_counts_full"

# One entry per strain: the file-name prefix and the labels given to its four
# count columns (two taken from the .rpfcount file, then two from .totcount).
gene_samples <- list(
  list(strain = "Y128",
       labels = c("RPF_Cer1", "RPF_Cer3", "TOT_Cer1", "TOT_Cer3")),
  list(strain = "SD01",
       labels = c("RPF_AA1", "RPF_AA3", "TOT_AA1", "TOT_AA3")),
  list(strain = "SD06",
       labels = c("RPF_BB1", "RPF_BB3", "TOT_BB1", "TOT_BB3")),
  list(strain = "SA03",
       labels = c("RPF_CC1", "RPF_CC2", "TOT_CC1", "TOT_CC2"))
)

# Read one strain's .rpfcount/.totcount pair and return a four-column count
# table indexed by feature name.
#
# Args:
#   rpf_file, tot_file: paths to the two count files; columns 10-11 of each
#     are kept as counts.
#   labels: the four column names for the result (rpf columns first).
#   name_field: position, within the ';'-separated column V9 of the rpf file,
#     of the field used as the row name.
#
# Returns:
#   A data frame with columns `labels` and one row per line of the rpf file.
read_gene_counts <- function(rpf_file, tot_file, labels, name_field = 5L) {
  rpf <- read.table(rpf_file)
  tot <- read.table(tot_file)

  # The two tables are bound column-wise below, which is only meaningful if
  # they list the same features in the same order.
  if (!identical(as.character(rpf$V9), as.character(tot$V9))) {
    warning("Annotation column differs between ", rpf_file, " and ", tot_file,
            "; counts may be paired with the wrong feature", call. = FALSE)
  }

  # Pick the field row by row, so a line with an unexpected number of fields
  # cannot shift the names of every line after it.
  attr_fields <- strsplit(as.character(rpf$V9), ";")
  feature_names <- vapply(attr_fields, function(parts) parts[name_field],
                          character(1))
  if (anyNA(feature_names)) {
    stop("Some lines of ", rpf_file, " have fewer than ", name_field,
         " ';'-separated fields in column 9", call. = FALSE)
  }

  counts <- cbind(rpf[, 10:11], tot[, 10:11])
  rownames(counts) <- feature_names
  colnames(counts) <- labels
  counts
}

# Read the count files and label rows with gene names
gene_tables <- lapply(gene_samples, function(s) {
  stem <- file.path(count_dir,
                    paste0(s$strain, "_genes_stat1_spar_60nt_start60"))
  read_gene_counts(paste0(stem, ".rpfcount"), paste0(stem, ".totcount"),
                   s$labels)
})

# Merge all strains into one table (outer join on gene name)
dat_all <- Reduce(MyMerge, gene_tables)

# Write the merged table
outname <- "../../05diff_exp_start50/03_table_counts_deseq2/counts_genes_start60.txt"
write.table(dat_all, outname, sep = "\t", quote = FALSE)
# Second copy in the summary ("bilan") folder
outname <- "../tables_out/04counts/counts_genes_start60.txt"
write.table(dat_all, outname, sep = "\t", quote = FALSE)



#####################################################################
#ORF 
#####################################################################
# Merge ORF count files
# Directory holding the per-strain count files.
count_dir <- "../../05diff_exp_start50/02_read_counts_full"

# One entry per strain: the file-name prefix and the labels given to its four
# count columns (two taken from the .rpfcount file, then two from .totcount).
orf_samples <- list(
  list(strain = "Y128",
       labels = c("RPF_Cer1", "RPF_Cer3", "TOT_Cer1", "TOT_Cer3")),
  list(strain = "SD01",
       labels = c("RPF_AA1", "RPF_AA3", "TOT_AA1", "TOT_AA3")),
  list(strain = "SD06",
       labels = c("RPF_BB1", "RPF_BB3", "TOT_BB1", "TOT_BB3")),
  list(strain = "SA03",
       labels = c("RPF_CC1", "RPF_CC2", "TOT_CC1", "TOT_CC2"))
)

# Read one strain's .rpfcount/.totcount pair and return a four-column count
# table indexed by feature name.
#
# Args:
#   rpf_file, tot_file: paths to the two count files; columns 10-11 of each
#     are kept as counts.
#   labels: the four column names for the result (rpf columns first).
#   name_field: position, within the ';'-separated column V9 of the rpf file,
#     of the field used as the row name.
#
# Returns:
#   A data frame with columns `labels` and one row per line of the rpf file.
read_orf_counts <- function(rpf_file, tot_file, labels, name_field = 1L) {
  rpf <- read.table(rpf_file)
  tot <- read.table(tot_file)

  # The two tables are bound column-wise below, which is only meaningful if
  # they list the same features in the same order.
  if (!identical(as.character(rpf$V9), as.character(tot$V9))) {
    warning("Annotation column differs between ", rpf_file, " and ", tot_file,
            "; counts may be paired with the wrong feature", call. = FALSE)
  }

  # Pick the field row by row, so a line with an unexpected number of fields
  # cannot shift the names of every line after it.
  attr_fields <- strsplit(as.character(rpf$V9), ";")
  feature_names <- vapply(attr_fields, function(parts) parts[name_field],
                          character(1))
  if (anyNA(feature_names)) {
    stop("Some lines of ", rpf_file, " have fewer than ", name_field,
         " ';'-separated fields in column 9", call. = FALSE)
  }

  counts <- cbind(rpf[, 10:11], tot[, 10:11])
  rownames(counts) <- feature_names
  colnames(counts) <- labels
  counts
}

# Read the count files and label rows with the first annotation field
orf_tables <- lapply(orf_samples, function(s) {
  stem <- file.path(count_dir, paste0(s$strain, "_orf_statall_spar_start60"))
  read_orf_counts(paste0(stem, ".rpfcount"), paste0(stem, ".totcount"),
                  s$labels)
})

# Merge all strains into one table (outer join on row name)
dat_orf <- Reduce(MyMerge, orf_tables)

# Conservation group of each ORF
cons <- read.table("../tables_out/02conservation/conservation_table_spar.txt",
                   header = TRUE)
consred <- data.frame(name = cons$orf, cons = cons$group)

# Strip the "id_orf=" prefix so the names can be matched against the
# conservation table, then keep only ORFs present in that table.
dat_orf$names <- sub("id_orf=", "", rownames(dat_orf))
dat_orf2 <- merge(dat_orf, consred, by.x = "names", by.y = "name")
# Row names take the form "<orf name>;<conservation group>".
rownames(dat_orf2) <- paste0(dat_orf2$names, ";", dat_orf2$cons)

# Count columns are selected by name rather than by position, so the
# selection stays right if the number of samples changes.
count_cols <- setdiff(colnames(dat_orf2), c("names", "cons"))

# Write counts for all ORFs
outname <- "../../05diff_exp_start50/03_table_counts_deseq2/counts_orfs_statall_start60.txt"
write.table(dat_orf2[, count_cols, drop = FALSE], outname,
            sep = "\t", quote = FALSE)

# Extract significant ORFs ---------------------------------------------------
data_stat <- read.table("../tables_out/03plastid/table_all_stat.txt",
                        header = TRUE)
sig_names <- unique(
  data_stat$name[data_stat$sig == "1" & data_stat$type == "orf"]
)

dat_orfsig <- dat_orf2[dat_orf2$names %in% sig_names, count_cols, drop = FALSE]

# Write counts for significant ORFs
outname <- "../../05diff_exp_start50/03_table_counts_deseq2/counts_orfs_sigphas_start60.txt"
write.table(dat_orfsig, outname, sep = "\t", quote = FALSE)
# Second copy in the summary ("bilan") folder
outname <- "../tables_out/04counts/counts_orfs_sigphas_start60.txt"
write.table(dat_orfsig, outname, sep = "\t", quote = FALSE)

