#script to normalize read counts per lib type (rpf or tot)
library(RColorBrewer)

#read data with counts
# Load the per-gene and per-ORF count tables and stack them row-wise into a
# single table. rbind() on data frames requires matching column names.
data_genes <- read.table("../tables_out/04counts/counts_genes_start60.txt")
data_orfsig <- read.table("../tables_out/04counts/counts_orfs_sigphas_start60.txt")
data_all <- rbind(data_genes, data_orfsig)


# Remove every row that contains at least one NA.
# is.na() + rowSums() is vectorised over the whole table, which avoids the
# row-by-row matrix coercion done by apply(data_all, 1, ...).
row.has.na <- rowSums(is.na(data_all)) > 0
data_all <- data_all[!row.has.na, ]

# Write the table with all counts, not normalized ########################

# Make sure the output directory exists so write.table() does not fail on a
# fresh checkout; this is a no-op when the directory is already there.
dir.create("../tables_out/04counts/Deseq2_start60",
           showWarnings = FALSE, recursive = TRUE)

write.table(data_all,
            "../tables_out/04counts/Deseq2_start60/1.counts_all_RPF_TOT_notnorm_start60.txt",
            col.names = TRUE, sep = "\t", quote = FALSE, row.names = TRUE)


########################################################################
# DESEQ2 NORMALIZATION 
########################################################################
library("DESeq2")
library("pheatmap")
#prep tables ###########################################################
#make a table with rpf counts
# The selected columns follow a repeating pattern of four: within each group
# the first two columns go to the RPF table and the last two to the TOT table.
# Each index vector is built from the first column of every pair plus an
# offset of 0/1, giving 1,2,5,6,9,10,13,14.
select_rpf <- rep(c(1, 5, 9, 13), each = 2) + c(0, 1)
data_rpf <- data_all[, select_rpf]


# Same construction for the tot counts: 3,4,7,8,11,12,15,16.
select_tot <- rep(c(3, 7, 11, 15), each = 2) + c(0, 1)
data_tot <- data_all[, select_tot]

#Analysis  #############################################################
#with rpf data
##########################################################################
# Normalize the RPF libraries with DESeq2 and write the normalized counts.
type <- "RPF"
countdata <- data_rpf

# Sample sheet: one row per count column, in column order. Lineage labels are
# assigned by position, two consecutive columns per lineage.
coldata <- data.frame(
  type = rep(type, 8),
  lineage = rep(c("Cer", "A", "B", "C"), each = 2)
)
rownames(coldata) <- colnames(countdata)

dds_rpf <- DESeqDataSetFromMatrix(countData = countdata,
                                  colData = coldata,
                                  design = ~ lineage)

dds_rpf <- DESeq(dds_rpf)

# rlog-transformed values are used only for the PCA plot below.
rld_rpf <- rlog(dds_rpf, blind = TRUE)
# plotPCA() returns a plot object rather than drawing it; print() it
# explicitly so the PCA is also rendered when the script is run via source(),
# where top-level values are not auto-printed.
print(plotPCA(rld_rpf, intgroup = c("lineage")))
#pheatmap(assay(rld_rpf),cluster_cols = FALSE)

# Extract the size-factor-normalized counts and write them out.
datanorm_rpf <- counts(dds_rpf, normalized = TRUE)
write.table(datanorm_rpf,
            "../tables_out/04counts/Deseq2_start60/1.counts_all_RPF_norm_deseq2_start60.txt",
            col.names = TRUE, sep = "\t", quote = FALSE, row.names = TRUE)

########################################################################
#with tot data #########################################################
########################################################################


# Normalize the TOT libraries with DESeq2 and write the normalized counts.
type <- "TOT"
countdata <- data_tot

# Sample sheet: one row per count column, in column order. Lineage labels are
# assigned by position, two consecutive columns per lineage.
coldata <- data.frame(
  type = rep(type, 8),
  lineage = rep(c("Cer", "A", "B", "C"), each = 2)
)
rownames(coldata) <- colnames(countdata)

dds_tot <- DESeqDataSetFromMatrix(countData = countdata,
                                  colData = coldata,
                                  design = ~ lineage)

dds_tot <- DESeq(dds_tot)

# rlog-transformed values are used only for the diagnostic plots below.
rld_tot <- rlog(dds_tot, blind = TRUE)
# plotPCA() returns a plot object rather than drawing it; print() it
# explicitly so the PCA is also rendered when the script is run via source(),
# where top-level values are not auto-printed.
print(plotPCA(rld_tot, intgroup = c("lineage")))
# Heatmap of the rlog values; columns are kept in their original order.
pheatmap(assay(rld_tot), cluster_cols = FALSE)

# Extract the size-factor-normalized counts and write them out.
datanorm_tot <- counts(dds_tot, normalized = TRUE)
write.table(datanorm_tot,
            "../tables_out/04counts/Deseq2_start60/1.counts_all_TOT_norm_deseq2_start60.txt",
            col.names = TRUE, sep = "\t", quote = FALSE, row.names = TRUE)



