# Keep character columns as character when building data frames.
# Spelled FALSE rather than F: F is an ordinary variable that can be reassigned.
options(stringsAsFactors = FALSE)

library(DESeq2)
library(pheatmap)
library('pvclust')
library(bitops)


# Return the first element of `vec` using single-bracket subsetting.
#
# The container type is preserved: an atomic vector gives a length-one vector
# (names kept), a list gives a one-element list. Indexing an empty atomic
# vector yields NA of the matching type, and NULL yields NULL.
get_first <- function(vec) {
  vec[1L]
}

#########################################################################################################
# my.kallisto.file <- "../Kallisto//2017-05-19_Rat_aging_Brain_Kallisto_MaleOnly.txt"
# my.tissue <- "Brain"
# 
# Run a DESeq2 analysis with age as a continuous covariate on Kallisto counts,
# and write QC plots plus result tables to the current working directory.
#
# Arguments:
#   my.tissue         Tissue label; only used to build the output file prefix.
#   my.kallisto.file  Path to a tab-separated table with a header row. It must
#                     contain a `GeneSymbol` column; columns 6 onwards are
#                     summed per gene symbol and used as per-sample counts.
#   my.5m, my.26m     Number of samples in the young / old group. Sample
#                     columns must be ordered young first, then old, and
#                     my.5m + my.26m must equal the number of sample columns.
#   my.sex            Sex labels for the samples, in column order. `sex` is
#                     added to the design only if more than one distinct value
#                     is present.
#
# Returns:
#   The DESeq2 results object for the `age` coefficient, restricted to genes
#   whose adjusted p-value is not NA.
#
# Side effects:
#   Writes dispersion, MDS, PCA, boxplot, heatmap (only when at least one gene
#   is significant) and volcano PDFs, plus three tab-separated tables. All
#   file names start with "<Sys.Date()>_Rat_aging_<my.tissue>_...".
process_aging_rat_rna <- function(my.tissue, my.kallisto.file, my.5m = 4, my.26m = 4, my.sex = rep('M',8)) {

  my.outprefix <- paste(Sys.Date(), "Rat_aging", my.tissue, "kallisto_DEseq_linear_analysis", sep = "_")

  # Output names are joined with "_" and most suffixes also start with "_",
  # which yields a double underscore. Kept unchanged so existing file names
  # (and anything downstream that globs for them) stay the same.
  out_file <- function(suffix) paste(my.outprefix, suffix, sep = "_")

  # Open a PDF device, evaluate the plotting code, and always close the device,
  # even when the plotting code fails (otherwise the device stays open).
  with_pdf <- function(file, plot.code, ...) {
    pdf(file, ...)
    on.exit(dev.off(), add = TRUE)
    force(plot.code)  # lazy argument: plotting happens here, on the new device
    invisible(file)
  }

  my.group.labels <- c("5.25m", "26m")
  my.group.colors <- c("coral", "dodgerblue")

  # read in Kallisto mappings
  my.data <- read.csv(my.kallisto.file, sep = "\t", header = TRUE)

  # sum reads over genes (DESeq2 is run at gene level, not transcript level)
  my.data.per.gene <- aggregate(my.data[, 6:ncol(my.data)],
                                by = list(my.data$GeneSymbol),
                                FUN = sum)

  # round counts (DESeq2 needs integers)
  my.data.per.gene[, -1] <- round(my.data.per.gene[, -1])
  rownames(my.data.per.gene) <- my.data.per.gene$Group.1
  colnames(my.data.per.gene)[1] <- 'GeneName'

  # Drop genes with (almost) no reads, see the DESeq2 vignette on pre-filtering.
  # A positive index is used on purpose: with a negative index, an empty
  # "rows to drop" set would select zero rows and discard every gene.
  my.null <- which(rowSums(my.data.per.gene[, -1, drop = FALSE]) <= 5)
  my.keep <- setdiff(seq_len(nrow(my.data.per.gene)), my.null)
  my.filtered.matrix <- my.data.per.gene[my.keep, -1, drop = FALSE]

  # Age covariate, one value per sample column (young group first).
  # NOTE(review): the original comment said "age in weeks", but sample labels
  # use an "m" suffix; confirm the unit.
  my.age <- c(rep(5.25, my.5m), rep(26, my.26m))

  # Fail early: data.frame() would silently recycle a shorter age vector and
  # the mismatch would only surface after the expensive DESeq() run.
  if (length(my.age) != ncol(my.filtered.matrix)) {
    stop("my.5m + my.26m (", length(my.age), ") does not match the number of ",
         "sample columns (", ncol(my.filtered.matrix), ") in ", my.kallisto.file,
         call. = FALSE)
  }

  # design matrix
  dataDesign <- data.frame(row.names = colnames(my.filtered.matrix),
                           age = my.age,
                           sex = my.sex)

  # model age as a covariate; adjust for sex only when both sexes are present
  my.design <- if (length(unique(my.sex)) > 1) ~ age + sex else ~ age
  dds <- DESeqDataSetFromMatrix(countData = my.filtered.matrix,
                                colData = dataDesign,
                                design = my.design)

  # run DESeq normalizations and extract the age effect
  dds.deseq <- DESeq(dds)
  res <- results(dds.deseq, name = "age")

  # plot dispersion
  with_pdf(out_file("_dispersion_plot.pdf"), plotDispEsts(dds.deseq))

  # sample names: group label followed by the replicate number
  my.sample.names <- c(paste0(my.group.labels[1], seq_len(my.5m)),
                       paste0(my.group.labels[2], seq_len(my.26m)))
  my.colors <- c(rep(my.group.colors[1], my.5m), rep(my.group.colors[2], my.26m))

  # log2 normalized expression values (small pseudocount avoids log2(0))
  tissue.cts <- log2(counts(dds.deseq, normalized = TRUE) + 0.01)
  colnames(tissue.cts) <- my.sample.names

  #### MDS on 1 - Spearman correlation between samples ####
  mds.result <- cmdscale(1 - cor(tissue.cts, method = "spearman"),
                         k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
  x <- mds.result[, 1]
  y <- mds.result[, 2]

  with_pdf(out_file("_MDS_plot.pdf"), {
    plot(x, y, xlab = "MDS dimension 1", ylab = "MDS dimension 2",
         main = "Multi-dimensional Scaling", cex = 2)
    points(x, y, pch = 16, col = my.colors, cex = 2)
    text(x, y, my.sample.names, col = "grey", cex = 0.5, pos = 1)
    legend("topleft", my.group.labels, col = my.group.colors, pch = 16, bty = 'n', pt.cex = 2)
  })

  #### PCA ####
  # zero-variance genes cannot be scaled to unit variance, so drop them first
  my.pos.var <- apply(tissue.cts, 1, var) > 0
  my.pca <- prcomp(t(tissue.cts[my.pos.var, ]), scale. = TRUE)
  x <- my.pca$x[, 1]
  y <- my.pca$x[, 2]

  # row 2 of the importance table is the proportion of variance explained
  my.var.explained <- summary(my.pca)$importance[2, ]

  with_pdf(out_file("PCA_plot.pdf"), {
    plot(x, y, pch = 1, cex = 3,
         xlab = paste0('PC1 (', round(100 * my.var.explained[1], 1), "%)"),
         ylab = paste0('PC2 (', round(100 * my.var.explained[2], 1), "%)"),
         cex.lab = 1.5, main = "PC1 vs. PC2")
    points(x, y, pch = 16, cex = 3, col = my.colors)
    text(x, y, my.sample.names, col = "grey", cex = 0.5, pos = 1)
    # legend now uses the same group labels as the samples and the MDS plot
    # (it previously read "6m" / "24m")
    legend("topleft", my.group.labels, col = my.group.colors, pch = 16, bty = 'n', pt.cex = 2)
  })
  #### #### #### #### #### #### ####

  # expression range
  with_pdf(out_file("_Normalized_counts_boxplot.pdf"),
           boxplot(tissue.cts, names = my.sample.names, col = my.colors, cex = 0.5,
                   ylab = "Log2 DESeq2 Normalized counts", las = 2))

  ### get the heatmap of aging changes at FDR5
  ## exclude genes without an adjusted p-value
  res <- res[!is.na(res$padj), ]

  my.sig <- res$padj < 0.05
  genes.aging <- rownames(res)[my.sig]
  my.num.aging <- length(genes.aging)

  # heatmap drawing - only if there is at least one gene
  if (my.num.aging > 0) {
    my.heatmap.title <- paste0("Aging significant (FDR<5%), ", my.num.aging, " genes")
    with_pdf(out_file("_Heatmap_significant_genes.pdf"),
             pheatmap(tissue.cts[genes.aging, , drop = FALSE],  # stay a matrix for 1 gene
                      cluster_cols = FALSE,
                      cluster_rows = my.num.aging > 1,  # clustering needs >= 2 rows
                      color = colorRampPalette(rev(c("#CC3333","#FF9999","#FFCCCC","white","#CCCCFF","#9999FF","#333399")))(50),
                      show_rownames = FALSE, scale = "row",
                      main = my.heatmap.title, cellwidth = 25),
             width = 8, height = 5, onefile = FALSE)
  } else {
    message("No genes significant at FDR < 5% for ", my.tissue, "; skipping heatmap.")
  }

  # volcano plot, significant genes highlighted in red
  with_pdf(out_file("_Volcano_plot.pdf"), {
    smoothScatter(res$log2FoldChange, -log10(res$padj), col = "grey", xlim = c(-1.5, 1.5))
    points(res$log2FoldChange[my.sig], -log10(res$padj)[my.sig], cex = 0.6, col = "red")
  })

#   # do clustering
#   my.pv <- pvclust(tissue.cts,nboot=100)
#   my.heatmap.out <- paste(my.outprefix,"_PVCLUST_result.pdf", sep ="_")
#
#   pdf(my.heatmap.out)
#   plot(my.pv)
#   dev.off()

  # output result tables to files
  write.table(tissue.cts, file = out_file("_log2_counts_matrix.txt"),
              sep = "\t", row.names = TRUE, quote = FALSE)
  write.table(res, file = out_file("_all_genes_statistics.txt"),
              sep = "\t", row.names = TRUE, quote = FALSE)
  write.table(res[genes.aging, ], file = out_file("_FDR5_genes_statistics.txt"),
              sep = "\t", row.names = TRUE, quote = FALSE)

  #save(res,file=out_file("_statistics.RData"))

  return(res)
}

#########################################################################################################
