#setwd('/Volumes/MyBook_3/BD_aging_project/Public_datasets/Killifish_aging_RNAseq/Brain_files_for_BB//kallisto_results')
# NOTE: machine-specific absolute path; every relative input/output path below
# is resolved against this directory, so adjust it before running elsewhere.
setwd('/Users/benayoun/Dropbox/Manuscripts_and_Publications/2018_aging_epigenomics_data_description/Aging_omics_paper/Github_folder/Figure5_Conservation/Comparisons/Killifish_aging_RNAseq/Brain/')
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
options(stringsAsFactors = FALSE)

# 2017-05-15
# analyze Fish Brain RNAseq
# using param's kallisto runs

library(DESeq2)
library(pheatmap)
library('pvclust')

# read in Kallisto mappings (tab-delimited table with a header row)
my.data <- read.csv("2017-05-15_Brain_Nfur_aging_kallisto_mapping.txt", sep = "\t", header = TRUE)

# sum reads over genes (so that DESeq2 works on genes rather than transcripts)
# columns 6:30 are the 25 per-sample columns; rows are grouped by ENS_GID
my.data.per.gene <- aggregate(my.data[, 6:30], by = list(my.data$ENS_GID), FUN = sum) # 24724

# round counts (DESeq needs integers)
# after aggregate(), column 1 is the grouping key and columns 2:26 are the samples
my.data.per.gene[, 2:26] <- round(my.data.per.gene[, 2:26])
rownames(my.data.per.gene) <- my.data.per.gene$Group.1
colnames(my.data.per.gene)[1] <- 'GeneName'

# get the genes with (almost) no reads out: total count over all samples <= 5
# (see DESeq2 vignette)
my.null <- which(rowSums(my.data.per.gene[, 2:26]) <= 5)
# Select the rows to KEEP rather than negating `my.null`: when no gene is below
# the threshold, `x[-integer(0), ]` would silently return zero rows.
my.keep <- setdiff(seq_len(nrow(my.data.per.gene)), my.null)
my.filtered.matrix <- my.data.per.gene[my.keep, 2:26] # 24131 genes

# Sample ages in weeks: five ages, each repeated for five consecutive columns
my.age <- rep(c(5, 12, 20, 27, 39), each = 5)

# Sample annotation for DESeq2: one row per column of the count matrix,
# carrying the numeric age of that sample
dataDesign <- data.frame(age = my.age,
                         row.names = colnames(my.filtered.matrix))

# Build the DESeq2 data set, modeling expression as a function of age
dds <- DESeqDataSetFromMatrix(
  countData = my.filtered.matrix,
  colData   = dataDesign,
  design    = ~ age
)

# Run the DESeq pipeline on the data set
dds.deseq <- DESeq(dds)

# Extract the statistics for the "age" coefficient by name
res <- results(dds.deseq, name = "age")

# plot dispersion estimates of the fitted model to a dated PDF
# Use "_" as the separator (as for the MDS plot output): the default paste()
# separator would put a space inside the file name.
my.disp.out <- paste(Sys.Date(), "brain_dispersion_plot.pdf", sep = "_")

pdf(my.disp.out)
plotDispEsts(dds.deseq)
dev.off()

# build sample names as <age>w<running index>, e.g. "5w1" ... "39w25"
# seq_along() keeps the index in step with my.age instead of hard-coding 1:25
my.sample.names <- paste0(my.age, "w", seq_along(my.age))

# normalized expression values on a log2 scale
# - `normalized` is spelled out in full rather than relying on partial
#   argument matching of `normalize`
# - 0.01 is added as a pseudocount so that zero counts do not give -Inf
tissue.cts <- log2(counts(dds.deseq, normalized = TRUE) + 0.01)
colnames(tissue.cts) <- my.sample.names

# Multi-dimensional scaling of the samples, using 1 - Spearman correlation
# between sample columns as the dissimilarity
my.spearman.dist <- 1 - cor(tissue.cts, method = "spearman")
mds.result <- cmdscale(my.spearman.dist, k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x <- mds.result[, 1]
y <- mds.result[, 2]

# Five-step colour ramp, one colour per age group ...
my.palette <- colorRampPalette(c("coral", "blueviolet", "dodgerblue"))(5)

# ... expanded to one colour per sample (five consecutive samples per colour)
my.colors <- rep(my.palette, each = 5)

my.mds.out <- paste(Sys.Date(), "killifish_Brain_aging_analysis_MDS_plot.pdf", sep = "_")

pdf(my.mds.out)
# open circles first, then filled age-coloured points on top
plot(x, y,
     xlab = "MDS dimension 1",
     ylab = "MDS dimension 2",
     main = "Multi-dimensional Scaling",
     cex = 2)
points(x, y, pch = 16, col = my.colors, cex = 2)
text(x, y, my.sample.names, col = "grey", cex = 0.5, pos = 1)
legend("topleft",
       c("5w", "12w", "20w", "27w", "39w"),
       col = my.palette,
       pch = 16, bty = 'n', pt.cex = 2)
dev.off()


#### PCA #### 
# keep only genes whose expression varies across samples; constant rows cannot
# be rescaled to unit variance by prcomp
my.pos.var <- apply(tissue.cts, 1, var) > 0
# do PCA analysis on samples (rows after transposition), with scaled genes
# `scale.` is the actual argument name of prcomp; `scale` only worked through
# partial matching
my.pca <- prcomp(t(tissue.cts[my.pos.var, ]), scale. = TRUE)
x <- my.pca$x[, 1]
y <- my.pca$x[, 2]
z <- my.pca$x[, 3]

my.summary <- summary(my.pca)
# Importance of components:
# PC1     PC2      PC3      PC4      PC5      PC6      PC7      PC8      PC9     PC10     PC11     PC12     PC13     PC14     PC15     PC16     PC17     PC18     PC19     PC20     PC21
# Standard deviation     72.5664 57.9373 41.91561 39.63695 34.40400 31.98301 30.00671 28.82122 28.14969 26.82923 25.79114 25.46125 24.14572 23.67060 22.55119 22.05219 21.81238 21.08444 20.89048 20.35343 19.88071
# Proportion of Variance  0.2182  0.1391  0.07281  0.06511  0.04905  0.04239  0.03731  0.03442  0.03284  0.02983  0.02757  0.02686  0.02416  0.02322  0.02107  0.02015  0.01972  0.01842  0.01809  0.01717  0.01638
# Cumulative Proportion   0.2182  0.3573  0.43013  0.49524  0.54429  0.58668  0.62399  0.65842  0.69125  0.72108  0.74865  0.77551  0.79967  0.82289  0.84397  0.86412  0.88384  0.90226  0.92034  0.93751  0.95389

# axis labels "PCn (xx.x%)"; row 2 of the importance table is the proportion
# of variance explained by each component
my.pc.labels <- paste0("PC", 1:3, " (", round(100 * my.summary$importance[2, 1:3], 1), "%)")

# "_" separator so the date is not glued directly onto the file name
my.pca.out <- paste(Sys.Date(), "killifish_Brain_aging_PCA_plot.pdf", sep = "_")

pdf(my.pca.out)
par(mfrow = c(2, 2))
# panel 1: PC1 vs PC2 (open circles, then filled age-coloured points on top)
plot(x, y, pch = 1, cex = 3,
     xlab = my.pc.labels[1],
     ylab = my.pc.labels[2],
     cex.lab = 1.5, main = "PC1 vs. PC2")
points(x, y, pch = 16, cex = 3, col = my.colors)
text(x, y, my.sample.names, col = "grey", cex = 0.5, pos = 1)
legend("topleft", c("5w", "12w", "20w", "27w", "39w"), col = my.colors[c(1, 6, 11, 16, 21)], pch = 16, bty = 'n', pt.cex = 2)

# panel 2: PC1 vs PC3
plot(x, z, pch = 1, cex = 3,
     xlab = my.pc.labels[1],
     ylab = my.pc.labels[3],
     cex.lab = 1.5, main = "PC1 vs. PC3")
points(x, z, pch = 16, cex = 3, col = my.colors)
text(x, z, my.sample.names, col = "grey", cex = 0.5, pos = 1)

# panel 3: PC2 vs PC3 (title previously read "PC1 vs. PC3" by mistake)
plot(y, z, pch = 1, cex = 3,
     xlab = my.pc.labels[2],
     ylab = my.pc.labels[3],
     cex.lab = 1.5, main = "PC2 vs. PC3")
points(y, z, pch = 16, cex = 3, col = my.colors)
text(y, z, my.sample.names, col = "grey", cex = 0.5, pos = 1)
par(mfrow = c(1, 1))
dev.off()
#### #### #### #### #### #### #### 

# expression range: per-sample boxplot of the log2 normalized counts
# paste0() avoids the space that paste()'s default separator would insert
# between the date and the rest of the file name
my.exp.out <- paste0(Sys.Date(), "_Normalized_counts_boxplot.pdf")

pdf(my.exp.out)
boxplot(tissue.cts, names = my.sample.names, col = my.colors, cex = 0.5, ylab = "Log2 DESeq2 Normalized counts", las = 2)
dev.off()

### get the heatmap of aging changes at FDR5
## exclude genes whose adjusted p-value is NA
res <- res[!is.na(res$padj), ]

genes.aging <- rownames(res)[res$padj < 0.05]
my.num.aging <- length(genes.aging) # 6587

# heatmap drawing - only when there are enough significant genes
my.heatmap.out <- paste(Sys.Date(),"_Heatmap_significant_genes.pdf", sep = "_")
my.heatmap.title <- paste("Aging significant (FDR<5%), ",my.num.aging, " genes",sep="")

if (my.num.aging > 1) {
  # row clustering needs at least two genes; drop = FALSE keeps the subset a
  # matrix whatever the number of selected rows
  pdf(my.heatmap.out, width = 8, height = 5, onefile = FALSE)
  pheatmap(tissue.cts[genes.aging, , drop = FALSE],
           # blue (low) -> white -> red (high), 50 steps; passed by name
           # instead of relying on argument position
           color = colorRampPalette(rev(c("#CC3333","#FF9999","#FFCCCC","white","#CCCCFF","#9999FF","#333399")))(50),
           cluster_cols = FALSE,   # keep samples in their original column order
           cluster_rows = TRUE,
           show_rownames = FALSE, scale = "row",
           main = my.heatmap.title, cellwidth = 25)
  dev.off()
} else {
  message("Skipping heatmap: fewer than 2 genes pass FDR < 5% (n = ", my.num.aging, ")")
}

# Flag genes significant at FDR < 5%
my.sig <- res$padj < 0.05

my.volcano.out <- paste(Sys.Date(),"_Volcano_plot.pdf", sep = "_")

# Coordinates of the significant genes, highlighted on top of the density
my.sig.lfc <- res$log2FoldChange[my.sig]
my.sig.neglog.padj <- -log10(res$padj)[my.sig]

pdf(my.volcano.out)
# The smoothScatter arguments are kept as inline expressions: no axis labels
# are supplied, so the defaults are derived from these expressions.
smoothScatter(res$log2FoldChange,-log10(res$padj), col = "grey", xlim=c(-1.5,1.5))
points(my.sig.lfc, my.sig.neglog.padj, cex = 0.6, col = "red")
dev.off()

# do hierarchical clustering of the samples with bootstrap support (100 resamplings)
# NOTE(review): no random seed is set in this script, so the bootstrap values
# will presumably differ slightly between runs.
my.pv <- pvclust(tissue.cts, nboot = 100)
# paste0() avoids the space that paste()'s default separator would insert
# between the date and the rest of the file name
my.heatmap.out <- paste0(Sys.Date(), "_PVCLUST_result.pdf")

pdf(my.heatmap.out)
plot(my.pv)
dev.off()

# Write result tables and the results object to dated files.
# All file names share the prefix <date>_Killifish_RNAseq_aging. Each suffix
# starts with "_" and is joined with sep = "_", so names contain a double
# underscore; later code loads a file named this way, so keep them unchanged.
my.outprefix  <- paste(Sys.Date(), "Killifish_RNAseq_aging", sep = "_")
my.out.ct.mat <- paste(my.outprefix, "_log2_counts_matrix.txt", sep = "_")
my.out.stats  <- paste(my.outprefix, "_all_genes_statistics.txt", sep = "_")
my.out.fdr5   <- paste(my.outprefix, "_FDR5_genes_statistics.txt", sep = "_")
my.out.rdata  <- paste(my.outprefix, "_statistics.RData", sep = "_")

# log2 normalized counts for every gene in the matrix
write.table(tissue.cts, file = my.out.ct.mat, sep = "\t", row.names = TRUE, quote = FALSE)
# statistics for every gene kept in `res`
write.table(res, file = my.out.stats, sep = "\t", row.names = TRUE, quote = FALSE)
# statistics restricted to the FDR < 5% genes
write.table(res[genes.aging,], file = my.out.fdr5, sep = "\t", row.names = TRUE, quote = FALSE)

# keep the results object itself for downstream comparisons
save(res, file = my.out.rdata)




############################################################################################################################################
###########################################                   Mouse orthologs                    ###########################################
############################################################################################################################################
options(stringsAsFactors = FALSE)
library(bitops)

# Compare the killifish results to the mouse brain results.

# Ortholog pairs come from Param's BLAST best-hit table: tab-delimited with no
# header row, so columns are auto-named V1, V2, ...
my.orthology <- read.csv(
  '/Users/benayoun/Dropbox/Manuscripts_and_Publications/2018_aging_epigenomics_data_description/Aging_omics_paper/Github_folder/Figure5_Conservation/Pathway_enrichment/Orthology/BestHits_nfur-mmus_1e-3.txt',
  sep = "\t",
  header = FALSE
)

# Return the element at position 1 of `vec` (i.e. `vec[1]`).
# For a zero-length atomic vector this yields NA, as for any out-of-range index.
get_first <- function(vec) {
  vec[1]
}

# Each entry of V1 / V2 is a "|"-delimited string; keep only the first field.
my.nfur.fields <- strsplit(my.orthology$V1, "|", fixed = TRUE)
my.nfur.names <- vapply(my.nfur.fields, get_first, character(1))
# length(my.nfur.names)
# [1] 21073

my.mouse.fields <- strsplit(my.orthology$V2, "|", fixed = TRUE)
my.mouse.names <- vapply(my.mouse.fields, get_first, character(1))
length(my.mouse.names)
# [1] 21073

# Two-column lookup table pairing each killifish name with its mouse best hit
my.orth.table <- data.frame(Nfur_Symbol = my.nfur.names,
                            Mouse_Symbol = my.mouse.names,
                            stringsAsFactors = FALSE)

#
# Killifish brain results: load() restores the saved object under its original
# name (`res`), replacing any `res` currently in the workspace.
my.nfur.rdata <- "/Users/benayoun/Dropbox/Manuscripts_and_Publications/2018_aging_epigenomics_data_description/Aging_omics_paper/Github_folder/Figure5_Conservation/Comparisons/Killifish_aging_RNAseq/Brain/Output/2017-05-15_Killifish_RNAseq_aging__statistics.RData"
load(my.nfur.rdata)
my.brain.nfur.process <- res
# expose the row names as a column so the table can be merged on it
my.brain.nfur.process$Nfur_Symbol <- rownames(my.brain.nfur.process)

# Mouse cerebellum results: this file is expected to provide the list
# `my.cereb.RNAseq.process`, whose first element is used below.
my.mouse.rdata <- "/Users/benayoun/Dropbox/Manuscripts_and_Publications/2018_aging_epigenomics_data_description/Aging_omics_paper/Github_folder/Figure3_Machine_learning/Feature_extraction/Feature_folders/RNAseq_DEseq2_results/RNA_seq_result_cereb_2015-11-19.RData"
load(my.mouse.rdata)
my.cereb.RNAseq.process[[1]]$Mouse_Symbol <- rownames(my.cereb.RNAseq.process[[1]])

#### brain #####
# Attach the mouse ortholog name to each killifish gene (merge on the shared
# Nfur_Symbol column), then join with the mouse results on Mouse_Symbol.
# In the second merge, columns present in both tables get the suffix .x for
# the mouse table (first argument) and .y for the killifish table (second).
my.brain.merge1 <- merge(data.frame(my.brain.nfur.process), my.orth.table)
my.brain.merge2 <- merge(data.frame(my.cereb.RNAseq.process[[1]]), my.brain.merge1, by = 'Mouse_Symbol')

my.mouse.sig <- my.brain.merge2$padj.x < 0.05 # 897
my.nfur.sig <- my.brain.merge2$padj.y < 0.05 # 5404
my.both.sig <- bitAnd(my.mouse.sig,my.nfur.sig)>0 # 319

# Killifish log2 fold changes for orthologs of genes that are significantly
# down- or up-regulated in mouse; computed once and reused for tests and plot
my.nfur.lfc.mouse.down <- my.brain.merge2$log2FoldChange.y[my.mouse.sig & (my.brain.merge2$log2FoldChange.x < 0)]
my.nfur.lfc.mouse.up <- my.brain.merge2$log2FoldChange.y[my.mouse.sig & (my.brain.merge2$log2FoldChange.x > 0)]

# One-sample Wilcoxon tests against 0: are the killifish fold changes shifted
# in the same direction as the mouse change?
test.mouse.neg <- wilcox.test(my.nfur.lfc.mouse.down, alternative = "less")
test.mouse.pos <- wilcox.test(my.nfur.lfc.mouse.up, alternative = "greater")

pdf(paste(Sys.Date(),"LogFC_of_mouse_orthologs_brain_Nfur.pdf",sep="_"))
boxplot(my.nfur.lfc.mouse.down,
        my.nfur.lfc.mouse.up,
        names = c("Mouse Ortholog Down","Mouse Ortholog Up"),
        # killifish age is modelled in weeks, so the fold change is per week
        ylab = "nfur brain aging log2FC per week",
        ylim = c(-0.08,0.08), col = c("cadetblue2","brown1"),
        main = "brain (Nfur)"
)
# print each test's p-value above its box
text(1, 0.07, signif(test.mouse.neg$p.value, digits = 3))
text(2, 0.07, signif(test.mouse.pos$p.value, digits = 3))
abline(h = 0, lty = "dashed", col = "red", lwd = 2)
dev.off()
