# Working directory for this analysis: the relative input path (Output/...)
# and the PDF files written by this script resolve against it.
setwd('/Volumes/BB_Backup_3/BD_aging_project/ChIP-seq/All_tissues_analysis/Breadth_aging_linear_modeling/GLOBAL_ANALYSIS/')

# Keep character columns as plain character vectors in data frames.
# FALSE is spelled out because the shorthand F is an ordinary, reassignable
# variable.
options(stringsAsFactors = FALSE)
library(DESeq2)

# 2017-03-20
# global clustering: normalize across all samples

# 2017-04-07
# increase output cex size

# 2017-11-29
# only record breadth at top 5% domains


# Merged breadth table (tab-separated, no header row); column labels are
# attached afterwards from my.colnames.
my.all <- read.table('Output/2017-03-20_Merged_breadth_all_ages_all_tissues.txt',
                     header = FALSE, sep = "\t")

# Column labels for the merged table: three annotation columns followed by one
# column per sample, named <tissue>_<age><replicate>.
# Each set has ages 3m/12m/29m with two replicates per age; the second NPCs set
# uses replicate numbers 5-6, which keeps its labels distinct from the first.
my.colnames <- local({
  age.tags  <- rep(c("3m", "12m", "29m"), each = 2)
  reps.main <- paste0(age.tags, rep(1:2, times = 3))  # 3m1 3m2 12m1 12m2 29m1 29m2
  reps.npc2 <- paste0(age.tags, rep(5:6, times = 3))  # 3m5 3m6 12m5 12m6 29m5 29m6
  tissues   <- c("Heart", "Liver", "Cerebellum", "OB", "NPCs")

  c("RefSeq", "EntrezID", "Symbol",
    paste(rep(tissues, each = length(reps.main)), reps.main, sep = "_"),
    paste("NPCs", reps.npc2, sep = "_"))
})

# The table was read without a header, so the labels are attached by position.
# Fail loudly on a column-count mismatch: assigning too few names to a data
# frame would otherwise leave NA column names without any error.
stopifnot(ncol(my.all) == length(my.colnames))
colnames(my.all) <- my.colnames

### clean up and normalize
# Columns 1-3 are annotation; the remaining columns hold the per-sample values.
my.sample.cols <- 4:length(my.colnames)

# Rows whose values sum to <= 1 across all samples are discarded
# (see the DESeq2 vignette).
my.null <- which(rowSums(my.all[, my.sample.cols]) <= 1)

# Select the rows to keep by positive index. A negative index built from an
# empty my.null (my.all[-my.null, ]) would select zero rows instead of all.
my.keep <- setdiff(seq_len(nrow(my.all)), my.null)

my.filtered.matrix <- my.all[my.keep, my.sample.cols]
rownames(my.filtered.matrix) <- my.all[my.keep, 1]  # first column = RefSeq

# Per-sample covariates, in sample-column order.
# Age in months: six sets of samples, each 3/12/29 months with two replicates.
age <- rep(rep(c(3, 12, 29), each = 2), times = 6)

# Tissue label per sample: four sets of six samples, then twelve NPCs samples.
# NOTE(review): the column labels given to the table list Heart before Liver,
# whereas this vector labels the first six samples "liver" and the next six
# "heart" -- confirm which order matches the input file.
tissue <- rep(c("liver", "heart", "cereb", "OB", "NPCs"),
              times = c(6, 6, 6, 6, 12))

# design matrix
# One row per sample (row names = sample column names) with its age and tissue.
dataDesign <- data.frame(age = age,
                         tissue = tissue,
                         row.names = colnames(my.filtered.matrix))

# get matrix using age as a modeling covariate
# The design formula combines numeric age with the tissue label.
dds <- DESeqDataSetFromMatrix(
  countData = my.filtered.matrix,
  colData   = dataDesign,
  design    = ~ age + tissue
)

# Run the DESeq2 analysis on the assembled data set.
dds.deseq <- DESeq(dds)

# Write the dispersion-estimate plot to a PDF file.
pdf("Dispersion_plot_global.pdf")
plotDispEsts(dds.deseq)
dev.off()

# get normalized data across tissues
# log2 of the DESeq2-normalized values; the 0.01 offset keeps zeros finite.
# The argument name `normalized` is spelled out rather than relying on partial
# matching of `normalize`.
tissue.cts <- log2(counts(dds.deseq, normalized = TRUE) + 0.01)

# MDS on breadth for all samples
# Dissimilarity between two samples is 1 - Spearman correlation of their
# columns; classical MDS then places the samples in two dimensions.
mds.result <- cmdscale(1 - cor(tissue.cts, method = "spearman"), k = 2)
x <- mds.result[, 1]
y <- mds.result[, 2]

# Point colour by age: within every set of six samples, two replicates each of
# 3m (coral), 12m (blueviolet) and 29m (dodgerblue).
my.colors <- rep(rep(c("coral", "blueviolet", "dodgerblue"), each = 2), times = 6)

# pch: by tissues
#   Liver - 8, Heart - 14, Cere - 5, OB - 1, NPC - 11
# Four sets of six samples followed by twelve NPCs samples.
# NOTE(review): the symbols are assigned Liver-first, while the column labels
# given to the table list Heart before Liver -- confirm which order matches the
# input file.
my.pchs <- rep(c(8, 14, 5, 1, 11), times = c(6, 6, 6, 6, 12))

# MDS map of all samples: colour encodes age, plotting symbol encodes tissue.
pdf("2017-04-07_MDS_RNAseq_DESeq_norm_together_BIGPOINTS.pdf")
# The first call sets up axes, labels and title (with col = NULL, presumably
# so nothing visible is drawn); points() then adds the styled symbols.
plot(x, y,
     main = "H3K4me3 breadth Multi-dimensional Scaling",
     xlab = "MDS dimension 1",
     ylab = "MDS dimension 2",
     cex = 2,
     col = NULL)
points(x, y, col = my.colors, pch = my.pchs, cex = 4, lwd = 1.5)
# Tissue key (symbols) and age key (colours).
legend("topleft",
       legend = c("NPCs", "Cerebellum", "Olfactory bulb", "Heart", "Liver"),
       pch = c(11, 5, 1, 14, 8),
       col = "grey", bty = 'n', pt.cex = 1)
legend("bottomleft",
       legend = c("3m", "12m", "29m"),
       col = c("coral", "blueviolet", "dodgerblue"),
       pch = 16, bty = 'n', pt.cex = 1)
dev.off()

##### do PCA analysis
# Rows with zero variance are dropped first: prcomp cannot rescale a constant
# column to unit variance.
my.pos.var <- apply(tissue.cts, 1, var) > 0

# Samples become rows via t(); every retained feature is scaled to unit
# variance. prcomp's argument is `scale.` (with the dot), spelled out here
# rather than relying on partial matching of `scale`.
my.pca <- prcomp(t(tissue.cts[my.pos.var, ]), scale. = TRUE)
x <- my.pca$x[, 1]
y <- my.pca$x[, 2]

my.summary <- summary(my.pca)

# Output file name is prefixed with today's date.
# NOTE(review): there is no separator between the date and the description
# (the other outputs use "<date>_..."); kept unchanged so the file name stays
# the same -- confirm whether an underscore was intended.
my.pca.out <- paste0(Sys.Date(), "Global_H3K4me3_Breadth_PCA_plot.pdf")

# PCA map of all samples (PC1 vs PC2), styled like the MDS plots.
pdf(my.pca.out)
# Axis labels report the second row of the summary's importance table for each
# component (the proportion of variance) as a percentage with one decimal.
plot(x, y,
     xlab = paste0('PC1 (', round(100 * my.summary$importance[2, 1], 1), "%)"),
     ylab = paste0('PC2 (', round(100 * my.summary$importance[2, 2], 1), "%)"),
     pch = 16, cex = 4, cex.lab = 1,
     col = NULL)
points(x, y, col = my.colors, pch = my.pchs, cex = 4, lwd = 1.5)
# Tissue key (symbols) and age key (colours).
legend("topleft",
       legend = c("NPCs", "Cerebellum", "Olfactory bulb", "Heart", "Liver"),
       pch = c(11, 5, 1, 14, 8),
       col = "grey", bty = 'n', pt.cex = 1)
legend("bottomleft",
       legend = c("3m", "12m", "29m"),
       col = c("coral", "blueviolet", "dodgerblue"),
       pch = 16, bty = 'n', pt.cex = 1)
dev.off()



########################################################################################################
# MDS on breadth for samples top 5% (Broad Domains defined in Benayoun, Pollina, Ucar et al, 2014)

# DESeq2-normalized values on the linear scale (no log transform here).
# The argument name `normalized` is spelled out rather than relying on partial
# matching of `normalize`.
tissue.cts.v2 <- counts(dds.deseq, normalized = TRUE)

# Empirical percentile of each element within its own vector.
#
# my.vec: numeric vector.
# Returns a numeric vector as long as my.vec in which each entry is the
# fraction of values in my.vec that are <= the corresponding element
# (the empirical CDF of my.vec evaluated at my.vec itself).
get_percentile <- function(my.vec) {
  ecdf(my.vec)(my.vec)
}

# Percentile of every value within its own sample (column-wise).
my.quantiles <- apply(tissue.cts.v2, MARGIN = 2, FUN = get_percentile)


# remove values from non top 5% broadest
# Entries below the 95th percentile of their sample are set to zero.
tissue.cts.v2[my.quantiles < 0.95] <- 0

# Classical 2-D MDS on 1 - Spearman correlation of the log2-transformed,
# masked values (0.01 offset keeps the zeros finite).
mds.result <- cmdscale(
  1 - cor(log2(tissue.cts.v2 + 0.01), method = "spearman"),
  k = 2, eig = FALSE, add = FALSE, x.ret = FALSE
)
x <- mds.result[, 1]
y <- mds.result[, 2]

# Point colour by age: within every set of six samples, two replicates each of
# 3m (coral), 12m (blueviolet) and 29m (dodgerblue).
my.colors <- rep(rep(c("coral", "blueviolet", "dodgerblue"), each = 2), times = 6)

# pch: by tissues
#   Liver - 8, Heart - 14, Cere - 5, OB - 1, NPC - 11
# Four sets of six samples followed by twelve NPCs samples.
# NOTE(review): the symbols are assigned Liver-first, while the column labels
# given to the table list Heart before Liver -- confirm which order matches the
# input file.
my.pchs <- rep(c(8, 14, 5, 1, 11), times = c(6, 6, 6, 6, 12))

# MDS map restricted to the top 5% values: colour encodes age, plotting symbol
# encodes tissue.
pdf("2017-11-29_MDS_RNAseq_DESeq_norm_together_Top5percent_only.pdf")
# The first call sets up axes, labels and title (with col = NULL, presumably
# so nothing visible is drawn); points() then adds the styled symbols.
plot(x, y,
     main = "H3K4me3 breadth Multi-dimensional Scaling (Top5% only)",
     xlab = "MDS dimension 1",
     ylab = "MDS dimension 2",
     cex = 2,
     col = NULL)
points(x, y, col = my.colors, pch = my.pchs, cex = 4, lwd = 1.5)
# Tissue key (symbols) and age key (colours).
legend("topleft",
       legend = c("NPCs", "Cerebellum", "Olfactory bulb", "Heart", "Liver"),
       pch = c(11, 5, 1, 14, 8),
       col = "grey", bty = 'n', pt.cex = 1)
legend("bottomleft",
       legend = c("3m", "12m", "29m"),
       col = c("coral", "blueviolet", "dodgerblue"),
       pch = 16, bty = 'n', pt.cex = 1)
dev.off()
