# Work from the GLOBAL_ANALYSIS directory: the input table and every output
# PDF below are addressed with paths relative to it.
setwd('/Volumes/BB_Backup_3/BD_aging_project/ChIP-seq/All_tissues_analysis/SuperEnhancers_aging_linear_modeling/GLOBAL_ANALYSIS')
options(stringsAsFactors = FALSE)
library(DESeq2)

# Read the merged tab-separated table (first line is the header).
# Presumably one row per gene with super-enhancer heights per sample,
# judging by the file name -- confirm against the upstream script.
my.all <- read.table(
  'Output/2017-11-25_Merged_SE_heights_per_gene_for_global_analysis.txt',
  header = TRUE,
  sep = "\t"
)

### clean up and normalize
# Columns 4..last are kept as the per-sample values; column 1 provides the
# row names of the resulting data frame.
my.filtered.matrix <- data.frame(my.all[, 4:ncol(my.all)])
rownames(my.filtered.matrix) <- my.all[, 1]

# Per-sample covariates are parsed from the column names of
# my.filtered.matrix. Both vectors start as NA so that a column matching none
# of the tags is caught below instead of silently entering the model as
# age 0 / tissue "".
age <- rep(NA_real_, ncol(my.filtered.matrix))
age[grep('3m', colnames(my.filtered.matrix))] <- 3
age[grep('12m', colnames(my.filtered.matrix))] <- 12
age[grep('29m', colnames(my.filtered.matrix))] <- 29 # age in months

tissue <- rep(NA_character_, ncol(my.filtered.matrix))
tissue[grep('NPC', colnames(my.filtered.matrix))] <- 'NPCs'
tissue[grep('Liver', colnames(my.filtered.matrix))] <- 'Liver'
tissue[grep('Cere', colnames(my.filtered.matrix))] <- 'Cerebellum'
tissue[grep('OB', colnames(my.filtered.matrix))] <- 'OB'
tissue[grep('heart', colnames(my.filtered.matrix))] <- 'Heart'

# Fail loudly if any sample could not be annotated; list the offending columns.
if (anyNA(age) || anyNA(tissue)) {
  stop(
    "Could not assign age and/or tissue to sample column(s): ",
    paste(colnames(my.filtered.matrix)[is.na(age) | is.na(tissue)],
          collapse = ", "),
    call. = FALSE
  )
}

# design matrix: one row per sample (row names = sample column names)
dataDesign <- data.frame(
  row.names = colnames(my.filtered.matrix),
  age = age,
  tissue = tissue
)

# Build the DESeq2 dataset. Values are rounded to whole numbers before being
# handed over as count data; age (numeric) and tissue are the model covariates.
dds <- DESeqDataSetFromMatrix(
  countData = round(my.filtered.matrix),
  colData   = dataDesign,
  design    = ~ age + tissue
)

# Run the DESeq2 pipeline on the dataset.
dds.deseq <- DESeq(dds)

# Save the dispersion-estimate diagnostic plot.
pdf("Dispersion_plot_global.pdf")
plotDispEsts(dds.deseq)
dev.off()

# perform MDS
# log2 of the DESeq2-normalized counts; the 0.01 pseudocount keeps zero
# counts finite. `normalized` is spelled out in full (the original
# `normalize = TRUE` only worked through partial argument matching).
tissue.cts <- log2(counts(dds.deseq, normalized = TRUE) + 0.01)

# Classical MDS in two dimensions, using 1 - Spearman correlation between
# samples (columns of tissue.cts) as the dissimilarity.
mds.result <- cmdscale(
  1 - cor(tissue.cts, method = "spearman"),
  k = 2, eig = FALSE, add = FALSE, x.ret = FALSE
)
# Coordinates reused by the plotting code that follows.
x <- mds.result[, 1]
y <- mds.result[, 2]


# colour: by age. Each tag is searched for in the sample column names and
# matching samples receive the associated colour; unmatched samples keep ''.
my.colors <- local({
  age.palette <- c("3m" = "coral", "12m" = "blueviolet", "29m" = "dodgerblue")
  sample.ids <- colnames(my.filtered.matrix)
  cols <- rep('', ncol(my.filtered.matrix))
  for (tag in names(age.palette)) {
    cols[grepl(tag, sample.ids)] <- age.palette[[tag]]
  }
  cols
})

# pch: by tissues. Tags are applied in the order listed, so a later tag
# overrides an earlier one if a name matches both; unmatched samples keep 0.
my.pchs <- local({
  tissue.symbols <- c(NPC = 11, Liver = 8, Cere = 5, OB = 1, heart = 14)
  sample.ids <- colnames(my.filtered.matrix)
  syms <- rep(0, ncol(my.filtered.matrix))
  for (tag in names(tissue.symbols)) {
    syms[grepl(tag, sample.ids)] <- tissue.symbols[[tag]]
  }
  syms
})

# Symbol key (the legends in the plots below must agree with this):
#   NPC   - 11
#   Heart - 14
#   Cere  - 5
#   Liver - 8
#   OB    - 1

# MDS scatter plot: colour encodes age, plotting symbol encodes tissue.
pdf("2017-11-29_MDS_SE_height_DESeq_norm_together.pdf")
# The first call sets up axes, labels and title. col = NULL is presumably
# meant to keep these points invisible so the styled points() call below is
# the only visible layer -- TODO confirm.
plot(
  x, y,
  main = "SE height Multi-dimensional Scaling",
  xlab = "MDS dimension 1",
  ylab = "MDS dimension 2",
  cex = 4,
  col = NULL
)
points(x, y, col = my.colors, pch = my.pchs, cex = 4, lwd = 1.5)
# Tissue key (symbols listed in the same order as the labels).
legend(
  "topleft",
  legend = c("NPCs", "Cerebellum", "Olfactory bulb", "Heart", "Liver"),
  pch = c(11, 5, 1, 14, 8),
  col = "grey",
  bty = 'n',
  pt.cex = 1
)
# Age key.
legend(
  "topright",
  legend = c("3m", "12m", "29m"),
  col = c("coral", "blueviolet", "dodgerblue"),
  pch = 16,
  bty = 'n',
  pt.cex = 1
)
dev.off()


##### do PCA analysis
# Keep only rows whose values vary across samples: with scaling enabled,
# prcomp() cannot rescale a constant variable to unit variance.
my.pos.var <- apply(tissue.cts, 1, var) > 0

# Samples become rows (observations) after the transpose. `scale.` is spelled
# out in full; the original `scale = TRUE` relied on partial argument matching.
my.pca <- prcomp(t(tissue.cts[my.pos.var, ]), scale. = TRUE)
# First two principal-component scores, reused by the plotting code below.
x <- my.pca$x[, 1]
y <- my.pca$x[, 2]

# Summary holds the variance explained per component; printed for the log.
my.summary <- summary(my.pca)
my.summary


# Output file is named "<date>_Global_SE_Height_PCA_plot.pdf". The original
# used sep = "", which glued the date directly onto the name.
my.pca.out <- paste(Sys.Date(), "Global_SE_Height_PCA_plot.pdf", sep = "_")

# Percentage of variance explained by PC1 / PC2, for the axis labels.
my.pc1.pct <- round(100 * my.summary$importance["Proportion of Variance", "PC1"], 1)
my.pc2.pct <- round(100 * my.summary$importance["Proportion of Variance", "PC2"], 1)

# PCA scatter plot: colour encodes age, plotting symbol encodes tissue.
pdf(my.pca.out)
# The first call sets up axes and labels. col = NULL is presumably meant to
# keep these points invisible so the styled points() call below is the only
# visible layer -- TODO confirm.
plot(
  x, y,
  pch = 16, cex = 3,
  xlab = paste0("PC1 (", my.pc1.pct, "%)"),
  ylab = paste0("PC2 (", my.pc2.pct, "%)"),
  cex.lab = 1,
  col = NULL
)
points(x, y, cex = 4, lwd = 1.5, col = my.colors, pch = my.pchs)
# Tissue key (symbols listed in the same order as the labels).
legend(
  "topleft",
  legend = c("NPCs", "Cerebellum", "Olfactory bulb", "Heart", "Liver"),
  pch = c(11, 5, 1, 14, 8),
  col = "grey",
  bty = "n",
  pt.cex = 1
)
# Age key.
legend(
  "bottomleft",
  legend = c("3m", "12m", "29m"),
  col = c("coral", "blueviolet", "dodgerblue"),
  pch = 16,
  bty = "n",
  pt.cex = 1
)
dev.off()
