# Setup and data loading for the global H3K4me3 height analysis.
# NOTE(review): the absolute setwd() ties this script to one machine; every
# relative path used afterwards (the 'Output/...' input and the PDF outputs)
# resolves against this directory.
setwd('/Volumes/MyBook_3/BD_aging_project/ChIP-seq/All_tissues_analysis/H3K4me3_Height_aging_linear_modeling/GLOBAL_ANALYSIS/')
options(stringsAsFactors = FALSE)
library(DESeq2)


# Tab-delimited table with a header row.
my.all <- read.table('Output/2017-03-22_Merged_H3K4me3_heights_per_gene_for_global_analysis.txt',
                     header = TRUE, sep = "\t")

### clean up and normalize
# Column 1 supplies the row names and columns 4 onward are kept as the
# per-sample value columns (columns 2-3 are not used here).
# Guard against a table with fewer than 4 columns, where 4:ncol would count
# backwards, and keep a data frame even when only one value column exists.
stopifnot(ncol(my.all) >= 4)
my.filtered.matrix <- data.frame(my.all[, 4:ncol(my.all), drop = FALSE])
rownames(my.filtered.matrix) <- my.all[, 1]

# Per-sample covariates, parsed from the column names of my.filtered.matrix.
# Both vectors start as NA so that a column matching none of the expected
# labels is reported instead of silently entering the model as age 0 or
# tissue "".

# age in months
age <- rep(NA_real_, ncol(my.filtered.matrix))
age[grepl('3m', colnames(my.filtered.matrix))] <- 3
age[grepl('12m', colnames(my.filtered.matrix))] <- 12
age[grepl('29m', colnames(my.filtered.matrix))] <- 29
if (anyNA(age)) {
  stop("No age label (3m/12m/29m) found in column(s): ",
       paste(colnames(my.filtered.matrix)[is.na(age)], collapse = ", "),
       call. = FALSE)
}

tissue <- rep(NA_character_, ncol(my.filtered.matrix))
tissue[grepl('NPC', colnames(my.filtered.matrix))] <- 'NPCs'
tissue[grepl('Liver', colnames(my.filtered.matrix))] <- 'Liver'
tissue[grepl('Cere', colnames(my.filtered.matrix))] <- 'Cerebellum'
tissue[grepl('OB', colnames(my.filtered.matrix))] <- 'OB'
tissue[grepl('Heart', colnames(my.filtered.matrix))] <- 'Heart'
if (anyNA(tissue)) {
  stop("No tissue label (NPC/Liver/Cere/OB/Heart) found in column(s): ",
       paste(colnames(my.filtered.matrix)[is.na(tissue)], collapse = ", "),
       call. = FALSE)
}

# design matrix: one row per sample, keyed by the sample column name
dataDesign <- data.frame(row.names = colnames(my.filtered.matrix),
                         age = age,
                         tissue = tissue)

# Build the DESeq2 data set with age and tissue as modeling covariates.
# Values are rounded to whole numbers before being handed over as countData.
dds <- DESeqDataSetFromMatrix(countData = round(my.filtered.matrix),
                              colData = dataDesign,
                              design = ~ age + tissue)


dds.deseq <- DESeq(dds)

# log2 of the normalized counts; the 0.01 offset keeps zero counts finite.
# The argument name is spelled out in full ('normalized'): the previous
# 'normalize' only worked through partial argument matching.
tissue.cts <- log2(counts(dds.deseq, normalized = TRUE) + 0.01)

# Classical MDS in two dimensions, using 1 - Spearman correlation between
# the columns of tissue.cts as the dissimilarity. The remaining cmdscale()
# options are left at their defaults (eig, add and x.ret all FALSE).
mds.result <- cmdscale(
  1 - cor(tissue.cts, method = "spearman"),
  k = 2
)
x <- mds.result[, 1]  # first MDS coordinate
y <- mds.result[, 2]  # second MDS coordinate


# col: by age label found in the sample column name
my.colors <- character(ncol(my.filtered.matrix))
my.colors[grepl('3m', colnames(my.filtered.matrix))] <- "coral"
my.colors[grepl('12m', colnames(my.filtered.matrix))] <- "blueviolet"
my.colors[grepl('29m', colnames(my.filtered.matrix))] <- "dodgerblue"

# pch: by tissue label found in the sample column name
#   NPC   - 11
#   Heart - 14
#   Cere  - 5
#   Liver - 8
#   OB    - 1
my.pchs <- numeric(ncol(my.filtered.matrix))
my.pchs[grepl('NPC', colnames(my.filtered.matrix))] <- 11
my.pchs[grepl('Liver', colnames(my.filtered.matrix))] <- 8
my.pchs[grepl('Cere', colnames(my.filtered.matrix))] <- 5
my.pchs[grepl('OB', colnames(my.filtered.matrix))] <- 1
my.pchs[grepl('Heart', colnames(my.filtered.matrix))] <- 14

# Write the MDS map to a PDF in the working directory.
pdf("2017-04-07_MDS_H3K4me3_height_DESeq_norm_together_BIGPTS.pdf")
# NOTE(review): col = NULL appears intended to set up the axes and title
# without visible points; the styled points are added by points() below.
plot(x, y,
     main = "H3K4me3 height Multi-dimensional Scaling",
     xlab = "MDS dimension 1",
     ylab = "MDS dimension 2",
     cex = 4,
     col = NULL)
# Symbol = tissue, colour = age.
points(x, y, col = my.colors, pch = my.pchs, cex = 4, lwd = 1.5)
# Tissue key (symbols) and age key (colours).
legend("topleft",
       legend = c("NPCs", "Cerebellum", "Olfactory bulb", "Heart", "Liver"),
       pch = c(11, 5, 1, 14, 8),
       col = "grey",
       bty = 'n',
       pt.cex = 1)
legend("bottomleft",
       legend = c("3m", "12m", "29m"),
       col = c("coral", "blueviolet", "dodgerblue"),
       pch = 16,
       bty = 'n',
       pt.cex = 1)
dev.off()


##### do PCA analysis
# Keep only rows with non-zero variance across samples; constant rows
# cannot be rescaled to unit variance.
my.pos.var <- apply(tissue.cts, 1, var) > 0
# Transpose so that the columns of tissue.cts (samples) become the rows
# handed to prcomp(). The scaling argument is 'scale.' (with a trailing dot);
# it is written in full here because the previous 'scale' only worked
# through partial argument matching.
my.pca <- prcomp(t(tissue.cts[my.pos.var, ]), scale. = TRUE)
x <- my.pca$x[, 1]  # PC1 scores
y <- my.pca$x[, 2]  # PC2 scores

my.summary <- summary(my.pca)
my.summary
# Output recorded from a previous run:
# Importance of components:
#   PC1     PC2     PC3     PC4      PC5      PC6    PC7     PC8     PC9    PC10    PC11    PC12    PC13    PC14    PC15    PC16   PC17
# Standard deviation     84.9157 68.2804 60.7111 52.5537 18.89715 11.72970 9.9465 9.45553 9.24238 8.88355 8.70571 8.38291 8.13670 7.88824 7.73793 7.63670 7.3821
# Proportion of Variance  0.3569  0.2307  0.1824  0.1367  0.01767  0.00681 0.0049 0.00442 0.00423 0.00391 0.00375 0.00348 0.00328 0.00308 0.00296 0.00289 0.0027
# Cumulative Proportion   0.3569  0.5876  0.7700  0.9067  0.92437  0.93117 0.9361 0.94050 0.94472 0.94863 0.95238 0.95586 0.95913 0.96221 0.96518 0.96806 0.9708


# Date-stamped output file name. An underscore now separates the date from
# the description, matching the other dated file names in this script; the
# original pasted the two together with no separator.
my.pca.out <- paste0(Sys.Date(), "_Global_H3K4me3_Height_PCA_plot.pdf")

pdf(my.pca.out)
# Axis labels carry the percentage of variance explained by each component,
# read by name from the summary's importance table.
# NOTE(review): col = NULL appears intended to set up the axes without
# visible points; the styled points are added by points() below.
plot(x, y, pch = 16, cex = 3,
     xlab = paste0('PC1 (', round(100 * my.summary$importance["Proportion of Variance", "PC1"], 1), "%)"),
     ylab = paste0('PC2 (', round(100 * my.summary$importance["Proportion of Variance", "PC2"], 1), "%)"),
     cex.lab = 1, col = NULL)
# Symbol = tissue, colour = age.
points(x, y, cex = 4, lwd = 1.5, col = my.colors, pch = my.pchs)
legend("topleft", c("NPCs", "Cerebellum", "Olfactory bulb", "Heart", "Liver"),
       pch = c(11, 5, 1, 14, 8), col = "grey", bty = 'n', pt.cex = 1)
legend("bottomleft", c("3m", "12m", "29m"),
       col = c("coral", "blueviolet", "dodgerblue"), pch = 16, bty = 'n', pt.cex = 1)
dev.off()
