# Work from the directory holding the GSE61260 files: the relative paths used
# below for reading inputs and writing outputs are resolved against it.
setwd("/Volumes/BB_USC_2/Aging_RNA_New_species/GSE61260_RAW")

# Keep strings read into data frames as character vectors rather than factors.
options(stringsAsFactors = FALSE)

library("affy")
library("limma")

# 2017-10-04
# analysis of liver aging data with covariates

############################
# will use the provided normalized data since this design is not supported by Bioconductor

# Sample annotation table: tab-separated text with a header row.
# The columns SampleID, Age, Sex, BMI and DiseaseStatus are read further down.
my.liver.metadata <- read.delim("GSE61260_parsed_metadata.txt")

# Provided normalized expression table: comma-separated with a header row.
# The first column (ID_REF) holds the gene identifier; the rest are samples.
my.liver.data <- read.csv("GSE61260_datLiverNormalizedExpr.csv")


# QC: one box per sample column (identifier column dropped), written to a
# date-stamped PDF, to eyeball whether the distributions look normalized.
pdf(file = paste(Sys.Date(), "sample_boxplot.pdf", sep = "_"))
boxplot(my.liver.data[, -1])
dev.off()

# Use the identifiers in the first column as row names, but only when that is
# legal: `rownames<-` on a data.frame stops with an error on duplicated (or
# missing) names, which aborts the whole script when several rows map to the
# same gene. When duplicates exist, the default row names are kept; the rows
# are collapsed to one per gene by the aggregation step that follows.
if (!anyNA(my.liver.data[, 1]) && anyDuplicated(my.liver.data[, 1]) == 0L) {
  rownames(my.liver.data) <- my.liver.data[, 1]
}

# Collapse rows that share an ID_REF into a single row per gene by taking the
# mean of every sample column.
my.liver.data.gene <- aggregate(
  x = my.liver.data[, -1],
  by = list(my.liver.data$ID_REF),
  FUN = "mean"
)

# Column 1 is the grouping key; the remaining columns are labelled with the
# sample IDs from the metadata.
# NOTE(review): this assumes the metadata rows are in the same order as the
# expression columns - confirm.
names(my.liver.data.gene) <- c("GeneName", my.liver.metadata$SampleID)

# Gene names are unique after aggregation, so they can serve as row names.
row.names(my.liver.data.gene) <- my.liver.data.gene[[1]]

# Per-sample covariates pulled from the metadata table.
# Continuous covariates:
my.age <- as.numeric(my.liver.metadata$Age)
my.bmi <- as.numeric(my.liver.metadata$BMI)
# Categorical covariates:
my.sex <- as.factor(my.liver.metadata$Sex)
my.disease <- as.factor(my.liver.metadata$DiseaseStatus)

# Distinct ages present in the cohort, in increasing order.
my.unique.ages <- sort(unique(my.age))
# do MDS analysis
# Classical MDS on a Spearman-based dissimilarity (1 - rho) between sample
# columns; the first two dimensions are kept for plotting.
mds.result <- cmdscale(1 - cor(my.liver.data.gene[, -1], method = "spearman"),
                       k = 2, eig = FALSE, add = FALSE, x.ret = FALSE)
x <- mds.result[, 1]
y <- mds.result[, 2]

# Each sample is drawn as a text label made of its age followed by "y".
my.pch <- paste(my.age, "y", sep = "")

# One colour per sample along a coral -> blueviolet -> dodgerblue gradient;
# position k in the gradient is meant for the k-th youngest sample.
my.colors <- colorRampPalette(c("coral", "blueviolet", "dodgerblue"))(length(my.pch))
my.sorted.ages <- sort(my.age, index.return = TRUE, decreasing = FALSE)

# Rank of every sample in the age ordering (ties broken by sample order).
# The gradient must be indexed by this rank. Indexing by the ordering
# permutation (my.sorted.ages$ix) would give sample i the colour belonging to
# gradient position ix[i], i.e. scramble the age -> colour mapping.
my.age.rank <- rank(my.age, ties.method = "first", na.last = "keep")

pdf(paste(Sys.Date(), "Horvath_aging_Liver.pdf", sep = "_"))
plot(x, y, xlab = "MDS dimension 1", ylab = "MDS dimension 2",
     main = "Multi-dimensional Scaling", cex = 2, col = NULL)
text(x, y, my.pch, col = my.colors[my.age.rank])
dev.off()

#######################################################
########### A. both sexes together

# Design matrix: intercept + age + sex + BMI + disease status, one row per
# sample. The covariates are not columns of the `data` argument, so they are
# presumably picked up from the workspace vectors defined above.
model <- model.matrix(
  ~ my.age + my.sex + my.bmi + my.disease,
  data = my.liver.data.gene[, -1]
)

# Gene-wise linear models on the per-gene expression matrix (gene-name column
# dropped), followed by empirical Bayes moderation.
fit <- lmFit(
  ExpressionSet(assayData = as.matrix(my.liver.data.gene[, -1])),
  model
)
fit.eb <- eBayes(fit)

# Age-coefficient statistics for every gene: no p-value cut-off, no row limit.
my.sig <- topTable(fit.eb, coef = "my.age", number = Inf, p.value = 1)

# Store the table both as an R object and as tab-separated text.
save(
  my.sig,
  file = paste(Sys.Date(), "Horvath_Human_Liver_aging_bothSexes.RData", sep = "_")
)
write.table(
  my.sig,
  file = paste(Sys.Date(), "Horvath_Human_Liver_aging_bothSexes.txt", sep = "_"),
  quote = FALSE,
  sep = "\t"
)

# ### Regress out batch
# mod <- coefficients(fit)[,-c(1:2)] %*% t(fit$design[,-c(1:2)]) ### I keep only age and intercept
# my.liver.data.gene.corrected <- my.liver.data.gene[,-1] - mod
# 


#######################################################
########### B. only male

# Restrict the covariates to the male samples.
# NOTE: this overwrites the full-cohort my.age / my.bmi / my.disease vectors;
# my.sex itself is left at full length.
my.males <- my.sex %in% "male"
my.age <- my.age[my.males]
my.bmi <- my.bmi[my.males]
my.disease <- my.disease[my.males]

# Expression columns of the male samples only. Column 1 of my.liver.data.gene
# is the gene name, hence the +1 offset when dropping the non-male samples.
my.liver.data.male <- my.liver.data.gene[, -c(1, 1 + which(!my.males)), drop = FALSE]

# fit limma model
# Design matrix: intercept + age + BMI + disease status for the male samples.
model <- model.matrix(~ my.age + my.bmi + my.disease, data = my.liver.data.male)

# The design needs exactly one row per expression column. Fail early with a
# clear message if, for example, samples were dropped because of missing
# covariate values.
if (nrow(model) != ncol(my.liver.data.male)) {
  stop("Male-only design matrix has ", nrow(model), " rows but the expression ",
       "data has ", ncol(my.liver.data.male), " male sample columns",
       call. = FALSE)
}

# Fit on the male-only expression matrix. Passing the full-cohort matrix here
# makes lmFit fail, because the design rows no longer match the data columns.
fit <- lmFit(ExpressionSet(assayData = as.matrix(my.liver.data.male)), model)
fit.eb <- eBayes(fit)

# Age-coefficient statistics for every gene: no p-value cut-off, no row limit.
my.sig <- topTable(fit.eb, coef = "my.age", p.value = 1, number = Inf)
save(my.sig, file = paste(Sys.Date(), "Horvath_Human_Liver_aging_Male_Only.RData", sep = "_"))
write.table(my.sig, file = paste(Sys.Date(), "Horvath_Human_Liver_aging_Male_Only.txt", sep = "_"), quote = FALSE, sep = "\t")

