# Spearman correlation between column 1 and column 2 of my.data.pos, computed
# on subsets of rows chosen by thresholding column 1 (below/above 0.5, 0.6
# and 0.7).
# NOTE(review): my.data.pos is not created in the lines before this point, and
# my.sel is used by the first call before any visible assignment; both are
# presumably left over from earlier in the session -- confirm before re-running.
cor(my.data.pos[my.sel,1],my.data.pos[my.sel,2],method="spearman")
# Rows whose column-1 value is below 0.5
my.sel <- which(my.data.pos[,1] < 0.5)
cor(my.data.pos[my.sel,1],my.data.pos[my.sel,2],method="spearman")
# Rows whose column-1 value is above 0.6
my.sel <- which(my.data.pos[,1] > 0.6)
cor(my.data.pos[my.sel,1],my.data.pos[my.sel,2],method="spearman")
# Rows whose column-1 value is below 0.6
my.sel <- which(my.data.pos[,1] < 0.6)
cor(my.data.pos[my.sel,1],my.data.pos[my.sel,2],method="spearman")
# Rows whose column-1 value is above 0.7
my.sel <- which(my.data.pos[,1] > 0.7)
cor(my.data.pos[my.sel,1],my.data.pos[my.sel,2],method="spearman")
# Rows whose column-1 value is below 0.7
my.sel <- which(my.data.pos[,1] < 0.7)
cor(my.data.pos[my.sel,1],my.data.pos[my.sel,2],method="spearman")
# Recorded value (presumably the result of the call just above -- confirm):
#0.2300984
# H1 cells: H3K4me3 peak breadth versus gene expression.
# Column usage (inferred from the indexing below -- TODO confirm against the
# BED layout): 5 = expression level, 7/8 = peak start/end, 10 = peak score,
# with -1 marking rows that have no peak.
# The table is read once from the correctly named file; an earlier attempt
# used a mistyped path ending in ".bedbed", which cannot be read.
my.H1.length.v1 <- read.table('~/Desktop/TEST/H1_expressed_H3K4me3_intersect.KNOWN.bed', header=FALSE, sep="\t")
peak_score <- my.H1.length.v1[,10]

# getBroadIds.2 is defined elsewhere.
# NOTE(review): it receives the unfiltered table, but its result indexes the
# filtered expression vector below -- verify that the indices line up.
broad.ids <- getBroadIds.2 (my.H1.length.v1,0.95)

# Keep rows that have a peak. A logical mask is used instead of
# x[-which(cond), ]: when nothing matches, which() is empty and the negative
# index would select zero rows instead of all of them.
has.peak <- !(peak_score %in% -1)
my.H1.length.v1.redux <- my.H1.length.v1[has.peak,]
exp_level.2 <- my.H1.length.v1.redux[,5]
peak_score.2 <- my.H1.length.v1.redux[,10]

# Expression of genes with broad versus non-broad peaks.
my.exp.broad.H1 <- exp_level.2[broad.ids]
my.exp.nonbroad.H1 <- exp_level.2[-broad.ids]
# The two groups have different sizes; a list keeps them as they are, whereas
# cbind() would recycle the shorter vector and distort its box.
boxplot(list(my.exp.broad.H1, my.exp.nonbroad.H1), outline = FALSE,
        names = c("Broad", "Non-broad"), ylab = "Expression level (fkpm)",
        main = "H1")

# Peak length and its empirical quantile among all retained peaks.
peak_length.H1 <- my.H1.length.v1.redux[,8] - my.H1.length.v1.redux[,7]
Fn <- ecdf(peak_length.H1)
my.full.quartiles <- Fn(peak_length.H1)

# Genes with zero expression are excluded from the scatter plots (they cannot
# be drawn on a log axis). my.null keeps the original index vector; the
# logical mask is what is used for subsetting (same empty-which() pitfall).
my.null <- which(exp_level.2 == 0)
is.expressed <- !(exp_level.2 %in% 0)

# log10(peak length) versus log10(expression)
my.data.pos <- cbind(log10(peak_length.H1[is.expressed]),log10(exp_level.2[is.expressed]))
plot(my.data.pos,col=rgb(205,51,1,30,maxColorValue=255), pch=16)
cor(my.data.pos[,1],my.data.pos[,2],method="spearman")
#0.3228654

# Peak-length quantile versus expression (log-scaled y axis). The Spearman
# coefficient is rank based, so it is the same as for the log10 version above.
my.data.pos <- cbind(my.full.quartiles[is.expressed],exp_level.2[is.expressed])
my.broad.H1 <- which(my.data.pos[,1] >= 0.95)
plot(my.data.pos,log='y',col=rgb(205,51,1,30,maxColorValue=255), pch=16)
cor(my.data.pos[,1],my.data.pos[,2],method="spearman")
# C2C12 MT sample: H3K4me3 peak breadth versus gene expression.
# The table is read once (it was previously read twice in a row).
my.C2C12.length.v1 <- read.table('~/Desktop/TEST/C2C12_MT_H3K4me3_vs_expressionREDUX.bed', header=FALSE, sep="\t")
head(my.C2C12.length.v1)

# Rows without a peak are recognised by -1 in column 7 for this file.
# NOTE(review): column 7 is also used as the peak start further down, and
# column 10 is still read as peak_score.2 -- confirm which column is intended.
peak_score <- my.C2C12.length.v1[,7]

# getBroadIds.2 is defined elsewhere.
# NOTE(review): it receives the unfiltered table, but its result indexes the
# filtered expression vector below -- verify that the indices line up.
broad.ids <- getBroadIds.2 (my.C2C12.length.v1,0.95)

# Logical mask instead of x[-which(cond), ]: an empty which() would make the
# negative index select zero rows instead of all of them.
has.peak <- !(peak_score %in% -1)
my.C2C12.length.v1.redux <- my.C2C12.length.v1[has.peak,]
exp_level.2 <- my.C2C12.length.v1.redux[,5]
peak_score.2 <- my.C2C12.length.v1.redux[,10]

# Expression of genes with broad versus non-broad peaks. Only the plot titled
# "C2C12 MT" is kept; the earlier copy carried the title "C2C12 MB".
my.exp.broad.C2C12 <- exp_level.2[broad.ids]
my.exp.nonbroad.C2C12 <- exp_level.2[-broad.ids]
# A list keeps the two unequal-sized groups intact; cbind() would recycle the
# shorter one.
boxplot(list(my.exp.broad.C2C12, my.exp.nonbroad.C2C12), outline = FALSE,
        names = c("Broad", "Non-broad"), ylab = "Expression level in mESCs (fkpm)", main = "C2C12 MT")

# Peak length and its empirical quantile among all retained peaks.
peak_length.C2C12 <- my.C2C12.length.v1.redux[,8] - my.C2C12.length.v1.redux[,7]
Fn <- ecdf(peak_length.C2C12)
my.full.quartiles <- Fn(peak_length.C2C12)

# Zero-expression genes are left out of the scatter plots. my.null keeps the
# original index vector; the logical mask is used for subsetting.
my.null <- which(exp_level.2 == 0)
is.expressed <- !(exp_level.2 %in% 0)

# log10(peak length) versus log10(expression)
my.data.pos <- cbind(log10(peak_length.C2C12[is.expressed]),log10(exp_level.2[is.expressed]))
plot(my.data.pos,col=rgb(205,51,1,30,maxColorValue=255), pch=16)

# Peak-length quantile versus expression (log-scaled y axis)
my.data.pos <- cbind(my.full.quartiles[is.expressed],exp_level.2[is.expressed])
my.broad.C2C12 <- which(my.data.pos[,1] >= 0.95)
plot(my.data.pos,log='y',col=rgb(205,51,1,30,maxColorValue=255), pch=16)
cor(my.full.quartiles[is.expressed],exp_level.2[is.expressed],method="spearman")
# C2C12 MB sample: H3K4me3 peak breadth versus gene expression.
# The same variable names as for the MT sample are reused and overwritten.
my.C2C12.length.v1 <- read.table('~/Desktop/TEST/C2C12_MB_H3K4me3_vs_expressionREDUX.bed', header=FALSE, sep="\t")

# Rows without a peak are recognised by -1 in column 10 here.
# NOTE(review): the MT analysis filters on column 7 instead -- confirm which
# column is correct for these files.
peak_score <- my.C2C12.length.v1[,10]

# getBroadIds.2 is defined elsewhere.
# NOTE(review): it receives the unfiltered table, but its result indexes the
# filtered expression vector below -- verify that the indices line up.
broad.ids <- getBroadIds.2 (my.C2C12.length.v1,0.95)

# Logical mask instead of x[-which(cond), ]: an empty which() would make the
# negative index select zero rows instead of all of them.
has.peak <- !(peak_score %in% -1)
my.C2C12.length.v1.redux <- my.C2C12.length.v1[has.peak,]
exp_level.2 <- my.C2C12.length.v1.redux[,5]
peak_score.2 <- my.C2C12.length.v1.redux[,10]

# Expression of genes with broad versus non-broad peaks.
my.exp.broad.C2C12 <- exp_level.2[broad.ids]
my.exp.nonbroad.C2C12 <- exp_level.2[-broad.ids]
# A list keeps the two unequal-sized groups intact; cbind() would recycle the
# shorter one.
boxplot(list(my.exp.broad.C2C12, my.exp.nonbroad.C2C12), outline = FALSE,
        names = c("Broad", "Non-broad"), ylab = "Expression level in mESCs (fkpm)", main = "C2C12 MB")

# Peak length and its empirical quantile among all retained peaks.
peak_length.C2C12 <- my.C2C12.length.v1.redux[,8] - my.C2C12.length.v1.redux[,7]
Fn <- ecdf(peak_length.C2C12)
my.full.quartiles <- Fn(peak_length.C2C12)

# Zero-expression genes are left out of the scatter plot. my.null keeps the
# original index vector; the logical mask is used for subsetting.
my.null <- which(exp_level.2 == 0)
is.expressed <- !(exp_level.2 %in% 0)

# Peak-length quantile versus expression (log-scaled y axis)
my.data.pos <- cbind(my.full.quartiles[is.expressed],exp_level.2[is.expressed])
my.broad.C2C12 <- which(my.data.pos[,1] >= 0.95)
plot(my.data.pos,log='y',col=rgb(205,51,1,30,maxColorValue=255), pch=16)
cor(my.full.quartiles[is.expressed],exp_level.2[is.expressed],method="spearman")
# Volcano plots of per-gene statistics for several tissues / cell types.
# Each table is tab-delimited with a header row. The DESeq2 tables are used
# through their log2FoldChange and padj columns, the SCDE table through its
# Z and p_adj columns.

# Folder holding the statistics tables (relative to the working directory).
my.stats.dir <- 'Downloads/deseq2matricesforupstreamregulatoranalysis'

# Read one tab-delimited statistics table from my.stats.dir.
read_stats_table <- function(my.file) {
  read.csv(file.path(my.stats.dir, my.file), header = TRUE, sep = "\t")
}

# Draw a volcano plot: effect size on x, -log10(adjusted p-value) on y.
#   my.effect      numeric vector of effect sizes
#   my.padj        numeric vector of adjusted p-values (same length)
#   main           plot title
#   xlim           x-axis limits, or NULL to let plot() choose
#   my.fdr         points with my.padj below this are drawn in red
#   my.lenient.fdr optional looser threshold, drawn in pink underneath
#   xlab, ylab     axis labels
# Returns (invisibly) the indices of the points below my.fdr.
# which() is used for the subsets so that NA adjusted p-values are dropped
# rather than turned into NA entries.
plot_volcano <- function(my.effect, my.padj, main, xlim = NULL, my.fdr = 0.05,
                         my.lenient.fdr = NULL,
                         xlab = "log2FoldChange", ylab = "-log10(padj)") {
  my.y <- -log10(my.padj)
  plot(my.effect, my.y, cex = 0.5, col = "darkgrey", xlim = xlim,
       main = main, xlab = xlab, ylab = ylab)
  if (!is.null(my.lenient.fdr)) {
    my.lenient <- which(my.padj < my.lenient.fdr)
    points(my.effect[my.lenient], my.y[my.lenient], cex = 0.5, pch = 16, col = "pink")
  }
  my.sig <- which(my.padj < my.fdr)
  points(my.effect[my.sig], my.y[my.sig], cex = 0.5, pch = 16, col = "red")
  abline(v = 0, lty = "dashed")
  invisible(my.sig)
}

# ---- Liver ----
my.data <- read_stats_table('2015-11-19_Liver_DESeq2_LINEAR_model_with_age _all_genes_statistics.txt')
head(my.data)
my.sig <- which(my.data$padj < 0.05)
# Distribution of fold changes among significant genes
boxplot(my.data$log2FoldChange[my.sig])
plot_volcano(my.data$log2FoldChange, my.data$padj, main = "Liver",
             xlim = c(-0.08, 0.08), my.lenient.fdr = 0.1)
summary(my.data$log2FoldChange[my.sig])
summary(2^my.data$log2FoldChange[my.sig])
summary(my.data$log2FoldChange)

# ---- NPCs ----
my.data <- read_stats_table('2015-11-19_NPCs_DESeq2_LINEAR_model_with_age _all_genes_statistics.txt')
plot_volcano(my.data$log2FoldChange, my.data$padj, main = "NPCs",
             xlim = c(-0.08, 0.08))
# Inspect the fold-change range, then plot without restricting the x axis.
summary(my.data$log2FoldChange)
plot(my.data$log2FoldChange,-log10(my.data$padj), cex=0.5)

# ---- Cerebellum ----
my.data <- read_stats_table('2015-11-19_Cerebellum_DESeq2_LINEAR_model_with_age _all_genes_statistics.txt')
plot_volcano(my.data$log2FoldChange, my.data$padj, main = "Cerebellum",
             xlim = c(-0.08, 0.08))

# ---- Single-cell NPCs (SCDE) ----
# This table has Z and p_adj columns rather than log2FoldChange and padj, and
# the x axis is left unrestricted.
my.data <- read_stats_table('2016-03-14_SCDE_NPC_aging_differential_expression_analysis_with_pvals.txt')
head(my.data)
plot_volcano(my.data$Z, my.data$p_adj, main = "scNPC",
             xlab = "Z", ylab = "-log10(p_adj)")
sum(my.data$p_adj < 0.05, na.rm = TRUE)

# ---- Olfactory bulb ----
my.data <- read_stats_table('2015-11-19_OlfactoryBulb_DESeq2_LINEAR_model_with_age _all_genes_statistics.txt')
plot_volcano(my.data$log2FoldChange, my.data$padj, main = "OlfactoryBulb",
             xlim = c(-0.08, 0.08))
# How many fold changes are exactly or nearly zero, out of how many genes
sum(my.data$log2FoldChange == 0, na.rm = TRUE)
sum(abs(my.data$log2FoldChange) < 0.005, na.rm = TRUE)
length(my.data$log2FoldChange)
sum(abs(my.data$log2FoldChange) < 0.001, na.rm = TRUE)
# Most negative fold changes among significant genes
head(sort(my.data$log2FoldChange[which(my.data$padj < 0.05)]))
# One-off installation of the packages used by later analyses.
# "utils" is not installed here: it is a base package that ships with R, so
# the repeated install.packages("utils") calls did nothing useful.
install.packages(c("doMC", "caTools"), dependencies = TRUE)

# Bioconductor packages. The installer script is sourced once, over https.
# NOTE(review): biocLite() has been superseded by BiocManager::install() in
# current Bioconductor releases -- switch if this is run on a recent R.
source("https://bioconductor.org/biocLite.R")
biocLite(c("BSgenome", "Rsamtools", "ShortRead"))
# Sample-to-sample Spearman correlation heatmap of the longevity matrix.
# The table is tab-delimited with a header row; its first column is dropped
# before computing correlations.
my.data <- read.csv('~/Downloads/2017-12-05 _postSVA_longevity_matrix.txt', header = TRUE, sep = "\t")
dim(my.data)

# Install pheatmap only if it is missing, then attach it.
if (!requireNamespace("pheatmap", quietly = TRUE)) {
  install.packages('pheatmap')
}
library('pheatmap')

# cor() handles missing values through its `use` argument (it has no
# na.action argument); "complete.obs" drops rows containing any NA.
my.cor <- cor(my.data[,-1], use = "complete.obs", method = "spearman")

# On-screen version
pheatmap(my.cor)

# Large PDF version without column labels and with a small font
pdf("TEst.pdf",height = 20, width = 20)
pheatmap(my.cor, show_colnames = FALSE, fontsize = 5)
dev.off()

# The spreadsheet viewer is only opened in an interactive session.
if (interactive()) {
  View(my.data)
}
# Pathway enrichment on public RNA-seq datasets.
# All relative paths below (the sourced script and ./INPUT/...) are resolved
# against this working directory.
my.base.dir <- '/Users/BB_2012/Dropbox/manuscripts_and_publications/2018_aging_epigenomics_data_description/aging_omics_paper/Genome_Research_submission/New_code_for_checking/Public_transcriptome_analysis/Pathway_analysis/Running_test'
setwd(my.base.dir)

# Install missing packages BEFORE they are attached or needed.
if (!requireNamespace("mHG", quietly = TRUE)) {
  install.packages('mHG')
}
if (!requireNamespace("DESeq2", quietly = TRUE)) {
  # NOTE(review): biocLite() has been superseded by BiocManager::install() in
  # current Bioconductor releases.
  source("https://bioconductor.org/biocLite.R")
  biocLite("DESeq2")
}

library('mHG')
options(stringsAsFactors = FALSE)
# Presumably provides run_pathway_enrich(), which is called below -- confirm.
source('GOrilla_statistics_functions.R')

# Gene-set (.gmt) files: the Param folder first, then the MSigDB folder, each
# listed in alphabetical order. The folders are listed one at a time because
# a single list.files() call over both would sort across folders. Paths are
# built by list.files() itself, so no doubled "/" separators are produced.
my.gmt.dirs <- file.path(my.base.dir, "INPUT", "GMT", c("Param", "MSigDB"))
my.gmt.sets <- unlist(lapply(my.gmt.dirs, list.files, pattern = "\\.gmt$", full.names = TRUE))
my.gmt.sets

# Short labels, given in the same order as my.gmt.sets.
my.gmt.set.names <- c("KEGG_2017_no_diseases_UC",
                      "KEGG_2017_no_diseases",
                      "KEGG_2017_All",
                      "C2_CGP",
                      "Biocarta",
                      "Kegg",
                      "reactome",
                      "C2cp",
                      "C3Mir",
                      "C3TF",
                      "C5BP",
                      "C5CC",
                      "C5MF",
                      "C7_All",
                      "MSigDB_Hallmarks"
)
# Print files next to labels to check the pairing by eye.
cbind(my.gmt.sets, my.gmt.set.names)

# Positions 1 and 15 of the lists above.
# NOTE(review): presumably read as a global by run_pathway_enrich() to choose
# which gene sets to test -- confirm in GOrilla_statistics_functions.R.
my.run <- c(1,15)

# Processed public datasets
load("./INPUT/RData/2018-09-24_Boisvert_Cereb_astrocytes_RNAseq.RData")
load("./INPUT/RData/2018-09-24_Bochkis_Liver_RNAseq.RData")
load("./INPUT/RData/2018-09-24_White_Liver_RNAseq.RData")

run_pathway_enrich("Bochkis_Liver", my.bochkis.RNAseq.process[[1]])
# Set-up for summarising enrichment results across datasets.
# The plotting packages are attached first so that they are available before
# the first call that draws a figure.
library('pheatmap')
library(ggplot2)
library(scales)

# All relative paths below are resolved against this working directory.
setwd('/Users/BB_2012/Dropbox/manuscripts_and_publications/2018_aging_epigenomics_data_description/aging_omics_paper/Genome_Research_submission/New_code_for_checking/Public_transcriptome_analysis/Pathway_analysis/summarizing_test//')
options(stringsAsFactors = FALSE)
# Sourced once (it was previously sourced twice in a row).
source('Process_gorilla_like_results_FUNCTIONS_vCOMP_v2.R')

# Tables of pathways significant in the mouse analysis (tab-delimited, with a
# header row).
my.hallmark.mouse <- read.table('INPUT/2018-01-05_Enrichment_table_MSIgDB_Hallmark_Datasets_pathways_significant_in_4_or_more.txt',sep = "\t", header = TRUE)
my.kegg.mouse2 <- read.csv('INPUT/2017-06-17_Enrichment_table_KEGG_2017_pathways_significant_in_4_or_more.txt',sep = "\t", header = TRUE)

# NOTE(review): at this point get_enrich_balloons_all_species() must come from
# the sourced script above; the definition further down in this file has not
# been evaluated yet -- confirm.
get_enrich_balloons_all_species("Hallmark", my.hallmark.mouse)
#########################################################################################################
# Example argument values for stepping through the function body by hand:
# my.data.name <- "MSigDB_Hallmarks"
# my.mouse.sigs = my.hallmark.mouse
# my.colnames = c("Liver","Cereb")

# Balloon plot and table of pathway enrichment across mouse and public datasets.
#
# Arguments:
#   my.data.name  Pattern (regular expression, as used by list.files) that
#                 selects the enrichment result files of one gene-set
#                 collection in INPUT/FDR5percent_Mouse and
#                 INPUT/FDR5percent_Public.
#   my.mouse.sigs Numeric table whose row names are the pathways to display;
#                 rows are ordered by decreasing row mean.
#   my.colnames   Tissue tags; mouse files are matched on "_<tag>" and taken
#                 in this order.
#
# Each enrichment file is tab-delimited with a header and is read through its
# Gene_Set, p.val, Direction ('UP'/'DOWN') and Enrichment columns.
#
# Side effects: writes the signed enrichment table to ./Stats_tables/ and the
# balloon plot (PDF) to ./MYDATA/, creating either folder if it is missing.
# File names start with the current date.
#
# Returns the value of dev.off() from closing the PDF device.
#
# Stops with an error if the number of files found differs from the number of
# sample labels, because labels are assigned to files by position.
get_enrich_balloons_all_species <- function(my.data.name, my.mouse.sigs, my.colnames = c("Liver","Cereb") ) {

  my.mouse.dir  <- "INPUT/FDR5percent_Mouse"
  my.public.dir <- "INPUT/FDR5percent_Public"

  # get files from dataset
  my.enrich.sets.mouse  <- list.files(my.mouse.dir, pattern = my.data.name)
  my.enrich.sets.others <- list.files(my.public.dir, pattern = my.data.name)

  # mouse files, reordered to follow my.colnames
  my.columns.mouse <- unlist(lapply(my.colnames, function(my.tissue) {
    grep(paste0("_", my.tissue), my.enrich.sets.mouse)
  }))

  # public files, in the fixed order Bochkis, White, Boisvert
  my.public.tags <- c("Bochkis_Liver", "White_Liver", "Boisvert_cereb_astrocytes")
  my.public.sets <- unlist(lapply(my.public.tags, function(my.tag) {
    my.enrich.sets.others[grep(my.tag, my.enrich.sets.others)]
  }))

  my.enrichment.files <- c(file.path(my.mouse.dir, my.enrich.sets.mouse[my.columns.mouse]),
                           file.path(my.public.dir, my.public.sets))

  my.samples <- c("Benayoun_liver",
                  "Benayoun_cerebellum",
                  "Bochkis_liver",
                  "White_Liver",
                  "Boisvert_Cereb_astrocytes")

  # Labels are paired with files by position, so the counts must agree;
  # otherwise columns would be mislabelled or a missing file would be read.
  if (length(my.enrichment.files) != length(my.samples)) {
    stop("Expected ", length(my.samples), " enrichment files matching '", my.data.name,
         "' but found ", length(my.enrichment.files), ": ",
         paste(my.enrichment.files, collapse = ", "), call. = FALSE)
  }

  # get data from significant FDR 0.05
  my.tissues.kegg <- lapply(my.enrichment.files, read.csv, sep = "\t", header = TRUE)
  names(my.tissues.kegg) <- my.samples

  my.pathways <- rownames(my.mouse.sigs)

  ####
  # prepare output data (rows = pathways, columns = samples)
  my.dimnames <- list(my.pathways, my.samples)
  # -log10(p-value) matrix; the default 0 corresponds to p = 1 (no enrichment)
  my.matrix  <- matrix(0, length(my.pathways), length(my.samples), dimnames = my.dimnames)
  # signed enrichment matrix; the default 0 means no enrichment
  my.matrix2 <- matrix(0, length(my.pathways), length(my.samples), dimnames = my.dimnames)

  # collect data from files
  for (i in seq_along(my.pathways)) {
    for (j in seq_along(my.samples)) { # tissues
      my.hits <- my.tissues.kegg[[j]]
      my.id <- which(my.hits$Gene_Set %in% my.pathways[i])

      # exactly one row: significant in this tissue, and not reported on both
      # tail ends (which would give two rows)
      if (length(my.id) == 1) {
        # a p-value of exactly 0 gives Inf here
        my.matrix[i,j] <- -log10(my.hits$p.val[my.id])

        # enrichment is stored with a sign: positive for UP, negative for DOWN
        if (my.hits$Direction[my.id] == 'UP') {
          my.matrix2[i,j] <- my.hits$Enrichment[my.id]
        } else if (my.hits$Direction[my.id] == 'DOWN') {
          my.matrix2[i,j] <- - my.hits$Enrichment[my.id]
        }
      }
    }
  }

  # get into data frame (all mouse significant are plotted)
  my.res.enrich <- data.frame(my.matrix2)
  my.pval.enrich <- data.frame(my.matrix)

  # sort by average change in original mouse analysis (transcriptome figure), stored in my.mouse.sigs
  my.average <- rowMeans(my.mouse.sigs)
  my.sorted <- sort(my.average, index.return = TRUE, decreasing = TRUE)
  my.res.enrich2 <- my.res.enrich[my.sorted$ix,]
  my.pval.enrich2 <- data.frame(my.pval.enrich[my.sorted$ix,])

  # write the signed enrichment table
  dir.create('./Stats_tables', showWarnings = FALSE)
  my.txtname <- paste0('./Stats_tables/',
                       paste(Sys.Date(),"Enrichment_table_All_species",my.data.name,"pathways_significant_in_Mouse_data.txt", sep="_"))
  write.table(my.res.enrich2, file = my.txtname, sep = "\t", quote = FALSE)

  my.res.enrich2$Pathnames <- rownames(my.res.enrich2)

  # format for ggplot: one row per pathway/sample pair, built per sample and
  # bound once instead of growing a data frame inside a loop
  my.res.enrich3 <- do.call(rbind, lapply(my.samples, function(my.sample) {
    data.frame(Pathnames = my.res.enrich2$Pathnames,
               aging_signed_enricment = my.res.enrich2[, my.sample],
               condition = rep(my.sample, nrow(my.res.enrich2)),
               minusLog10Pval = my.pval.enrich2[, my.sample],
               stringsAsFactors = FALSE)
  }))

  # colour scale anchored at the observed extremes, with white at zero
  my.max <- max(my.res.enrich3$aging_signed_enricment)
  my.min <- min(my.res.enrich3$aging_signed_enricment)
  my.values <- c(my.min,0.75*my.min,0.5*my.min,0.25*my.min,0,0.25*my.max,0.5*my.max,0.75*my.max,my.max)
  my.scaled <- scales::rescale(my.values, to = c(0, 1))
  my.color.vector <- c("darkblue","dodgerblue4","dodgerblue3","dodgerblue1","white","lightcoral","brown1","firebrick2","firebrick4")

  # to preserve the wanted order
  my.res.enrich3$condition <- factor(my.res.enrich3$condition, levels = unique(my.res.enrich3$condition))
  my.res.enrich3$Pathnames <- factor(my.res.enrich3$Pathnames, levels = rev(unique(my.res.enrich3$Pathnames)))

  # Functions are called through their namespace so that the plot does not
  # depend on ggplot2/scales having been attached by the caller.
  my.plot <- ggplot2::ggplot(my.res.enrich3,
                             ggplot2::aes(x = condition, y = Pathnames,
                                          colour = aging_signed_enricment,
                                          size = minusLog10Pval)) +
    ggplot2::theme_bw() +
    ggplot2::geom_point(shape = 16)
  my.plot <- my.plot + ggplot2::ggtitle("Aging dysregulated pathways") +
    ggplot2::labs(x = "Tissue/condition", y = "Gene Set")
  my.plot <- my.plot + ggplot2::scale_colour_gradientn(colours = my.color.vector, space = "Lab",
                                                       na.value = "grey50", guide = "colourbar",
                                                       values = my.scaled)

  dir.create('./MYDATA', showWarnings = FALSE)
  my.pdfname <- paste0('./MYDATA/',
                       paste(Sys.Date(),"Enrichment_BALLOON_plot_All_species",my.data.name,"pathways_significant_in_Mouse_data.pdf", sep="_"))

  pdf(my.pdfname, onefile = FALSE, height = max(5, length(my.pathways)/3), width = 15)
  # Close the device even if drawing fails, so no PDF device is left open.
  my.device.open <- TRUE
  on.exit(if (my.device.open) dev.off(), add = TRUE)

  print(my.plot)

  my.device.open <- FALSE
  dev.off()
}
# Produce the balloon plots for the Hallmark and KEGG 2017 collections.
get_enrich_balloons_all_species("Hallmark", my.hallmark.mouse)
get_enrich_balloons_all_species("KEGG_2017", my.kegg.mouse2)
# ---------------------------------------------------------------------------
# From here on, the statements of get_enrich_balloons_all_species are run one
# by one at top level, with its arguments set as global variables and
# intermediate values printed for inspection.
# ---------------------------------------------------------------------------
my.data.name <- "MSigDB_Hallmarks"
my.mouse.sigs = my.hallmark.mouse
my.colnames = c("Liver","Cereb")
# Try several spellings of the file-name pattern, printing what each matches
# and the full folder listing for comparison.
my.enrich.sets.mouse <- list.files("INPUT/FDR5percent_Mouse/", pattern = my.data.name)
my.enrich.sets.mouse
list.files("INPUT/FDR5percent_Mouse/")
my.data.name <- "MSigDB_Hallmark"
my.enrich.sets.mouse <- list.files("INPUT/FDR5percent_Mouse/", pattern = my.data.name)
my.enrich.sets.mouse
my.data.name
my.data.name <- "MSIgDB_Hallmark"
my.enrich.sets.mouse <- list.files("INPUT/FDR5percent_Mouse/", pattern = my.data.name)
my.enrich.sets.mouse
# Same pattern applied to the public-dataset folder, split by dataset tag
my.enrich.sets.others   <- list.files("INPUT/FDR5percent_Public/",pattern = my.data.name)
my.enrich.sets.bochkis  <- my.enrich.sets.others[grep("Bochkis_Liver",my.enrich.sets.others)]
my.enrich.sets.white    <- my.enrich.sets.others[grep("White_Liver",my.enrich.sets.others)]
my.enrich.sets.boisvert <- my.enrich.sets.others[grep("Boisvert_cereb_astrocytes",my.enrich.sets.others)]
my.enrich.sets.bochkis
my.enrich.sets.others
list.files("INPUT/FDR5percent_Public/")
my.data.name
# Final pattern used for the rest of the replay (the same value passed to the
# first function call above)
my.data.name <- "Hallmark"
# get files from dataset
my.enrich.sets.mouse <- list.files("INPUT/FDR5percent_Mouse/", pattern = my.data.name)
my.enrich.sets.others   <- list.files("INPUT/FDR5percent_Public/",pattern = my.data.name)
my.enrich.sets.bochkis  <- my.enrich.sets.others[grep("Bochkis_Liver",my.enrich.sets.others)]
my.enrich.sets.white    <- my.enrich.sets.others[grep("White_Liver",my.enrich.sets.others)]
my.enrich.sets.boisvert <- my.enrich.sets.others[grep("Boisvert_cereb_astrocytes",my.enrich.sets.others)]
# Print every list to check what was found
my.enrich.sets.mouse
my.enrich.sets.others
my.enrich.sets.bochkis
my.enrich.sets.boisvert
my.enrich.sets.white
# get file names and path
# (the mouse folder string ends in "/" and sep is "/", so mouse paths contain "//")
my.files.sets.mouse    <- paste("INPUT/FDR5percent_Mouse/",my.enrich.sets.mouse, sep="/")
my.files.sets.bochkis  <- paste("INPUT/FDR5percent_Public/", my.enrich.sets.bochkis , sep="")
my.files.sets.white    <- paste("INPUT/FDR5percent_Public/", my.enrich.sets.white   , sep="")
my.files.sets.boisvert <- paste("INPUT/FDR5percent_Public/", my.enrich.sets.boisvert, sep="")
my.files.sets.bochkis
my.files.sets.mouse
# reorder files based on colnames: for each tag, the positions of the mouse
# files whose name contains "_<tag>"
my.columns.mouse <- c()
for (i in 1:length(my.colnames)) {
my.columns.mouse <- c(my.columns.mouse,grep(paste("_",my.colnames[i],sep=""),my.enrich.sets.mouse))
}
my.columns.mouse
# All input files: mouse files first, then Bochkis, White and Boisvert
my.enrichment.files <- c(my.files.sets.mouse[my.columns.mouse],
my.files.sets.bochkis  ,
my.files.sets.white,
my.files.sets.boisvert)
my.enrichment.files
# Sample labels, assigned to the files above by position
my.samples <- c("Benayoun_liver",
"Benayoun_cerebellum",
"Bochkis_liver",
"White_Liver",
"Boisvert_Cereb_astrocytes")
# get data from significant FDR 0.05
# One list element per file; each file is read as a tab-delimited table with
# a header row.
my.tissues.kegg <- vector(length=length(my.enrichment.files), mode="list")
names(my.tissues.kegg) <- my.samples
for ( i in 1:length(my.samples)) {
my.file <- my.enrichment.files[i]
my.tissues.kegg[[i]]  <- read.csv(my.file,sep="\t", header=T)
}
# Pathways to display are the row names of the mouse significance table
my.pathways <- rownames(my.mouse.sigs)
my.pathways
####
# Continuation of the top-level replay of get_enrich_balloons_all_species:
# fill the result matrices, write the table and draw the balloon plot.
# prepare output data (rows = pathways, columns = samples)
# p-val matrix
my.matrix <- matrix(0,length(my.pathways),length(my.samples)) # default: -log10(1) pval == 0 no enrichment
# Enrichment matrix
my.matrix2 <- matrix(0,length(my.pathways),length(my.samples)) # initialize with Enrichment = 0 if no enrich
# matrix with record of significance
my.matrix3 <- matrix(0,length(my.pathways),length(my.samples)) # to get significant pathways
colnames(my.matrix) <- my.samples
colnames(my.matrix2) <- my.samples
colnames(my.matrix3) <- my.samples
rownames(my.matrix) <- my.pathways
rownames(my.matrix2) <- my.pathways
rownames(my.matrix3) <- my.pathways
# collect data from files
for (i in 1:length(my.pathways)) {
#print(my.pathways[i])
for (j in 1:length(my.samples)) { # tissues
# rows of this sample's table whose Gene_Set equals the current pathway
my.id <- which(my.tissues.kegg[[j]]$Gene_Set %in% my.pathways[i])
if(length(my.id) == 1) { # if was significant in this tissue (and not on both tail ends, which would be 2)
my.matrix[i,j] <- -log10(my.tissues.kegg[[j]]$p.val[my.id]) # log(0) is undefined
# enrichment is stored with a sign: positive for UP, negative for DOWN
if (my.tissues.kegg[[j]]$Direction[my.id] == 'UP') {
my.matrix2[i,j] <- my.tissues.kegg[[j]]$Enrichment[my.id]
} else if (my.tissues.kegg[[j]]$Direction[my.id] == 'DOWN'){
my.matrix2[i,j] <- - my.tissues.kegg[[j]]$Enrichment[my.id]
}
my.matrix3[i,j] <- 1
}
}
}
# NOTE(review): my.res.enrich is only assigned a few lines below; printing it
# here fails unless it already exists in the session -- confirm.
my.res.enrich
my.matrix2
# get into data frame (all mouse significant are plotted)
my.res.enrich <- data.frame(my.matrix2)
my.pval.enrich <- data.frame(my.matrix)
my.pval.enrich
# sort by average change in original mouse analysis (transcriptome figure), stored in my.mouse.sigs
my.average <- apply(my.mouse.sigs,1,mean)
my.sorted <- sort(my.average,index.return=T,decreasing=T)
my.res.enrich2 <- my.res.enrich[my.sorted$ix,]
my.pval.enrich2 <- data.frame(my.pval.enrich[my.sorted$ix,])
# Output table name: ./Stats_tables/<date>_Enrichment_table_All_species_<name>_...
my.txtname <- paste('./Stats_tables/',
paste(Sys.Date(),"Enrichment_table_All_species",my.data.name,"pathways_significant_in_Mouse_data.txt", sep="_"),
sep="")
write.table(my.res.enrich2,file=my.txtname,sep="\t",quote=F)
# Added after writing, so the table on disk does not contain this column
my.res.enrich2$Pathnames <- rownames(my.res.enrich2)
# format for ggplot: one row per pathway/sample pair, starting with the first
# sample and appending the remaining ones
my.res.enrich3 <- cbind(my.res.enrich2[,c('Pathnames',my.samples[1])],rep(my.samples[1],dim(my.res.enrich2)[1]),my.pval.enrich2[,my.samples[1]])
colnames(my.res.enrich3) <- c('Pathnames','aging_signed_enricment','condition','minusLog10Pval')
for ( h in 2:length(my.samples)) {
my.new <- cbind(my.res.enrich2[,c('Pathnames',my.samples[h])],rep(my.samples[h],dim(my.res.enrich2)[1]),my.pval.enrich2[,my.samples[h]])
colnames(my.new) <- colnames(my.res.enrich3)
my.res.enrich3 <- rbind(my.res.enrich3,
my.new)
}
# Colour scale: nine anchor values from the minimum through 0 to the maximum,
# rescaled to [0, 1] and paired with the nine colours below (white at 0)
my.max <- max(my.res.enrich3$aging_signed_enricment)
my.min <- min(my.res.enrich3$aging_signed_enricment)
my.values <- c(my.min,0.75*my.min,0.5*my.min,0.25*my.min,0,0.25*my.max,0.5*my.max,0.75*my.max,my.max)
my.scaled <- rescale(my.values, to = c(0, 1))
my.color.vector <- c("darkblue","dodgerblue4","dodgerblue3","dodgerblue1","white","lightcoral","brown1","firebrick2","firebrick4")
# to preserve the wanted order
my.res.enrich3$condition <- factor(my.res.enrich3$condition, levels = unique(my.res.enrich3$condition))
my.res.enrich3$Pathnames <- factor(my.res.enrich3$Pathnames, levels = rev(unique(my.res.enrich3$Pathnames)))
# Output plot name: ./MYDATA/<date>_Enrichment_BALLOON_plot_All_species_<name>_...
my.pdfname <- paste('./MYDATA/',
paste(Sys.Date(),"Enrichment_BALLOON_plot_All_species",my.data.name,"pathways_significant_in_Mouse_data.pdf", sep="_"),
sep="")
# Page height grows with the number of pathways, with a minimum of 5
pdf(my.pdfname, onefile=F, height = max(5, length(my.pathways)/3), width=15)
# Point colour = signed enrichment, point size = -log10(p-value)
my.plot <- ggplot(my.res.enrich3,aes(x=condition,y=Pathnames,colour=aging_signed_enricment,size=minusLog10Pval))+ theme_bw()+ geom_point(shape = 16)
my.plot <- my.plot + ggtitle("Aging dysregylated pathways") + labs(x = "Tissue/condition", y = "Gene Set")
my.plot <- my.plot + scale_colour_gradientn(colours = my.color.vector,space = "Lab", na.value = "grey50", guide = "colourbar", values = my.scaled)
print(my.plot)
dev.off()
