# Scratch comparison of two ways to obtain a chi-square upper-tail p-value
# (1 degree of freedom): `lower.tail = FALSE` versus `1 - pchisq(...)`.
loadedNamespaces()

# pchisq() has no `upper.tail` argument; the upper tail is requested with
# `lower.tail = FALSE`.
pchisq(69, 1, lower.tail = TRUE)
pchisq(69, 1, lower.tail = FALSE)
pchisq(69, 1)

# Subtracting from 1 loses precision once the tail probability is tiny (the
# result collapses towards 0), so prefer the `lower.tail = FALSE` form for
# large test statistics.
1 - pchisq(69, 1)
1 - pchisq(200, 1)
pchisq(200, 1, lower.tail = FALSE)
?pchisq

# For a moderate statistic both forms agree.
pchisq(10, 1, lower.tail = FALSE)
1 - pchisq(10, 1)
# 2 x 2 contingency table assembled from its two columns, then tested
# one-sided (alternative: odds ratio greater than 1).
mat <- cbind(c(5, 26), c(234, 12977))
fisher.test(mat, alternative = "greater")
# Load two EST length tables (judging by the file names) and summarise them.
# read.table() is left at its defaults, as in the original calls.
# The tables are assigned rather than echoed so the console is not flooded
# with every row.
german <- read.table('/Volumes/LaCie/Genome_suite/Annotations_MAKER/Length_of_German_ESTs.txt')
summary(german)

# The file name carries a single ".txt" extension (an earlier attempt with
# ".txt.txt" was immediately retried with this path).
oases <- read.table('/Volumes/LaCie/Genome_suite/Annotations_MAKER/Length_of_aligned_oases_ESTs.txt')
summary(oases)
# Fisher's exact tests on 2 x 2 overlap tables. Each table is filled column by
# column as c(shared, only-in-first-set, only-in-second-set, in-neither).
# matrix() needs explicit 2 x 2 dimensions: without them it builds a 4 x 1
# matrix, which fisher.test() rejects.

# --- Table 1 -----------------------------------------------------------------
462 - 17
28494 - 602 - 462 - 17
# NOTE(review): 17 + 602 + 445 + 27413 = 28477, not 28494. If 462 already
# includes the 17 shared entries (as `462 - 17` suggests) the last cell is
# probably 27430 -- confirm against the source counts before reporting.
mat <- matrix(c(17, 602, 445, 27413), 2, 2)
fisher.test(mat)
fisher.test(mat, alternative = "greater")
fisher.test(mat, alternative = "less")

# --- Table 2 -----------------------------------------------------------------
# 497 already contains the 7 shared genes, so they must not be subtracted a
# second time: 7 + 94 + 490 + 13046 = 13637.
13637 - 94 - 497
mat <- matrix(c(7, 94, 490, 13046), 2, 2)
fisher.test(mat, alternative = "greater")

# --- Table 3 -----------------------------------------------------------------
497 - 18
232 - 18
# Subtract the exclusive counts (214, 479) and the 18 shared genes once each so
# that the four cells add up to the 13637-gene universe.
13637 - 214 - 479 - 18
mat <- matrix(c(18, 214, 479, 12926), 2, 2)
fisher.test(mat)
# Labelled 2 x 2 contingency table for the 18 / 479 / 214 overlap.
# Earlier attempts used 13288 and 12908 for the last cell; 12926 is the value
# for which the four cells add up to 13637 (18 + 479 + 214 + 12926).
longevitymapLarge <-
  matrix(c(18, 479, 214, 12926),
         nrow = 2,
         dimnames = list(Guess = c("Aging", "NoAging"),
                         Truth = c("selection", "Noselection")))
longevitymapLarge

# Two-sided test, then the one-sided test (odds ratio greater than 1).
fisher.test(longevitymapLarge)
fisher.test(longevitymapLarge, alternative = "greater")
# Empirical null for an overlap count: draw 31 values out of 1..13242 without
# replacement and count how many are below 238.

# One draw, returning the number of sampled values below 238.
my.draw.overlap <- function() {
  sum(sample(1:13242, 31, replace = FALSE) < 238)
}

# Single trial, kept to eyeball what one draw looks like.
my.vec.rand <- sample(1:13242, 31, replace = FALSE)
my.vec.rand
sum(my.vec.rand < 238)

# Small run (1e3 draws) as a sanity check of the distribution.
my.overlap.simul <- vapply(seq_len(1e3), function(i) my.draw.overlap(), integer(1))
my.overlap.simul
summary(my.overlap.simul)

# Full run (1e6 draws) used for the empirical p-value.
# NOTE(review): no RNG seed is set, so the p-value varies slightly between
# runs; add set.seed() if the number has to be reproducible.
my.overlap.simul <- vapply(seq_len(1e6), function(i) my.draw.overlap(), integer(1))

my.simul.fn <- ecdf(my.overlap.simul)
my.pval.simul <- 1 - my.simul.fn(4.9) # so that 5 is included
my.pval.simul
summary(my.overlap.simul)
13637 - 492 - 475 - 22
# Compare gene counts from the pooled NPC libraries with the per-gene sum of
# the single-cell libraries (3m and 29m samples).
#
# Each count table is read once. `skip = 1` drops the first line of the file
# before the header row, and `stringsAsFactors = FALSE` keeps the annotation
# columns as plain character vectors.
my.npc.pool <- read.csv("/Volumes/MyBook_3/BD_aging_project/RNAseq/NPC_Pool/STAR/Aging_NPCs_pool_counts_genes.txt",
                        skip = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
my.npc.sc.3m <- read.csv("/Volumes/MyBook_3/BD_aging_project/RNAseq/scRNA-seq_Ben/BAM/Young_ForBerenice/scRNAseq_NPC_3m_counts_genes.txt",
                         skip = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
my.npc.sc.29m <- read.csv("/Volumes/MyBook_3/BD_aging_project/RNAseq/scRNA-seq_Ben/BAM/Old_ForBerenice/scRNAseq_NPC_29m_counts_genes.txt",
                          skip = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)

my.annot.cols <- 1:6
colnames(my.npc.sc.3m)
head(my.npc.pool)
head(my.npc.pool$NPC_3m5.CGATGTAligned.out.bam)

# Per-gene totals across single cells. The cells are COLUMNS (7:74 and 7:46),
# so the index goes after the comma; indexing rows instead would feed the
# character annotation columns to the sum.
my.sc.3m.pooled <- rowSums(my.npc.sc.3m[, 7:74])
my.sc.29m.pooled <- rowSums(my.npc.sc.29m[, 7:46])
head(my.sc.3m.pooled)

# Quick look at each pool library against the matching single-cell sum.
# Both axes are logarithmic, so no xlim/ylim starting at 0 is given.
# NOTE(review): the pool columns are NPC_3m6.TGACCA..., NPC_3m5.CGATGT...,
# NPC_29m6.CTTGTA... and NPC_29m5.CAGATC...; an earlier attempt with
# NPC_3m6.CGATGT... was dropped because that name is not used anywhere else.
plot(my.npc.pool$NPC_3m6.TGACCAAligned.out.bam, my.sc.3m.pooled, log = "xy", col = "red")
plot(my.npc.pool$NPC_3m5.CGATGTAligned.out.bam, my.sc.3m.pooled, log = "xy", col = "red")
plot(my.npc.pool$NPC_29m6.CTTGTAAligned.out.bam, my.sc.29m.pooled, log = "xy", col = "red")
plot(my.npc.pool$NPC_29m5.CAGATCAligned.out.bam, my.sc.29m.pooled, log = "xy", col = "red")

# Spearman correlations, all rows.
cor(my.npc.pool$NPC_3m6.TGACCAAligned.out.bam, my.sc.3m.pooled, method = "spearman")
cor(my.npc.pool$NPC_3m5.CGATGTAligned.out.bam, my.sc.3m.pooled, method = "spearman")
cor(my.npc.pool$NPC_29m6.CTTGTAAligned.out.bam, my.sc.29m.pooled, method = "spearman")
cor(my.npc.pool$NPC_29m5.CAGATCAligned.out.bam, my.sc.29m.pooled, method = "spearman")
# NOTE(review): the values recorded in the trailing comments of this section
# repeat between different comparisons (0.8955164 and 0.9603109 each appear
# twice), so some were probably copy-pasted -- re-run before quoting them.
cor(my.npc.pool$NPC_3m5.CGATGTAligned.out.bam, my.npc.pool$NPC_3m6.TGACCAAligned.out.bam, method = "spearman") # 0.8955164
cor(my.npc.pool$NPC_29m5.CAGATCAligned.out.bam, my.npc.pool$NPC_29m6.CTTGTAAligned.out.bam, method = "spearman") # 0.9603109

# Draw the four pool-vs-single-cell scatter plots on a 2 x 2 grid of the
# current graphics device, then restore a single-panel layout.
my.plot.pool.vs.sc <- function() {
  par(mfrow = c(2, 2))
  plot(my.npc.pool$NPC_3m6.TGACCAAligned.out.bam, my.sc.3m.pooled, log = "xy", col = "red",
       xlab = "3m6 pool counts", ylab = "3m pooled single cells")
  plot(my.npc.pool$NPC_3m5.CGATGTAligned.out.bam, my.sc.3m.pooled, log = "xy", col = "red",
       xlab = "3m5 pool counts", ylab = "3m pooled single cells")
  plot(my.npc.pool$NPC_29m6.CTTGTAAligned.out.bam, my.sc.29m.pooled, log = "xy", col = "red",
       xlab = "29m6 pool counts", ylab = "29m pooled single cells")
  plot(my.npc.pool$NPC_29m5.CAGATCAligned.out.bam, my.sc.29m.pooled, log = "xy", col = "red",
       xlab = "29m5 pool counts", ylab = "29m pooled single cells")
  par(mfrow = c(1, 1))
}

# On-screen version.
my.plot.pool.vs.sc()

# Same correlations with the rows whose Geneid contains "ERCC-" removed.
spikes.idx <- grep("ERCC-", my.npc.pool$Geneid)
spikes.idx
cor(my.npc.pool$NPC_3m6.TGACCAAligned.out.bam[-spikes.idx], my.sc.3m.pooled[-spikes.idx], method = "spearman") # 0.8774969
cor(my.npc.pool$NPC_3m5.CGATGTAligned.out.bam[-spikes.idx], my.sc.3m.pooled[-spikes.idx], method = "spearman") # 0.8955164
cor(my.npc.pool$NPC_3m5.CGATGTAligned.out.bam[-spikes.idx], my.npc.pool$NPC_3m6.TGACCAAligned.out.bam[-spikes.idx], method = "spearman") # 0.9603109
cor(my.npc.pool$NPC_29m6.CTTGTAAligned.out.bam[-spikes.idx], my.sc.29m.pooled[-spikes.idx], method = "spearman") # 0.8808903
cor(my.npc.pool$NPC_29m5.CAGATCAligned.out.bam[-spikes.idx], my.sc.29m.pooled[-spikes.idx], method = "spearman") # 0.8816057
cor(my.npc.pool$NPC_29m5.CAGATCAligned.out.bam[-spikes.idx], my.npc.pool$NPC_29m6.CTTGTAAligned.out.bam[-spikes.idx], method = "spearman") # 0.96567

# PDF version of the 2 x 2 panel.
pdf("Correlation_single_cells_pools_NPCs_3mvs29m_2015-11-9.pdf")
my.plot.pool.vs.sc()
dev.off()
library(edgeR)
library(edgeR)
# Restore the objects stored in the heart Hallmark enrichment .RData file into
# the current workspace.
load('/Volumes/MyBook_3/BD_aging_project/RNAseq/All_tissues_analysis/Pathway_enrichment/RData/2016-08-18_Heart_Hallmark_Datasets_26_object.RData')
# Print kegg.results (presumably one of the objects restored by load() above --
# TODO confirm).
kegg.results
# Print the HALLMARK_OXIDATIVE_PHOSPHORYLATION entry of its driver.genes element.
kegg.results$driver.genes$HALLMARK_OXIDATIVE_PHOSPHORYLATION
setwd('/Volumes/MyBook_3/BD_aging_project/Public_datasets/GTex/')
options(stringsAsFactors = FALSE)


# 2016-12-20
# try to compare GTEx aging and my data

# Per-tissue result objects from the separate DESeq2 runs (absolute paths).
load("/Volumes/MyBook_3/BD_aging_project/RNAseq/All_tissues_analysis/DEseq2_runs/Separate/RNA_seq_result_cereb_2015-11-19.RData")
load("/Volumes/MyBook_3/BD_aging_project/RNAseq/All_tissues_analysis/DEseq2_runs/Separate/RNA_seq_result_Heart_2015-11-19.RData")
load("/Volumes/MyBook_3/BD_aging_project/RNAseq/All_tissues_analysis/DEseq2_runs/Separate/RNA_seq_result_Liver_2015-11-19.RData")

# GTEx result objects, resolved relative to the working directory set above.
# The file names use an underscore after the date; "\ " is not a valid escape
# inside an R string and stops the script from parsing.
# NOTE(review): load() silently overwrites objects of the same name, so the
# GTEx files are loaded last, as in the original session -- confirm that the
# two sets of .RData files do not share object names.
load("2016-12-17_Heart_GTEx_data_DEseq2_aging_genename.RData")
load("2016-12-17_Cerebellum_GTEx_data_DEseq2_aging_genename.RData")
load("2016-12-17_Liver_GTEx_data_DEseq2_aging_genename.RData")
# One-off package installation (needs network access; not part of the analysis).
# `dependencies` is spelled out instead of relying on partial matching of `dep`.
install.packages("doMC", dependencies = TRUE)
install.packages("caTools", dependencies = TRUE)
# `utils` is a base package that ships with R; it cannot be installed or
# updated through install.packages(), so those calls are dropped.

# Bioconductor packages are installed through BiocManager; the biocLite.R
# script is retired and was being sourced over plain http.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("BSgenome")
library(pwr)
?pwr.t.test

# Every scenario below is a two-sample, two-sided t-test at sig.level = 0.05.
# The wrapper fixes those settings and forwards n, d and power unchanged, so
# whichever of the three is left NULL is the quantity pwr.t.test() solves for.
my.two.sample.power <- function(n = NULL, d = NULL, power = NULL) {
  pwr.t.test(n = n, d = d, sig.level = 0.05, power = power,
             type = "two.sample", alternative = "two.sided")
}

# Solve for n given effect size and power.
my.two.sample.power(d = 2, power = 0.8)
my.two.sample.power(d = 2, power = 0.9)
my.two.sample.power(d = 1.5, power = 0.9)
my.two.sample.power(d = 2, power = 0.95)
my.two.sample.power(d = 1.5, power = 0.95)
my.two.sample.power(d = 0.15, power = 0.80)

# Solve for power given n = 5 and d = 2.
my.two.sample.power(n = 5, d = 2)

# Solve for the effect size given n = 5 and the target power.
my.two.sample.power(n = 5, power = 0.8)
my.two.sample.power(n = 5, power = 0.95)
# One-off installation of single-cell analysis packages (needs network access).
# library() is used instead of require() so that a missing devtools fails
# loudly instead of returning FALSE.
library(devtools)

# Bioconductor packages go through BiocManager; the biocLite.R script is
# retired and was being sourced over plain http.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# BiocGenerics is installed before BASiCS, matching the order that was needed
# in the original session (the first BASiCS attempt was repeated after it).
BiocManager::install("BiocGenerics")
devtools::install_github("catavallejos/BASiCS", build_vignettes = TRUE)
BiocManager::install("scran")
devtools::install_github('hms-dbmi/scde', build_vignettes = FALSE)
setwd('/Volumes/MyBook_3/BD_aging_project/RNAseq/All_tissues_analysis/CIBERSORT/VST_transformed_coutns/')

# Read the Alzheimer-model hippocampus count table once. `skip = 1` drops the
# first line of the file before the header row.
my.azh1 <- read.table('/Volumes/LaCie/Disease_model/Alzheimer_model/RNAseq/STAR/Alzheimer_model_expression_Hippocampus_genes.txt',
                      skip = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
# NOTE(review): `my.fibro1` was a first read of this very same file under an
# unrelated name; it is kept as an alias only in case later code refers to it.
my.fibro1 <- my.azh1
head(my.azh1)
colnames(my.azh1)

# Keep column 1 (used as gene identifier below) and columns 6 to 18, and use
# the identifiers as row names.
my.azh <- my.azh1[, c(1, 6:18)]
rownames(my.azh) <- my.azh[, 1]
# Filter a count table before normalisation.
#
# Args:
#   my.matrix: table with the gene identifier in column 1, one further
#     annotation column in column 2 and numeric counts from column 3 onwards.
#     Row names are searched for spike-ins.
#
# Returns:
#   The count columns (3..ncol) of the rows that are kept, with row names taken
#   from column 1. A row is removed when its counts sum to 1 or less, or when
#   its row name contains "ERCC-".
preprocess_matrix <- function(my.matrix) {
  ncols <- ncol(my.matrix)
  stopifnot(!is.null(ncols), ncols >= 3)
  count.cols <- 3:ncols

  # get the genes with no reads out (see the pre-filtering step of the DESeq2
  # vignette); drop = FALSE keeps a single count column two-dimensional
  row.totals <- rowSums(my.matrix[, count.cols, drop = FALSE])
  my.null <- which(row.totals <= 1)

  # Now pull out the spike in genes
  spikes.idx <- grep("ERCC-", rownames(my.matrix))
  my.exclude <- union(my.null, spikes.idx)

  # Select with a logical mask rather than a negative index: x[-integer(0), ]
  # returns ZERO rows, so a table with nothing to exclude would come back empty.
  keep <- !(seq_len(nrow(my.matrix)) %in% my.exclude)

  my.filtered.matrix <- my.matrix[keep, count.cols, drop = FALSE]
  rownames(my.filtered.matrix) <- my.matrix[keep, 1]
  my.filtered.matrix
}
# Variance-stabilised expression matrix for the Alzheimer-model hippocampus
# samples. DESeq2 is attached before any of its functions is called.
library('DESeq2')

# clean up input
my.azh.proc <- preprocess_matrix(my.azh)
colnames(my.azh.proc)

# design matrix
# NOTE(review): assumes the 12 sample columns are ordered as 3 x age 2 then
# 3 x age 6 within WT, followed by the same within CKp25 -- check against the
# column names printed above.
my.age <- rep(c(rep(2, 3), rep(6, 3)), 2)
my.age
my.genotype <- c(rep("WT", 6), rep("CKp25", 6))
dataDesign <- data.frame(row.names = colnames(my.azh.proc), age = my.age, genotype = my.genotype)

# get matrix using age as a modeling covariate
dds <- DESeqDataSetFromMatrix(countData = my.azh.proc, colData = dataDesign, design = ~ age + genotype)

# run DESeq and export normalized expression values
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)
vsd <- getVarianceStabilizedData(dds)
head(vsd)

# output result tables to files
# paste() joins with a space, so the file name is "<date> Alzheimers_...".
my.out.ct.mat <- paste(Sys.Date(), "Alzheimers_model_log2_VST_counts_matrix.txt")
write.table(vsd, file = my.out.ct.mat, sep = "\t", row.names = TRUE, quote = FALSE)
# Variance-stabilised expression matrix for the 40Hz gamma hippocampus samples.
# The count table is read once; `skip = 1` drops the first line of the file
# before the header row.
my.hippo1 <- read.table('/Volumes/LaCie/Disease_model/Hippocampus_40Hz/STAR/Hippocampus_gamma40Hz_counts_genes.txt',
                        skip = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# Keep column 1 (used as gene identifier) and columns 6 to 12.
my.hippo <- my.hippo1[, c(1, 6:12)]
rownames(my.hippo) <- my.hippo[, 1]
head(my.hippo)

# clean up input
my.hippo.proc <- preprocess_matrix(my.hippo)

# design matrix
# NOTE(review): assumes the sample columns are ordered 3 x CTL then 3 x GAMMA.
my.treatment <- c(rep("CTL", 3), rep("GAMMA", 3))
dataDesign <- data.frame(row.names = colnames(my.hippo.proc), treatment = my.treatment)

# get matrix using treatment as the only modeling covariate
dds <- DESeqDataSetFromMatrix(countData = my.hippo.proc, colData = dataDesign, design = ~ treatment)

# run DESeq and export normalized expression values
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)
vsd <- getVarianceStabilizedData(dds)
head(vsd)

# output result tables to files
# paste() joins with a space, so the file name is "<date> 40Hz_gamma_...".
my.out.ct.mat <- paste(Sys.Date(), "40Hz_gamma_hippocampus_model_log2_VST_counts_matrix.txt")
write.table(vsd, file = my.out.ct.mat, sep = "\t", row.names = TRUE, quote = FALSE)
# Cortex samples of the Huntingdon model: read counts, filter them, and write
# the variance-stabilised matrix to a dated text file.
my.ctx1 <- read.table(
  '/Volumes/LaCie/Disease_model/Huntingdon_Disease_Model/RNAseq/STAR/Huntingdon_model_expression_Cortex_genes.txt',
  skip = 1,
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# Column 1 supplies the row names; columns 6 to 18 are carried along with it.
my.ctx <- my.ctx1[, c(1, 6:18)]
rownames(my.ctx) <- my.ctx[, 1]
head(my.ctx)
colnames(my.ctx)

my.ctx.proc <- preprocess_matrix(my.ctx)

# Sample annotation: ages 8, 8, 8, 12, 12, 12 repeated for each genotype,
# six WT samples followed by six R62 samples.
my.age <- rep(rep(c(8, 12), each = 3), times = 2)
my.genotype <- rep(c("WT", "R62"), each = 6)
dataDesign <- data.frame(
  row.names = colnames(my.ctx.proc),
  age = my.age,
  genotype = my.genotype
)

# Model with age and genotype as covariates.
dds <- DESeqDataSetFromMatrix(
  countData = my.ctx.proc,
  colData = dataDesign,
  design = ~ age + genotype
)

# Size factors and dispersions are needed before the variance-stabilising
# transformation can be extracted.
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)
vsd <- getVarianceStabilizedData(dds)

# Tab-separated output, row names kept, no quoting.
my.out.ct.mat <- paste(Sys.Date(), "Cortex_Huntingdon_Model_model_log2_VST_counts_matrix.txt")
write.table(vsd, file = my.out.ct.mat, sep = "\t", row.names = TRUE, quote = FALSE)
# Variance-stabilised expression matrix for the Huntingdon-model striatum
# samples. `skip = 1` drops the first line of the file before the header row.
my.striat1 <- read.table('/Volumes/LaCie/Disease_model/Huntingdon_Disease_Model/RNAseq/STAR/Huntingdon_model_expression_Striatum_genes.txt',
                         skip = 1, header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# Keep column 1 (used as gene identifier) and columns 6 to 18.
my.striat <- my.striat1[, c(1, 6:18)]
rownames(my.striat) <- my.striat[, 1]
colnames(my.striat)

# clean up input -- this has to happen BEFORE the design table is built,
# because the design takes its row names from the filtered matrix
my.striat.proc <- preprocess_matrix(my.striat)

# design matrix
# NOTE(review): assumes the 12 sample columns are ordered as 3 x age 8 then
# 3 x age 12 within WT, followed by the same within R62.
my.age <- rep(c(rep(8, 3), rep(12, 3)), 2)
my.genotype <- c(rep("WT", 6), rep("R62", 6))
dataDesign <- data.frame(row.names = colnames(my.striat.proc), age = my.age, genotype = my.genotype)

# get matrix using age as a modeling covariate
dds <- DESeqDataSetFromMatrix(countData = my.striat.proc, colData = dataDesign, design = ~ age + genotype)

# run DESeq and export normalized expression values
dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)
vsd <- getVarianceStabilizedData(dds)

# output result tables to files
# paste() joins with a space, so the file name is "<date> Striatum_...".
my.out.ct.mat <- paste(Sys.Date(), "Striatum_Huntingdon_Model_model_log2_VST_counts_matrix.txt")
write.table(vsd, file = my.out.ct.mat, sep = "\t", row.names = TRUE, quote = FALSE)
