# Load two tree files in turn and draw each on the active graphics device.
# `my.tert.tree` ends up holding the second tree.
# NOTE(review): read.tree() is not defined in this file; presumably the
# providing package (e.g. ape) is already attached -- confirm.
my.tert.tree <- read.tree(
  "/Volumes/MyBook_2/Genome_suite/clustalw2-I20140311-213952-0824-52834416-pg.ph.ph"
)
plot(my.tert.tree)

my.tert.tree <- read.tree(
  "/Volumes/MyBook_2/Genome_suite/clustalw2-I20140311-214806-0732-65027968-pg.ph.ph"
)
plot(my.tert.tree)

# Write the same (second) tree to a PDF in the current working directory.
pdf(file = "tert_tree.pdf")
plot(my.tert.tree)
dev.off()
setwd("/Volumes/MyBook_2/Rando_data/Anshul_method/")

# coverage of old peaks by young peaks (measuring gain of breadth with age)
my.YGcovOLDpeaks.REP1 <- read.csv(
  "HOMER_youngQSC_rep1_broad_peaks_coverage_in_oldQSCs.xls",
  header = TRUE, sep = "\t"
)
my.YGcovOLDpeaks.REP2 <- read.csv(
  "HOMER_youngQSC_rep2_broad_peaks_coverage_in_oldQSCs.xls",
  header = TRUE, sep = "\t"
)
# coverage of young peaks by old peaks (measuring loss of breadth with age)
my.OLDcovYGpeaks.REP1 <- read.csv(
  "HOMER_oldQSC_rep1_broad_peaks_coverage_in_youngQSCs.xls",
  header = TRUE, sep = "\t"
)
my.OLDcovYGpeaks.REP2 <- read.csv(
  "HOMER_oldQSC_rep2_broad_peaks_coverage_in_youngQSCs.xls",
  header = TRUE, sep = "\t"
)

# For each table, build the empirical CDF of peak width (End - Start) and
# evaluate it at every peak's own width.
my.ecdf.gain.1 <- ecdf(with(my.YGcovOLDpeaks.REP1, End - Start))
my.ecdf.gain.2 <- ecdf(with(my.YGcovOLDpeaks.REP2, End - Start))
my.quart.gain.1 <- my.ecdf.gain.1(with(my.YGcovOLDpeaks.REP1, End - Start))
my.quart.gain.2 <- my.ecdf.gain.2(with(my.YGcovOLDpeaks.REP2, End - Start))

my.ecdf.loss.1 <- ecdf(with(my.OLDcovYGpeaks.REP1, End - Start))
my.ecdf.loss.2 <- ecdf(with(my.OLDcovYGpeaks.REP2, End - Start))
my.quart.loss.1 <- my.ecdf.loss.1(with(my.OLDcovYGpeaks.REP1, End - Start))
my.quart.loss.2 <- my.ecdf.loss.2(with(my.OLDcovYGpeaks.REP2, End - Start))

# 2 x 2 panel of Peak.Score histograms, then back to a single-panel layout.
par(mfrow = c(2, 2))
hist(my.YGcovOLDpeaks.REP1$Peak.Score, col = "skyblue",
     main = "Gain of breadth (rep 1)")
hist(my.YGcovOLDpeaks.REP2$Peak.Score, col = "skyblue",
     main = "Gain of breadth (rep 2)")
hist(my.OLDcovYGpeaks.REP1$Peak.Score, col = "skyblue",
     main = "Loss of breadth (rep 1)")
hist(my.OLDcovYGpeaks.REP2$Peak.Score, col = "skyblue",
     main = "Loss of breadth (rep 2)")
par(mfrow = c(1, 1))
# Scratch comparison of cv() with hand-computed variants on c(1, 2, 3, 4).
# NOTE(review): cv() is presumably raster::cv (coefficient of variation);
# confirm which package supplies it in this session.
library(raster)
cv
cv(c(1, 2, 3, 4))
sd(c(1, 2, 3, 4))
sd(c(1, 2, 3, 4)) / mean(c(1, 2, 3, 4))
var(c(1, 2, 3, 4) / 4)
var(c(1, 2, 3, 4) / 4)^(1 / 2)
(var(c(1, 2, 3, 4)) / 4)^(1 / 2)
(var(c(1, 2, 3, 4)) / 2.5)^(1 / 2)
(var(c(1, 2, 3, 4) / 2.5)^(1 / 2))
# The bare `qqplot()` call was removed: qqplot() has no defaults for `x` and
# `y`, so calling it without arguments always stops with an error and aborts
# the script when it is sourced.
# Scratch setup: build a genome axis track, restore a saved workspace file,
# and install/load a few packages.
library(Gviz)
library(GenomicRanges)
# Single range on a sequence named 'chr' covering positions 1 to 258564.
ref <- GRanges('chr', IRanges(1, 258564))
ref_track <- GenomeAxisTrack(ref, lwd=4, fontsize=20)
# load() restores whatever objects were saved in this file into the workspace;
# the object names are not visible from here.
load("/Users/benayoun/R_projects/Bethany/FAKE_learned_models_RE.RData")
install.packages("knitr")
# NOTE(review): biocLite.R looks like the legacy Bioconductor installer
# script -- confirm it is still supported by the R/Bioconductor version in use.
source("http://bioconductor.org/biocLite.R")
biocLite("BiocParallel")
library(BiocParallel)
# Each line evaluates (qnorm(a) + qnorm(b))^2 / (w1 * w2 * d^2) for a
# different choice of constants.
# NOTE(review): this looks like a sample-size formula with
# a = 1 - alpha/2 and b = power -- confirm the intended meaning.
(qnorm(0.975) + qnorm(0.85))^2 / (0.5 * 0.5 * 0.1^2)
(qnorm(0.975) + qnorm(0.85))^2 / (0.5 * 0.5 * 0.2^2)
(qnorm(0.975) + qnorm(0.75))^2 / (0.5 * 0.5 * 0.2^2)
(qnorm(0.975) + qnorm(0.75))^2 / (0.5 * 0.5 * 0.8^2)
(qnorm(0.975) + qnorm(0.75))^2 / (0.1 * 0.6 * 0.8^2)
(qnorm(0.975) + qnorm(0.75))^2 / (0.1 * 0.9 * 0.2^2)
(qnorm(0.975) + qnorm(0.75))^2 / (0.5 * 0.5 * 0.2^2)
# Scratch work on the built-in mtcars data: a one-sample t-test on mpg, then
# two-sample comparisons of 4- vs 6-cylinder cars.
mtcars
t.test(mtcars$mpg)
pt
pt(0.975, 8) * 1 / sqrt(9)
qt(0.975, 8) * 1 / sqrt(9)
?mtcars
mtcars$cyl

# Row indices of the 6- and 4-cylinder cars.
my.6 <- which(mtcars$cyl == 6)
my.4 <- which(mtcars$cyl == 4)

# The two groups have different sizes, so this elementwise difference
# recycles the shorter vector.
mtcars$mpg[my.4] - mtcars$mpg[my.6]
mtcars$mpg[my.6]
mtcars$mpg[my.4]
?t.test

# Equal-variance two-sample t-tests, in both argument orders.
t.test(mtcars$mpg[my.4], mtcars$mpg[my.6], var.equal = TRUE)
t.test(mtcars$mpg[my.6], mtcars$mpg[my.4], var.equal = TRUE)
# Scratch calculations of pooled standard deviations and t-based confidence
# intervals. Statements are kept in their original order because later lines
# reuse whatever `sp`, `md`, `semd`, `n1`, ... were assigned last.

# Pooled SD from two SDs (1.5 and 1.8) with 8 degrees of freedom each.
sp <- sqrt((8 * 1.5^2 + 8 * 1.8^2) / 16)
sp
(8 * 1.5^2 + 8 * 1.8^2) / 16
1100 + c(-1, 1) * qt(0.975, 8) * 30 / sqrt(9)
6 / qt(0.975, 8)

# Pooled SD from SDs 0.6 and 0.68 with 9 degrees of freedom each, followed by
# several attempts at the interval around (3 - 5).
sp <- sqrt((9 * 0.6^2 + 9 * 0.68^2) / 18)
sp
(3 - 5) + c(-1, 1) * qt(0.975, 8) * sp / sqrt(20)
(3 - 5) + c(-1, 1) * qt(0.975, 8) * 0.68 / sqrt(20)
(3 - 5) + c(-1, 1) * qt(0.975, 8) * sp / sqrt(1 / 10 + 1 / 10)
(3 - 5) + c(-1, 1) * qt(0.975, 19) * sp / sqrt(1 / 10 + 1 / 10)
(3 - 5) + c(-1, 1) * qt(0.975, 19) * sp * sqrt(1 / 10 + 1 / 10)
(3 - 5) + c(-1, 1) * qt(0.975, 18) * sp * sqrt(1 / 10 + 1 / 10)
(3 - 5) + c(-1, 1) * qt(0.975, 18) * sp * (1 / 10 + 1 / 10)^0.5
sp * (1 / 10 + 1 / 10)^0.5
sqrt(2 / 10)

# Same interval written with named intermediates.
md <- 3 - 5
sp <- sqrt((9 * 0.6^2 + 9 * 0.68^2) / 18)
semd <- sp * sqrt(1 / 10 + 1 / 10)
md + c(-1, 1) * qt(0.975, 10 + 10 - 2) * semd
sp <- sqrt((9 * (0.6^2) + 9 * (0.68^2)) / 18)
semd <- sp * sqrt(1 / 10 + 1 / 10)
md + c(-1, 1) * qt(0.975, 10 + 10 - 2) * semd
md + c(-1, 1) * qt(0.95, 10 + 10 - 2) * semd

# Two groups of 100 with SDs 0.5 and 2.
md <- 6 - 4
sp <- sqrt((99 * (0.5^2) + 99 * (2^2)) / 198)
semd <- sp * sqrt(1 / 100 + 1 / 100)
md + c(-1, 1) * qt(0.975, 198) * semd
# Uses the `sp` assigned just above (the n = 100 example).
-3 - 1 + c(-1, 1) * qt(0.95, 16) * sp * (1 / 9 + 1 / 9)^0.5

n1 <- n2 <- 9
x1 <- -3 ## treated
x2 <- 1 ## placebo
s1 <- 1.5 ## treated
s2 <- 1.8 ## placebo
spsq <- ((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n1 + n2 - 2)
x1 - x2 + c(-1, 1) * qt(0.95, 16) * sqrt(spsq * (2 / 9))

n1 <- n2 <- 10
x1 <- 3 ## treated
x2 <- 5 ## placebo
s1 <- 0.6 ## treated
s2 <- 0.68 ## placebo
spsq <- ((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n1 + n2 - 2)
x1 - x2 + c(-1, 1) * qt(0.95, n1 + n2 - 2) * sqrt(spsq * (1 / n1 + 1 / n2))
x1 - x2 + c(-1, 1) * qt(0.975, n1 + n2 - 2) * sqrt(spsq * (1 / n1 + 1 / n2))
x2 - x1 + c(-1, 1) * qt(0.975, n1 + n2 - 2) * sqrt(spsq * (1 / n1 + 1 / n2))
c(-1, 1) * qt(0.975, n1 + n2 - 2) * sqrt(spsq * (1 / n1 + 1 / n2))
# In the original this expression was split across two lines ("... *" / "1").
qt(0.975, n1 + n2 - 2) * 1

n1 <- n2 <- 9
x1 <- -3 ## treated
x2 <- 1 ## placebo
s1 <- 1.5 ## treated
s2 <- 1.8 ## placebo
spsq <- ((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n1 + n2 - 2)
x1 - x2 + c(-1, 1) * qt(0.95, 16) * sqrt(spsq * (2 / 9))
x1 - x2 + c(-1, 1) * qt(0.95, n1 + n2 - 2) * sqrt(spsq * (1 / n1 + 1 / n2))
sqrt((9 * 0.6^2 + 9 * 0.68^2) / 18)

n1 <- n2 <- 10
x1 <- 3 ## new
x2 <- 5 ## old
s1 <- 0.6 ## new
s2 <- 0.68 ## old
spsq <- ((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n1 + n2 - 2)
sqrt(spsq)
# NOTE(review): this line uses 0.5 * (0.6 + 0.68) where the lines above use
# `spsq`; left exactly as written -- confirm whether that was intended.
x1 - x2 + c(-1, 1) * qt(0.975, n1 + n2 - 2) * sqrt(0.5 * (0.6 + 0.68) * (1 / n1 + 1 / n2))
# Render the 07_Asymptopia R Markdown file to PDF.
#
# Fixes relative to the original history:
#  * `knit2pdf` was referenced without knitr being attached in the visible
#    code; it is now namespace-qualified.
#  * render() was first called before library(rmarkdown); the package is now
#    attached before the single render() call.
#  * render()'s second positional argument is `output_format`, so the PDF path
#    was being passed as a format name; the format and the output file are now
#    passed by name.
#  * `install.packages(pandoc)` referenced an undefined symbol and was dropped,
#    together with `install.packages("pandoc")`.
#    NOTE(review): pandoc is presumably an external program rather than a CRAN
#    package -- confirm how it should be installed on this machine.
knitr::knit2pdf
install.packages("rmarkdown")
library(rmarkdown)
render(
  "/Users/benayoun/Dropbox/Coursera_data_science/rmd_stat_inference/07_Asymptopia.Rmd",
  output_format = "pdf_document",
  output_file = "/Users/benayoun/Dropbox/Coursera_data_science/rmd_stat_inference/07_Asymptopia.pdf"
)
# Total length of the features in a GFF3 file and their share of a fixed
# denominator.
my.data <- read.table(
  "/Volumes/MyBook_2/Genome_suite/Annotations_MAKER/August6_Maker_gene_models_REPEATS_full_match.gff3",
  header = FALSE, sep = "\t"
)
head(my.data)
# Columns 4 and 5 of a GFF3 record are start and end, both 1-based and
# inclusive, so a feature's length is end - start + 1. The original
# `V5 - V4` under-counted every feature by one base.
my.length <- my.data$V5 - my.data$V4 + 1
hist(my.length)
sum(my.length)
# NOTE(review): 1023205147 is presumably the assembly size in bp -- confirm.
sum(my.length) / 1023205147
# Scratch work on confidence intervals and tail probabilities.
#
# Calls that could only fail were removed so the block can be sourced:
#  * bare `z.test` (no such object in base R or the stats package),
#  * `?pois` and `?pbino` (mistyped help topics; the correct lookups follow),
#  * `ppois(..., lower.tail=False)` (`False` is not an R constant),
#  * `qt(0.975)` without `df` (qt() has no default for `df`).

# Normal-approximation bounds around mean(mpg), using the 5% normal quantile.
mean(mtcars$mpg)
qnorm(0.05)
sd(mtcars$mpg)
qnorm(0.05) * sd(mtcars$mpg) / sqrt(length(mtcars$mpg)) + mean(mtcars$mpg)
-qnorm(0.05) * sd(mtcars$mpg) / sqrt(length(mtcars$mpg)) + mean(mtcars$mpg)

# Two-sample t-test (t.test's default, unequal variances): 4 vs 6 cylinders.
my.4 <- which(mtcars$cyl == 4)
my.6 <- which(mtcars$cyl == 6)
t.test(mtcars$mpg[my.4], mtcars$mpg[my.6])

qnorm(0.975)
3 + c(-1, 1) * qnorm(0.975) * 1.1
3 + c(-1, 1) * qnorm(0.025) * 1.1
3 + c(-1, 1) * qnorm(0.975) * 1.1 / sqrt(100)

# Binomial tail probabilities for 100 trials with p = 0.5.
?pbinom
pbinom(55, 100, 0.5)
pbinom(55, 100, 0.5, lower.tail = FALSE)
pbinom(54, 100, 0.5, lower.tail = FALSE)

# Poisson upper-tail probabilities.
?ppois
ppois(30 / 15800, 520, lower.tail = FALSE)
ppois(15800, 520 * 30, lower.tail = FALSE)
ppois(15800 - 1, 520 * 30, lower.tail = FALSE)

# Paired comparison of two sets of five measurements.
bas <- c(140, 138, 150, 148, 135)
treat <- c(132, 135, 151, 146, 130)
t.test(bas, treat, paired = TRUE)

# Intervals around 1100 with sd 30 and n = 9, using different multipliers.
1100 + c(-1, 1) * 30 / sqrt(9)
1100 + c(-1, 1) * qnorm(0.975) * 30 / sqrt(9)
1100 + c(-1, 1) * qnorm(0.95) * 30 / sqrt(9)
1100 + c(-1, 1) * qt(0.975, df = 8) * 30 / sqrt(9)
1100 + c(-1, 1) * qt(0.975, df = 8) * 30

mean(bas)
mean(treat)
t.test(bas, treat)
t.test(bas)
sd(bas)
mean(bas) + c(-1, 1) * qt(0.975, df = 4) * sd(bas) / sqrt(5)
1100 + c(-1, 1) * qt(0.975, df = 8) * 30 / sqrt(9)

?pbinom
pbinom(2, 4, 0.5, lower.tail = FALSE)
ppois(10, 0.01 * 1787)
# Pooled two-sample t statistic for two groups of 9 (SDs 1.5 and 1.8,
# means -3 and 1) and the matching t tail probabilities.
#
# Fixes relative to the original history:
#  * `pt(4.419)` and `pt(-5.121475)` were dropped: pt() has no default for
#    `df`, so both calls always fail.
#  * `(1+3)//(...)` was dropped: `//` is not an R operator, so the line is a
#    parse error; the corrected expression is kept below.
#  * `df` is now 16 (= 9 + 9 - 2), matching the denominator used for `sp`;
#    the original used 17.
sp <- sqrt(((9 - 1) * 1.5^2 + (9 - 1) * 1.8^2) / (9 + 9 - 2))
sp
(-3 - 1) / (sp * sqrt(1 / 9 + 1 / 9))
pt(-5.121475, df = 9 + 9 - 2)
pt(5.121475, df = 9 + 9 - 2)
(1 + 3) / (sp * sqrt(1 / 9 + 1 / 9))
# Trial-and-error over `n`: upper-tail probability, under a normal with mean
# 0.01 and sd 0.04 / sqrt(n), of exceeding qnorm(0.95) * 0.04 / sqrt(n).
# Assignments to `n` that were overwritten before being used are omitted;
# `n` still ends at 138.
qnorm(0.95)
n <- 100
qnorm(0.95) * 0.04 / sqrt(n)
pnorm(0.006579415, mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
pnorm(qnorm(0.95) * 0.04 / sqrt(n), mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
pnorm(qnorm(0.95) * 0.04 / sqrt(n), mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
n <- 99
pnorm(qnorm(0.95) * 0.04 / sqrt(n), mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
n <- 1000
pnorm(qnorm(0.95) * 0.04 / sqrt(n), mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
n <- 200
pnorm(qnorm(0.95) * 0.04 / sqrt(n), mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
n <- 150
pnorm(qnorm(0.95) * 0.04 / sqrt(n), mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
n <- 140
pnorm(qnorm(0.95) * 0.04 / sqrt(n), mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
n <- 135
pnorm(qnorm(0.95) * 0.04 / sqrt(n), mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
n <- 137
pnorm(qnorm(0.95) * 0.04 / sqrt(n), mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
n <- 138
pnorm(qnorm(0.95) * 0.04 / sqrt(n), mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
# Same n, with the cutoff taken at the 90% normal quantile instead.
pnorm(qnorm(0.9) * 0.04 / sqrt(n), mean = 0.01, sd = 0.04 / sqrt(n), lower.tail = FALSE)
# Exploratory plots of the concrete `mixtures` data: compressive strength by
# row index, with rows highlighted by Age or FlyAsh thresholds.
#
# Fixes relative to the original history:
#  * One highlight call selected x with `Age > 50` but y with `Age > 60`, so
#    the two vectors could differ in length; that attempt is dropped and the
#    corrected `Age > 50` version that followed it is kept.
#  * `mixtures$SuperPlasticizer` (capital P) was replaced by the spelling the
#    later lines use, `Superplasticizer`; the misspelled lookups and their
#    hist() calls are dropped.
#  * `summary(mixtures, type = "l")` duplicated the plain summary() call.
#  * The repeated points() calls are factored into highlight_rows().
install.packages("AppliedPredictiveModeling")
library(AppliedPredictiveModeling)
data(concrete)
library(caret)
set.seed(975)
inTrain <- createDataPartition(mixtures$CompressiveStrength, p = 3 / 4)[[1]]
training <- mixtures[inTrain, ]
testing <- mixtures[-inTrain, ]

plot(mixtures$CompressiveStrength)
colnames(mixtures)
summary(mixtures)
plot(mixtures$CompressiveStrength, type = "l")
plot(mixtures$CompressiveStrength, mixtures$FlyAsh)
plot(mixtures$CompressiveStrength, mixtures$Age)
length(mixtures$CompressiveStrength)

# Row index used as the x axis in the plots below.
bla <- seq_along(mixtures$CompressiveStrength)

# Overlay the rows selected by logical mask `keep` as filled points in
# `colour`, using the same mask for x and y.
highlight_rows <- function(keep, colour) {
  points(bla[keep], mixtures$CompressiveStrength[keep], col = colour, pch = 16)
}

plot(bla, mixtures$CompressiveStrength)
plot(bla[mixtures$Age < 5], mixtures$CompressiveStrength[mixtures$Age < 5])

plot(bla, mixtures$CompressiveStrength)
highlight_rows(mixtures$Age < 5, "red")
highlight_rows(mixtures$Age > 40, "blue")

plot(bla, mixtures$CompressiveStrength)
highlight_rows(mixtures$Age < 5, "red")
highlight_rows(mixtures$Age > 30, "blue")
highlight_rows(mixtures$Age > 50, "gold")

plot(bla, mixtures$CompressiveStrength)
highlight_rows(mixtures$Age < 5, "red")
highlight_rows(mixtures$Age > 30, "blue")
highlight_rows(mixtures$Age > 100, "gold")

# Later layers overplot earlier ones where the masks overlap.
plot(bla, mixtures$CompressiveStrength)
highlight_rows(mixtures$Age < 5, "red")
highlight_rows(mixtures$Age > 10, "pink")
highlight_rows(mixtures$Age > 30, "blue")
highlight_rows(mixtures$Age > 100, "gold")

hist(mixtures$FlyAsh)
plot(bla, mixtures$CompressiveStrength)
highlight_rows(mixtures$FlyAsh < 0.01, "red")
highlight_rows(mixtures$FlyAsh > 0.04, "pink")
highlight_rows(mixtures$FlyAsh > 0.06, "blue")
highlight_rows(mixtures$FlyAsh > 0.08, "gold")

mixtures$Superplasticizer
hist(mixtures$Superplasticizer)
log(mixtures$Superplasticizer)
hist(log(mixtures$Superplasticizer + 1))
hist(mixtures$Superplasticizer)
# Build the Alzheimer's data frame, split it 75/25, and run a PCA on columns
# 58-69.
#
# Fixes relative to the original history:
#  * `AlzheimerDisease` was printed and summarised as if it were an object.
#    The code above those lines uses `diagnosis` and `predictors` right after
#    data(AlzheimerDisease), so those are the objects it provides; the bare
#    references are dropped.
#  * `sumamry(adData)` was a typo for summary(adData).
#  * prcomp()'s argument is `scale.`; the original `scale =` relied on partial
#    argument matching.
#  * Repeated library()/data() calls are collapsed.
library(caret)
library(AppliedPredictiveModeling)
set.seed(3433)
data(AlzheimerDisease)
adData <- data.frame(diagnosis, predictors)
inTrain <- createDataPartition(adData$diagnosis, p = 3 / 4)[[1]]
training <- adData[inTrain, ]
testing <- adData[-inTrain, ]

adData
summary(adData)
colnames(adData)
# NOTE(review): columns 58-69 are presumably the twelve IL_* predictors --
# confirm against the colnames() output.
colnames(adData[, 58:69])
CR.pca <- prcomp(adData[, 58:69], scale. = TRUE)
summary(CR.pca)
# Successive attempts at fitting a glm on columns 58-69 through caret's
# train() with PCA pre-processing. Each attempt overwrites `fit1`.
# NOTE(review): the attempts below place a whole data frame on the right-hand
# side of the formula and/or omit `data`; these look like dead ends that
# preceded the later preProcess()/predict() approach -- confirm which, if any,
# actually ran, and whether `pcaComp` is an argument train() accepts.
fit1 <-train(diagnosis ~ adData[,58:69],method="glm",preProcess="pca",pcaComp=7)
fit1 <-train(diagnosis ~ as.data.frame(adData[,58:69]),method="glm",preProcess="pca",pcaComp=7)
as.data.frame(adData[,58:69])
colnames(adData[,58:69])
adData[1:2,58:69]
# `bla` is reused here as a copy of columns 58-69 of adData.
bla <- as.data.frame(adData[,58:69])
fit1 <-train(diagnosis ~ bla,method="glm",preProcess="pca",pcaComp=7)
bla <- data.frame(adData[,58:69])
fit1 <-train(diagnosis ~ bla,method="glm",preProcess="pca",pcaComp=7)
head(bla)
fit1 <-train(diagnosis ~ bla,method="glm",preProcess="pca",pcaComp=7,data=adData)
summary(bla)
# From here on the attempts use the training split instead of the full data.
fit1 <-train(training$diagnosis ~ training[,58:69],method="glm",preProcess="pca",pcaComp=7,data=adData)
head(training)
training2 <- training[,58:69]
fit1 <-train(training$diagnosis ~ training2,method="glm",preProcess="pca",pcaComp=7)
# Same model with the twelve IL_* columns written out term by term.
fit1 <-train(training$diagnosis ~ training$IL_11 + training$IL_13 +training$IL_16 +training$IL_17E +training$IL_1alpha +training$IL_3 +training$IL_4 +training$IL_5 +training$IL_6 +training$IL_6_Receptor +training$IL_7 +training$IL_8,
method="glm",preProcess="pca",pcaComp=7)
fit1
# PCA + glm on columns 58-69 of the training split, evaluated on the test
# split, followed by fits on the full predictor set.
#
# Fixes relative to the original history:
#  * Three `predict(preProc, ...))` lines had an extra ")" (parse errors).
#  * One train() call was never closed and a second passed `method=` twice;
#    only the corrected call is kept.
#  * Two confusionMatrix() calls predicted on `testing` with a model trained
#    on the PCA-score columns of `training2`; they now use `testing2`, which
#    holds the same columns.
#  * With `data = training`, the formula `training$diagnosis ~ .` expands "."
#    to every column of `training`, including `diagnosis` itself, so the
#    response leaked into the predictors. Those fits now use `diagnosis ~ .`.
#  * prcomp()'s argument is spelled `scale.`.
#  * Exact duplicate statements are collapsed.
CR.pca <- prcomp(training[, 58:69], scale. = TRUE)
summary(CR.pca)

# Project the selected columns onto 7 principal components learned from the
# training split, and apply the same projection to the test split.
preProc <- preProcess(training[, 58:69], method = "pca", pcaComp = 7)
training2 <- predict(preProc, training[, 58:69])
testing2 <- predict(preProc, testing[, 58:69])

# `training2` has no `diagnosis` column, so "." covers the PCA scores only.
fit1 <- train(training$diagnosis ~ .,
  method = "glm", preProcess = "pca", data = training2
)
fit1
# NOTE(review): caret documents confusionMatrix(data, reference) with the
# predictions first; the argument order here is kept as written -- confirm.
confusionMatrix(testing$diagnosis, predict(fit1, testing2))
colnames(testing[, 58:69])
names(testing2)
preProc

# Same fit, keeping components up to 80% of variance in train()'s own PCA.
fit1 <- train(training$diagnosis ~ .,
  method = "glm", preProcess = "pca", data = training2,
  trControl = trainControl(preProcOptions = list(thresh = 0.8))
)
confusionMatrix(testing$diagnosis, predict(fit1, testing2))

# All predictors of the training split, with and without PCA.
fit1 <- train(diagnosis ~ .,
  method = "glm", preProcess = "pca", data = training,
  trControl = trainControl(preProcOptions = list(thresh = 0.8))
)
confusionMatrix(testing$diagnosis, predict(fit1, testing))
fit2 <- train(diagnosis ~ .,
  method = "glm", data = training
)
confusionMatrix(testing$diagnosis, predict(fit2, testing))
# Restrict both splits to columns 58-69, then compare a PCA-preprocessed glm
# (80% variance threshold) with a plain glm on the same columns.
training2 <- training[, 58:69]
testing2 <- testing[, 58:69]

fit1 <- train(
  training$diagnosis ~ .,
  method = "glm",
  preProcess = "pca",
  data = training2,
  trControl = trainControl(preProcOptions = list(thresh = 0.8))
)
confusionMatrix(testing$diagnosis, predict(fit1, testing2))

fit2 <- train(
  training$diagnosis ~ .,
  method = "glm",
  data = training2
)
confusionMatrix(testing$diagnosis, predict(fit2, testing2))

# Unrelated scratch arithmetic.
1 - 0.9^50
0.9^50
# Heart H3K4me3 breadth: load the helper functions and the merged table, then
# draw the QC and coverage-change plots on screen and into PDFs.
setwd("/Volumes/MyBookStudio/BD_aging_project/Heart/H3K4me3_breadth")
source("/Users/benayoun/Softwares/Breadth_compare_bundle/DiffBreadthFun.R")

my.heart.breadth.data <- read.table(
  "Merged_ALL_AGES_MERGED_Heart_H3K4me3.PARSED_INTERSECTIONS.xls",
  header = TRUE, sep = "\t"
)

## QC and general analysis
# NOTE(review): breadth_rep_qual(), BD_cov_changes(), BD_cov_changes_redux()
# and erosion_breadth_test() are not defined in this file; presumably they
# come from the sourced DiffBreadthFun.R -- confirm.
breadth_rep_qual(my.heart.breadth.data)
#
BD_cov_changes(my.heart.breadth.data)
BD_cov_changes_redux(my.heart.breadth.data)

pdf("Changes_to_top5percent_AGING_hearts_coverage.pdf", width = 15)
BD_cov_changes(my.heart.breadth.data)
dev.off()

pdf("Changes_to_top5percent_AGING_hearts_coverage_REDUX.pdf", width = 15)
BD_cov_changes_redux(my.heart.breadth.data)
dev.off()

pdf("hearts_breadth_erosion_per_qt_3m_vs_29m.pdf", width = 15)
erosion_breadth_test(my.heart.breadth.data, "hearts (3 vs. 29m)")
dev.off()
# Build the six-sample matrix (two replicates each at 3, 12 and 29 months)
# and fit one DESeq2 model per pairwise age comparison.
#
# Fix: a stray bare `p` sat between the first DESeq() call and its dispersion
# plot. No object of that name is defined in the visible code, so it would
# stop the script; it has been removed.
# NOTE(review): DESeq2 is not attached in the visible code; presumably the
# sourced helper file loads it -- confirm.
my.cols <- c(10, 13, 16, 19, 22, 25)
my.new.data <- my.heart.breadth.data[, my.cols]
rownames(my.new.data) <- my.heart.breadth.data[, 1]
colnames(my.new.data) <- c("3m1", "3m2", "12m1", "12m2", "29m1", "29m2")

# 3 vs 12 months
heartsdesign.3v12 <- data.frame(
  row.names = colnames(my.new.data)[1:4],
  condition = c("3m", "3m", "12m", "12m")
)
dds.3v12 <- DESeqDataSetFromMatrix(
  countData = my.new.data[, 1:4],
  colData = heartsdesign.3v12,
  design = ~condition
)
dds.3v12 <- DESeq(dds.3v12, fitType = "parametric")
plotDispEsts(dds.3v12)
pdf("DEseq2_Plot_dispersion_estimates_pooled_local_hearts_H3K4me3_breadth_3v12.pdf")
plotDispEsts(dds.3v12)
dev.off()

# 12 vs 29 months
heartsdesign.12v29 <- data.frame(
  row.names = colnames(my.new.data)[3:6],
  condition = c("12m", "12m", "29m", "29m")
)
dds.12v29 <- DESeqDataSetFromMatrix(
  countData = my.new.data[, 3:6],
  colData = heartsdesign.12v29,
  design = ~condition
)
dds.12v29 <- DESeq(dds.12v29, fitType = "parametric")
pdf("DEseq2_Plot_dispersion_estimates_pooled_local_hearts_H3K4me3_breadth_12v29.pdf")
plotDispEsts(dds.12v29)
dev.off()

# 3 vs 29 months
heartsdesign.3v29 <- data.frame(
  row.names = colnames(my.new.data)[c(1:2, 5:6)],
  condition = c("3m", "3m", "29m", "29m")
)
dds.3v29 <- DESeqDataSetFromMatrix(
  countData = my.new.data[, c(1:2, 5:6)],
  colData = heartsdesign.3v29,
  design = ~condition
)
dds.3v29 <- DESeq(dds.3v29, fitType = "parametric")
pdf("DEseq2_Plot_dispersion_estimates_pooled_local_hearts_H3K4me3_breadth_3v29.pdf")
plotDispEsts(dds.3v29)
dev.off()
# Extract the three pairwise contrasts, plot MA plots and p-value histograms,
# export the annotated result tables, and draw heatmaps of the domains with
# adjusted p-value < 0.05.
#
# Fix: the histograms read `res$pval`, which relies on partial matching of
# the results column `pvalue`; the full column name is now used.
res.3vs12m <- results(dds.3v12, contrast = c("condition", "3m", "12m"))
res.12vs29m <- results(dds.12v29, contrast = c("condition", "12m", "29m"))
res.3vs29m <- results(dds.3v29, contrast = c("condition", "3m", "29m"))

# NOTE(review): MAPlot_breadth() is not defined in this file; presumably it
# comes from the sourced helper file -- confirm.
pdf("MAPLOTS_DESEeq2_hearts_aging_NEW_VERSION.pdf")
MAPlot_breadth(res.3vs12m, "3 vs 12m hearts")
MAPlot_breadth(res.12vs29m, "12 vs 29m hearts")
MAPlot_breadth(res.3vs29m, "3 vs 29m hearts")
dev.off()

pdf("PV_hists_DESEeq2_hearts_aging.pdf")
hist(res.3vs12m$pvalue, breaks = 100, col = "skyblue", border = "slateblue", main = "")
hist(res.12vs29m$pvalue, breaks = 100, col = "skyblue", border = "slateblue", main = "")
hist(res.3vs29m$pvalue, breaks = 100, col = "skyblue", border = "slateblue", main = "")
dev.off()

# Number of domains with padj < 0.05; which() drops NA padj values.
# The trailing numbers are as written in the original file.
length(which(res.3vs12m$padj < 0.05)) # 622
length(which(res.12vs29m$padj < 0.05)) # 672
length(which(res.3vs29m$padj < 0.05)) # 672

# Tab-delimited exports: first eight columns of the input table plus results.
write.table(cbind(my.heart.breadth.data[, 1:8], res.3vs12m),
  file = "DESeq2_hearts_AGING_Breadth_3vs12m.xls",
  sep = "\t", quote = FALSE, row.names = FALSE
)
write.table(cbind(my.heart.breadth.data[, 1:8], res.12vs29m),
  file = "DESeq2_hearts_AGING_Breadth_12vs29m.xls",
  sep = "\t", quote = FALSE, row.names = FALSE
)
write.table(cbind(my.heart.breadth.data[, 1:8], res.3vs29m),
  file = "DESeq2_hearts_AGING_Breadth_3vs29m.xls",
  sep = "\t", quote = FALSE, row.names = FALSE
)

# On-screen preview (cellheight 0.6), then the PDF version (cellheight 0.5).
pheatmap(my.new.data[which(res.3vs12m$padj < 0.05), ],
  scale = "row", cluster_cols = FALSE, show_rownames = FALSE,
  main = "Differential 3 vs 12m (FDR < 5%)", cellwidth = 50, cellheight = 0.6
)
pheatmap(my.new.data[which(res.12vs29m$padj < 0.05), ],
  scale = "row", cluster_cols = FALSE, show_rownames = FALSE,
  main = "Differential 12 vs 29m (FDR < 5%)", cellwidth = 50, cellheight = 0.6
)
pheatmap(my.new.data[which(res.3vs29m$padj < 0.05), ],
  scale = "row", cluster_cols = FALSE, show_rownames = FALSE,
  main = "Differential 3 vs 29m (FDR < 5%)", cellwidth = 50, cellheight = 0.6
)

pdf("Heatmaps_of_significant_breadth_changes_heart_aging.pdf")
pheatmap(my.new.data[which(res.3vs12m$padj < 0.05), ],
  scale = "row", cluster_cols = FALSE, show_rownames = FALSE,
  main = "Differential 3 vs 12m (FDR < 5%)", cellwidth = 50, cellheight = 0.5
)
pheatmap(my.new.data[which(res.12vs29m$padj < 0.05), ],
  scale = "row", cluster_cols = FALSE, show_rownames = FALSE,
  main = "Differential 12 vs 29m (FDR < 5%)", cellwidth = 50, cellheight = 0.5
)
pheatmap(my.new.data[which(res.3vs29m$padj < 0.05), ],
  scale = "row", cluster_cols = FALSE, show_rownames = FALSE,
  main = "Differential 3 vs 29m (FDR < 5%)", cellwidth = 50, cellheight = 0.5
)
dev.off()
# Export BED-style files (columns 2, 3, 4, 1 of the annotated table) for the
# significantly changed domains of each comparison, then for the broad-domain
# background and for all non-empty domains.
#
# "lost" = padj < 0.05 and log2FoldChange > 0; "gained" = padj < 0.05 and
# log2FoldChange < 0. Each contrast names the younger age first.
#
# Fix: the final export used `my.heart.breadth.data[-my.null, ]`. When no row
# sums to zero, `my.null` is empty and negative indexing with an empty vector
# selects zero rows instead of all rows. The kept rows are now computed
# explicitly. rowSums() replaces apply(..., 1, sum) for the row totals.
my.3v12.lost.de2 <- intersect(which(res.3vs12m$padj < 0.05), which(res.3vs12m$log2FoldChange > 0))
my.3v12.gained.de2 <- intersect(which(res.3vs12m$padj < 0.05), which(res.3vs12m$log2FoldChange < 0))
write.table(cbind(my.heart.breadth.data[, 1:7], res.3vs12m)[my.3v12.lost.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_3vs12m_LOST_ALL.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(cbind(my.heart.breadth.data[, 1:7], res.3vs12m)[my.3v12.gained.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_3vs12m_GAINED_ALL.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)

my.12vs29.lost.de2 <- intersect(which(res.12vs29m$padj < 0.05), which(res.12vs29m$log2FoldChange > 0))
my.12vs29.gained.de2 <- intersect(which(res.12vs29m$padj < 0.05), which(res.12vs29m$log2FoldChange < 0))
write.table(cbind(my.heart.breadth.data[, 1:7], res.12vs29m)[my.12vs29.lost.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_12vs29m_LOST_ALL.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(cbind(my.heart.breadth.data[, 1:7], res.12vs29m)[my.12vs29.gained.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_12vs29m_GAINED_ALL.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)

my.3vs29.lost.de2 <- intersect(which(res.3vs29m$padj < 0.05), which(res.3vs29m$log2FoldChange > 0))
my.3vs29.gained.de2 <- intersect(which(res.3vs29m$padj < 0.05), which(res.3vs29m$log2FoldChange < 0))
write.table(cbind(my.heart.breadth.data[, 1:7], res.3vs29m)[my.3vs29.lost.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_3vs29m_LOST_ALL.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(cbind(my.heart.breadth.data[, 1:7], res.3vs29m)[my.3vs29.gained.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_3vs29m_GAINED_ALL.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)

# NOTE(review): get_my_av_qt() is not defined in this file; from its use below
# it returns a list whose elements are row indices, with element 20 treated as
# the broadest group -- confirm against the helper's definition.
my.3m.qt <- get_my_av_qt(my.new.data[, 1:2])
my.12m.qt <- get_my_av_qt(my.new.data[, 3:4])
my.29m.qt <- get_my_av_qt(my.new.data[, 5:6])

# Rows whose values sum to zero across all six samples.
my.null <- which(rowSums(my.new.data) == 0)
my.bd.bckgd <- union(union(my.3m.qt[[20]], my.12m.qt[[20]]), my.29m.qt[[20]])
write.table(my.heart.breadth.data[my.bd.bckgd, c(2, 3, 4, 1)],
  file = "hearts_AGING_ALL_broad_H3K4me3_domain.bed",
  quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t"
)
# setdiff() keeps every row when `my.null` is empty.
write.table(
  my.heart.breadth.data[
    setdiff(seq_len(nrow(my.heart.breadth.data)), my.null),
    c(2, 3, 4, 1)
  ],
  file = "hearts_AGING_ALL_H3K4me3_domain.bed",
  quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t"
)
# 3 vs 12
# Significant domains (padj < 0.05) that fall in element 20 of either age's
# quantile list, split by the sign of log2FoldChange.
my.3v12.changedBD.de2 <- intersect(
  which(res.3vs12m$padj < 0.05),
  union(my.3m.qt[[20]], my.12m.qt[[20]])
)
my.3v12.lostBD.de2 <- intersect(
  my.3v12.changedBD.de2,
  which(res.3vs12m$log2FoldChange > 0)
)
my.3v12.gainedBD.de2 <- intersect(
  my.3v12.changedBD.de2,
  which(res.3vs12m$log2FoldChange < 0)
)
# Trailing numbers are as written in the original file.
length(my.3v12.changedBD.de2) # 44
length(my.3v12.lostBD.de2) # 29
length(my.3v12.gainedBD.de2) # 15

# Full annotated rows, then BED-style subsets (columns 2, 3, 4, 1).
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.3vs12m)[my.3v12.changedBD.de2, ],
  file = "DESeq2_hearts_diff_Breadth_3vs12m_CHANGED_BROAD_ANNOT.xls",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.3vs12m)[my.3v12.changedBD.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_3vs12m_CHANGED_BROAD.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.3vs12m)[my.3v12.lostBD.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_3vs12m_LOST_BROAD.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.3vs12m)[my.3v12.gainedBD.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_3vs12m_GAINED_BROAD.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Heatmap on screen first, then the same heatmap into a PDF.
pheatmap(
  my.new.data[my.3v12.changedBD.de2, ],
  scale = "row", cluster_cols = FALSE, show_rownames = FALSE,
  main = "Differential BD 3 vs 12m (FDR < 5%)",
  cellwidth = 50, cellheight = 3, border_color = NA
)
pdf("3_vs_12m_BD_Heatmaps_of_significant_breadth_changes.pdf")
pheatmap(
  my.new.data[my.3v12.changedBD.de2, ],
  scale = "row", cluster_cols = FALSE, show_rownames = FALSE,
  main = "Differential BD 3 vs 12m (FDR < 5%)",
  cellwidth = 50, cellheight = 3, border_color = NA
)
dev.off()
# 12 vs 29
# Significant domains (padj < 0.05) that fall in element 20 of either age's
# quantile list, split by the sign of log2FoldChange.
my.12vs29.changedBD.de2 <- intersect(
  which(res.12vs29m$padj < 0.05),
  union(my.12m.qt[[20]], my.29m.qt[[20]])
)
my.12vs29.lostBD.de2 <- intersect(
  my.12vs29.changedBD.de2,
  which(res.12vs29m$log2FoldChange > 0)
)
my.12vs29.gainedBD.de2 <- intersect(
  my.12vs29.changedBD.de2,
  which(res.12vs29m$log2FoldChange < 0)
)
# Trailing numbers are as written in the original file.
length(my.12vs29.changedBD.de2) # 29
length(my.12vs29.lostBD.de2) # 16
length(my.12vs29.gainedBD.de2) # 13

# Full annotated rows, then BED-style subsets (columns 2, 3, 4, 1).
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.12vs29m)[my.12vs29.changedBD.de2, ],
  file = "DESeq2_hearts_diff_Breadth_12vs29m_CHANGED_BROAD_ANNOT.xls",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.12vs29m)[my.12vs29.changedBD.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_12vs29m_CHANGED_BROAD.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.12vs29m)[my.12vs29.lostBD.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_12vs29m_LOST_BROAD.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.12vs29m)[my.12vs29.gainedBD.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_12vs29m_GAINED_BROAD.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)

pdf("12_vs_29m_BD_Heatmaps_of_significant_breadth_changes.pdf")
pheatmap(
  my.new.data[my.12vs29.changedBD.de2, ],
  scale = "row", cluster_cols = FALSE, show_rownames = FALSE,
  main = "Differential BD 12 vs 29m (FDR < 5%)",
  cellwidth = 50, cellheight = 3, border_color = NA
)
dev.off()
# 3 vs 29
# Significant domains (padj < 0.05) that fall in element 20 of either age's
# quantile list, split by the sign of log2FoldChange.
#
# Fix: both heatmaps in this section plot the 3-vs-29m selection but were
# titled "Differential BD 12 vs 29m"; the titles now say "3 vs 29m".
my.3vs29.changedBD.de2 <- intersect(
  which(res.3vs29m$padj < 0.05),
  union(my.3m.qt[[20]], my.29m.qt[[20]])
)
my.3vs29.lostBD.de2 <- intersect(
  my.3vs29.changedBD.de2,
  which(res.3vs29m$log2FoldChange > 0)
)
my.3vs29.gainedBD.de2 <- intersect(
  my.3vs29.changedBD.de2,
  which(res.3vs29m$log2FoldChange < 0)
)
# Trailing numbers are as written in the original file.
length(my.3vs29.changedBD.de2) # 37
length(my.3vs29.lostBD.de2) # 25
length(my.3vs29.gainedBD.de2) # 12

# Full annotated rows, then BED-style subsets (columns 2, 3, 4, 1).
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.3vs29m)[my.3vs29.changedBD.de2, ],
  file = "DESeq2_hearts_diff_Breadth_3vs29m_CHANGED_BROAD_ANNOT.xls",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.3vs29m)[my.3vs29.changedBD.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_3vs29m_CHANGED_BROAD.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.3vs29m)[my.3vs29.lostBD.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_3vs29m_LOST_BROAD.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)
write.table(
  cbind(my.heart.breadth.data[, 1:7], res.3vs29m)[my.3vs29.gainedBD.de2, c(2, 3, 4, 1)],
  file = "DESeq2_hearts_diff_Breadth_3vs29m_GAINED_BROAD.bed",
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE
)

# Heatmap into a PDF, then the same heatmap on screen.
pdf("3_vs_29m_BD_Heatmaps_of_significant_breadth_changes.pdf")
pheatmap(
  my.new.data[my.3vs29.changedBD.de2, ],
  scale = "row", cluster_cols = FALSE, show_rownames = FALSE,
  main = "Differential BD 3 vs 29m (FDR < 5%)",
  cellwidth = 50, cellheight = 3, border_color = NA
)
dev.off()
pheatmap(
  my.new.data[my.3vs29.changedBD.de2, ],
  scale = "row", cluster_cols = FALSE, show_rownames = FALSE,
  main = "Differential BD 3 vs 29m (FDR < 5%)",
  cellwidth = 50, cellheight = 3, border_color = NA
)
# For each of the 20 quantile groups, count the 3-vs-29m domains with
# padj < 0.05 (all, log2FoldChange > 0 = "loss", log2FoldChange < 0 = "gain"),
# then draw the counts as bar plots on screen and into a PDF.
#
# Fixes relative to the original:
#  * The plot titles said "Olfactory Bulb (3 vs 29m)" although every input
#    and output in this section is heart data; they now say "Heart".
#  * barplot()'s argument is `names.arg`; `names =` relied on partial
#    argument matching.
#  * seq_len() replaces `1:20`.
my.n.changed <- rep(0, 20)
my.n.gain <- rep(0, 20)
my.n.loss <- rep(0, 20)
for (i in seq_len(20)) {
  my.changed <- intersect(
    which(res.3vs29m$padj < 0.05),
    union(my.3m.qt[[i]], my.29m.qt[[i]])
  )
  my.n.changed[i] <- length(my.changed)
  print(length(my.changed))
  my.loss <- intersect(my.changed, which(res.3vs29m$log2FoldChange > 0))
  my.n.loss[i] <- length(my.loss)
  my.gain <- intersect(my.changed, which(res.3vs29m$log2FoldChange < 0))
  my.n.gain[i] <- length(my.gain)
}

# On-screen preview: the last (20th) bar is drawn in a darker colour.
barplot(my.n.changed,
  names.arg = seq(0, 95, 5), col = c(rep("#A7DEEF", 19), "#1AA1DB"),
  ylab = "Number of H3K4me3 domains with differential breadth",
  xlab = "H3K4me3 breadth quantile in 3m", cex.names = 0.8,
  main = "Heart (3 vs 29m)"
)
barplot(my.n.loss,
  names.arg = seq(0, 95, 5), col = c(rep("#A7DEEF", 19), "#1AA1DB"),
  ylab = "Number of H3K4me3 domains with lost breadth",
  xlab = "H3K4me3 breadth quantile in 3m", cex.names = 0.8,
  main = "Heart (3 vs 29m)"
)
barplot(my.n.gain,
  names.arg = seq(0, 95, 5), col = c(rep("#A7DEEF", 19), "#1AA1DB"),
  ylab = "Number of H3K4me3 domains with gained breadth",
  xlab = "H3K4me3 breadth quantile in 3m", cex.names = 0.8,
  main = "Heart (3 vs 29m)"
)

# Same three plots written to a PDF.
pdf("Number_diff_peaks_heart_3vs29m_barplot.pdf")
barplot(my.n.changed,
  names.arg = seq(0, 95, 5), col = c(rep("#A7DEEF", 19), "#1AA1DB"),
  ylab = "Number of H3K4me3 domains with differential breadth",
  xlab = "H3K4me3 breadth quantile in 3m", cex.names = 0.8,
  main = "Heart (3 vs 29m)"
)
barplot(my.n.loss,
  names.arg = seq(0, 95, 5), col = c(rep("#A7DEEF", 19), "#1AA1DB"),
  ylab = "Number of H3K4me3 domains with lost breadth",
  xlab = "H3K4me3 breadth quantile in 3m", cex.names = 0.8,
  main = "Heart (3 vs 29m)"
)
barplot(my.n.gain,
  names.arg = seq(0, 95, 5), col = c(rep("#A7DEEF", 19), "#1AA1DB"),
  ylab = "Number of H3K4me3 domains with gained breadth",
  xlab = "H3K4me3 breadth quantile in 3m", cex.names = 0.8,
  main = "Heart (3 vs 29m)"
)
dev.off()
