"H3K4me3",
"ES_UMR",
"H3k36me3",
"X5hmC",
"X5fC",
"ES_LMR",
"H3k04me1",
"H2A.Z",
"Ac_H2A.Z",
"H3k09me3",
"Sox2",
"Nanog",
"Ctcf",
"Eset",
"bivalent_domain",
"H3K27me3",
"Ezh2",
"Ring1b",
"Smc3",
"Smc1",
"Gquad",
"Pol2_Ser2P",
"Rest",
"Smarc4",
"Wdr5",
"Klf4",
"CHD4",
"E2f1",
"Med1",
"Oct.04",
"mof",
"ES_FMR")
################################################################
## Required libraries
library(gplots) ## Required for heatmaps.2
library(RColorBrewer)
## Reversed "RdBu" Brewer palette, interpolated to 100 colours
cols <- rev(colorRampPalette(brewer.pal(9, "RdBu"))(100))

## Main project directory, result directory and figure sub-directory.
## `recursive` is spelled out in full (it was previously abbreviated to
## `recurs`, which only worked through partial argument matching).
dir.main <- "~/replication_origins"
dir.results <- file.path(dir.main, "analysis", "f4_rnase", "marks", "mark_correlations_stats")
dir.create(dir.results, showWarnings=FALSE, recursive=TRUE)
dir.figures <- file.path(dir.results, "figures")
dir.create(dir.figures, showWarnings=FALSE, recursive=TRUE)
setwd(dir.main)
################################################################
## NOTE(review): `rand.cor.files` is not defined in the visible part of this
## file; it must exist in the session before this line is run.
rand.rep <- length(rand.cor.files) ## number of replicates for random simulations
type <- "peaks" ## SWEMBL peaks matching at least one SICER zone
## type <- "zones" ## SICER zones overlapping at least one SWEMBL peak

## Open the figure directory with the system `open` command.
## NOTE(review): `open` is not available on every platform.
system(paste("open", dir.figures))
dir.main <- "~/replication_origins"
dir.main
dir.main <- "~/replication_origins"
setwd(dir.main)
setwd("analysis/stats_overlaps/")
dir.overlaps <- file.path(dir.main, "analysis/stats_overlaps/")
setwd(dir.overlaps)
list.files
list.files()
setwd("analysis/stats_overlaps/")
list.files()
## Set working directory
dir.main <- "~/replication_origins"
dir.overlaps <- file.path(dir.main, "analysis/stats_overlaps/")
setwd(dir.overlaps)
sicer.iz <- load(file = "SICER_IZ_table.txt")
sicer.iz <- read.table(file = "SICER_IZ_table.txt")
dim(sicer.iz)
names(sicer.iz)
sicer.iz <- read.table(file = "SICER_IZ_table.txt", header = TRUE)
dim(sicer.iz)
names(sicer.iz)
sicer.iz.cor <- cor(sicer.iz)
sicer.iz.cor <- cor(sicer.iz, na.omit=TRUE)
?cor
sicer.iz.cor <- cor(sicer.iz, na.rm=TRUE)
sicer.iz.cor <- cor(na.omit(sicer.iz))
dim
head(sicer.iz)
data.types = c("SICER_IZ", "SWEMBL_IS")
data.type <- "SICER_IZ"
marks.per.region <- read.table(file = "SICER_IZ_table.txt", header = TRUE)
row.names(marks.per.region)
head(sicer.iz)
row.names(marks.per.region)  <- paste(sep="_",
marks.per.region$chromosome,
marks.per.region$start)
head(marks.per.region)
row.names(marks.per.region)  <- paste(sep="_",
marks.per.region$chromosome,
marks.per.region$start,
marks.per.region$end,
marks.per.region$lenght)
marks.per.region <- marks.per.region[, -(1:4)]
dim(sicer.iz)
dim(marks.per.region)
marks.per.region[1:10, 1:4]
names(marks.per.region)
marks.per.region[1:10, 1:4]
View(marks.per.region)
mark.corr <- cor(na.omit(marks.per.region))
region.annot.file <- paste(sep="", data.type, "_table.txt")
marks.per.region <- read.table(file = "SICER_IZ_table.txt", header = TRUE)
marks.per.region <- read.table(file = region.annot.file, header = TRUE)
row.names(marks.per.region)  <- paste(sep="_",
marks.per.region$chromosome,
marks.per.region$start,
marks.per.region$end,
marks.per.region$lenght)
View(marks.per.region)
marks.per.region <- marks.per.region[, -(1:8)]
View(marks.per.region)
## Compute correlation between columns
mark.corr <- cor(na.omit(marks.per.region))
heatmap(mark.corr)
library("stats4bioinfo")
## Draw a heatmap of the correlations between marks for one table.
##
## Reads the whitespace-delimited table `ori` (column names on the first
## line), computes the Pearson correlation between columns 9 to 43 and
## draws the resulting matrix with heatmap.2, using average-linkage
## hierarchical clustering.
##
## Argument:
##   ori   path to the input table. The plot label is the file name with
##         its last four characters removed (e.g. a ".txt" suffix).
##
## Value (returned invisibly):
##   hm$rowInd, the row index vector returned by heatmap.2.
calc.correlations <- function(ori) {
  file.name <- basename(ori)
  base <- substr(file.name, 1, nchar(file.name) - 4)
  cat(base, sep="\n")

  ##-----------------------------
  ##-- Correlation matrix
  ##-----------------------------
  ##-- Read the table
  df <- read.table(ori, header=TRUE, sep="")

  ##-- Correlation between the mark columns (columns 9 to 43 of the table).
  ##-- use="all.obs" makes cor() fail if any value is missing.
  mark.cor <- cor(df[, 9:43], use="all.obs", method="pearson")

  ##-- Clustering: average linkage.
  ##-- heatmap.2 calls hclustfun(distfun(x)), i.e. it hands this function
  ##-- an already computed distance object. The previous version applied
  ##-- dist() a second time inside the function, so the clustering was done
  ##-- on distances between rows of the distance matrix rather than on the
  ##-- distances themselves.
  cols <- rev(colorRampPalette(brewer.pal(9, "RdBu"))(100))
  hc <- function(d) {
    hclust(d, method="average")
  }
  hm <- heatmap.2(mark.cor, scale="none", trace="none", col=cols,
                  main=paste("Correlation between Marks\n", base),
                  hclustfun=hc)

  ##-- Row order of the heatmap (no longer stored in a local named `order`,
  ##-- which masked base::order inside the function)
  invisible(hm$rowInd)
}
force.order <- TRUE
mark.order <- c("H3K56ac",
"H3k27ac",
"p300",
"TBP",
"Pol2_Ser5P",
"Pol2_global",
"H3K4me2",
"Dnase",
"H3k09ac",
"CpG",
"H3K4me3",
"ES_UMR",
"H3k36me3",
"X5hmC",
"X5fC",
"ES_LMR",
"H3k04me1",
"H2A.Z",
"Ac_H2A.Z",
"H3k09me3",
"Sox2",
"Nanog",
"Ctcf",
"Eset",
"bivalent_domain",
"H3K27me3",
"Ezh2",
"Ring1b",
"Smc3",
"Smc1",
"Gquad",
"Pol2_Ser2P",
"Rest",
"Smarc4",
"Wdr5",
"Klf4",
"CHD4",
"E2f1",
"Med1",
"Oct.04",
"mof",
"ES_FMR")
################################################################
## Required libraries
library(gplots) ## Required for heatmaps.2
library(RColorBrewer)
## Reversed "RdBu" Brewer palette, interpolated to 100 colours
cols <- rev(colorRampPalette(brewer.pal(9, "RdBu"))(100))

## Main project directory, result directory and figure sub-directory.
## `recursive` is spelled out in full (it was previously abbreviated to
## `recurs`, which only worked through partial argument matching).
dir.main <- "~/replication_origins"
dir.results <- file.path(dir.main, "analysis", "f4_rnase", "marks", "mark_correlations_stats")
dir.create(dir.results, showWarnings=FALSE, recursive=TRUE)
dir.figures <- file.path(dir.results, "figures")
dir.create(dir.figures, showWarnings=FALSE, recursive=TRUE)
setwd(dir.main)
library(gplots) ## Required for heatmaps.2
library(RColorBrewer)
library(gplots) ## Required for heatmaps.2
library(RColorBrewer)
cols <- rev(colorRampPalette(brewer.pal(9,"RdBu"))(100))   ## Color palette
cols <- rev(colorRampPalette(brewer.pal(9,"RdBu"))(100))   ## Define color palette
heatmap(mark.corr)
heatmap(mark.corr, col=cols)
cols <- rev(colorRampPalette(brewer.pal(1,"RdBu"))(100))   ## Define color palette
heatmap(mark.corr, col=cols)
cols <- rev(colorRampPalette(brewer.pal(1,"RdBu"))(100))   ## Define color palette
cols <- rev(colorRampPalette(brewer.pal(9,"RdBu")))   ## Define color palette
(100)
cols <- rev(colorRampPalette(brewer.pal(9,"RdBu"))(100))   ## Define color palette
cols <- rev(colorRampPalette(brewer.pal(9,"RdBu")))   ## Define color palette
(100)
heatmap(mark.corr, col=cols)
hist(mark.corr)
hist(mark.corr, breaks=100)
hist(mark.corr, breaks=50)
ori.corr <- cor(na.omit(marks.per.region))
## Draw a heatmap of the correlation
cols <- rev(colorRampPalette(brewer.pal(9,"RdBu"))(100))   ## Define color palette
hm <- heatmap.2(as.matrix(ori.cor),  scale="none", trace="none",
main=paste("Correlation between marks\nat origin", type),
col=cols, breaks=seq(-1,1,2/length(cols)))
ori.cor <- cor(na.omit(marks.per.region))
## Draw a heatmap of the correlation
cols <- rev(colorRampPalette(brewer.pal(9,"RdBu"))(100))   ## Define color palette
hm <- heatmap.2(as.matrix(ori.cor),  scale="none", trace="none",
main=paste("Correlation between marks\nat origin", type),
col=cols, breaks=seq(-1,1,2/length(cols)))
hist(mark.corr, breaks=50)
cols <- rev(colorRampPalette(brewer.pal(9,"RdBu"))(100))   ## Define color palette
hm <- heatmap.2(as.matrix(ori.cor),  scale="none", trace="none",
main=paste("Correlation between marks\nat origin", type),
col=cols, breaks=seq(-1,1,2/length(cols)))
data.types = c("SICER_IZ", "SWEMBL_IS")
type <- "SICER_IZ"
hist(mark.corr, breaks=50)
hm <- heatmap.2(as.matrix(ori.cor),  scale="none", trace="none",
main=paste("Correlation between marks\nat origin", type),
col=cols, breaks=seq(-1,1,2/length(cols)))
write.table(ori.cor,
file=file.path(paste(sep="", "mark_correlations_", type,".tab"),
sep="\t", quote=FALSE, row.names=TRUE, col.names=NA)
)
paste(sep="", "mark_correlations_", type,".tab")
write.table(ori.cor,
file=paste(sep="", type, "mark_correlations.tab"),
sep="\t", quote=FALSE, row.names=TRUE, col.names=NA)
getwd()
dir.main <- "~/replication_origins"
dir.overlaps <- file.path(dir.main, "analysis/stats_overlaps/")
setwd(dir.overlaps)
getwd()
list.files()
data.types = c("SICER_IZ", "SWEMBL_IS")
type <- "SICER_IZ"
region.annot.file <- paste(sep="", data.type, "_table.txt")
region.annot.file
marks.per.region <- read.table(file = region.annot.file, header = TRUE)
row.names(marks.per.region)  <- paste(sep="_",
marks.per.region$chromosome,
marks.per.region$start,
marks.per.region$end,
marks.per.region$lenght)
marks.per.region <- marks.per.region[, -(1:8)]
## Treat one data set
dim(marks.per.region)
View(marks.per.region)
ori.cor <- cor(na.omit(marks.per.region))
## Draw a heatmap of the correlation
cols <- rev(colorRampPalette(brewer.pal(9,"RdBu"))(100))   ## Define color palette
hm <- heatmap.2(as.matrix(ori.cor),  scale="none", trace="none",
main=paste("Correlation between marks\nat origin", type),
col=cols, breaks=seq(-1,1,2/length(cols)))
write.table(ori.cor,
file=paste(sep="", type, "mark_correlations.tab"),
sep="\t", quote=FALSE, row.names=TRUE, col.names=NA)
library(gplots) ## Required for heatmaps.2
library(RColorBrewer)
## Set working directory
dir.main <- "~/replication_origins"
dir.overlaps <- file.path(dir.main, "analysis/stats_overlaps/")
setwd(dir.overlaps)
## Load the data
data.types = c("SICER_IZ", "SWEMBL_IS")
type <- "SICER_IZ"
## For each data type: load its region table, correlate the mark columns,
## draw the correlation heatmap and export the correlation matrix.

## Define color palette (identical for every data type, so built once)
cols <- rev(colorRampPalette(brewer.pal(9, "RdBu"))(100))

for (type in data.types) {
  ## Load a data table with one row per peak or region,
  ## and one column per mark, indicating how many marks each peak/region overlaps.
  ##
  ## The file name is built from the loop variable `type`. It was previously
  ## built from `data.type`, which the loop never updates, so the same table
  ## was re-read at every iteration.
  region.annot.file <- paste0(type, "_table.txt")
  marks.per.region <- read.table(file = region.annot.file, header = TRUE)

  ## Use the region coordinates as row names.
  ## NOTE(review): `lenght` looks like a misspelling of `length`; confirm
  ## that it matches the column header of the input table.
  row.names(marks.per.region) <- paste(sep="_",
                                       marks.per.region$chromosome,
                                       marks.per.region$start,
                                       marks.per.region$end,
                                       marks.per.region$lenght)

  ## Discard the first 8 columns; only the remaining columns are correlated
  marks.per.region <- marks.per.region[, -(1:8)]

  ## Compute correlation between columns, after removing rows with missing values
  ori.cor <- cor(na.omit(marks.per.region))

  ## Draw a heatmap of the correlation.
  ## The breaks span [-1, 1] with one more break than there are colours.
  hm <- heatmap.2(as.matrix(ori.cor), scale="none", trace="none",
                  main=paste("Correlation between marks\nat origin", type),
                  col=cols, breaks=seq(-1, 1, 2/length(cols)))

  ## Export the correlation matrix as a tab-delimited file
  ## (col.names=NA leaves an empty header cell above the row names)
  write.table(ori.cor,
              file=paste(sep="", type, "mark_correlations.tab"),
              sep="\t", quote=FALSE, row.names=TRUE, col.names=NA)
}
library(stats4bioinfo)
verbose(paste("Analyzing data type", type))
library(gplots) ## Required for heatmaps.2
library(RColorBrewer)
library(stats4bioinfo)
## Set working directory
dir.main <- "~/replication_origins"
dir.overlaps <- file.path(dir.main, "analysis/stats_overlaps/")
setwd(dir.overlaps)
## Load the data
data.types = c("SICER_IZ", "SWEMBL_IS")
type <- "SICER_IZ"
## For each data type: report progress, load its region table, correlate the
## mark columns, draw the correlation heatmap and export the matrix.

## Define color palette (identical for every data type, so built once)
cols <- rev(colorRampPalette(brewer.pal(9, "RdBu"))(100))

for (type in data.types) {
  verbose(paste("Analyzing data type", type))

  ## Load a data table with one row per peak or region,
  ## and one column per mark, indicating how many marks each peak/region overlaps.
  ##
  ## The file name is built from the loop variable `type`. It was previously
  ## built from `data.type`, which the loop never updates, so the same table
  ## was re-read at every iteration while the title and output file changed.
  region.annot.file <- paste0(type, "_table.txt")
  marks.per.region <- read.table(file = region.annot.file, header = TRUE)

  ## Use the region coordinates as row names.
  ## NOTE(review): `lenght` looks like a misspelling of `length`; confirm
  ## that it matches the column header of the input table.
  row.names(marks.per.region) <- paste(sep="_",
                                       marks.per.region$chromosome,
                                       marks.per.region$start,
                                       marks.per.region$end,
                                       marks.per.region$lenght)

  ## Discard the first 8 columns; only the remaining columns are correlated
  marks.per.region <- marks.per.region[, -(1:8)]

  ## Compute correlation between columns, after removing rows with missing values
  ori.cor <- cor(na.omit(marks.per.region))

  ## Draw a heatmap of the correlation.
  ## The breaks span [-1, 1] with one more break than there are colours.
  hm <- heatmap.2(as.matrix(ori.cor), scale="none", trace="none",
                  main=paste("Correlation between marks\nat origin", type),
                  col=cols, breaks=seq(-1, 1, 2/length(cols)))

  ## Export the correlation matrix as a tab-delimited file
  ## (col.names=NA leaves an empty header cell above the row names)
  write.table(ori.cor,
              file=paste(sep="", type, "mark_correlations.tab"),
              sep="\t", quote=FALSE, row.names=TRUE, col.names=NA)
}
dir.main <- "~/replication_origins"
dir.overlaps <- file.path(dir.main, "analysis/stats_overlaps/")
setwd(dir.overlaps)
## Load the data
data.types = c("SICER_IZ", "SWEMBL_IS")
type <- "SICER_IZ"
verbose(paste("Analyzing data type", type))
## Load a data table with one row per peak or region,
## and one column per mark, indicating how many marks each peak/region overlaps.
region.annot.file <- paste(sep="", data.type, "_table.txt")
marks.per.region <- read.table(file = region.annot.file, header = TRUE)
row.names(marks.per.region)  <- paste(sep="_",
marks.per.region$chromosome,
marks.per.region$start,
marks.per.region$end,
marks.per.region$lenght)
marks.per.region <- marks.per.region[, -(1:8)]
verbose(paste("Analyzing data type", type))
## Load a data table with one row per peak or region,
## and one column per mark, indicating how many marks each peak/region overlaps.
region.annot.file <- paste(sep="", data.type, "_table.txt")
marks.per.region <- read.table(file = region.annot.file, header = TRUE)
row.names(marks.per.region)  <- paste(sep="_",
marks.per.region$chromosome,
marks.per.region$start,
marks.per.region$end,
marks.per.region$lenght)
#
names(marks.per.region)
view(marks.per.region)
View(marks.per.region)
type <- "SWEMBL_IS"
verbose(paste("Analyzing data type", type))
library(gplots) ## Required for heatmaps.2
library(RColorBrewer)
library(stats4bioinfo)
## Set working directory
dir.main <- "~/replication_origins"
dir.overlaps <- file.path(dir.main, "analysis/stats_overlaps/")
setwd(dir.overlaps)
swembl.is <- read.table(file = "SWEMBL_IS_table.txt", header=TRUE)
library(gplots) ## Required for heatmaps.2
library(RColorBrewer)
library(stats4bioinfo)
## Set working directory
dir.main <- "~/replication_origins"
dir.overlaps <- file.path(dir.main, "analysis/stats_overlaps/")
setwd(dir.overlaps)
## Load the data
setwd(dir.overlaps)
data.types = c("SWEMBL_IS", "SICER_IZ")
swembl.is <- read.table(file = "SWEMBL_IS_table.txt", header=TRUE)
## Load SWEMBL peaks (initiation sites)
swembl.is <- read.table(file = "SWEMBL_IS_table.tab", header=TRUE)
## Load SWEMBL peaks (initiation sites)
swembl.is <- read.table(file = "SWEMBL_IS_table.tab", header=TRUE)
## Load SWEMBL peaks (initiation sites)
swembl.is <- read.table(file = "SWEMBL_IS_table.tab", header=TRUE)
## Load required libraries
library(gplots) ## Required for heatmaps.2
library(RColorBrewer)
library(stats4bioinfo)
## Set working directory
dir.main <- "~/replication_origins"
dir.overlaps <- file.path(dir.main, "analysis/stats_overlaps/")
setwd(dir.overlaps)
## Load SWEMBL peaks (initiation sites)
swembl.is <- read.table(file = "SWEMBL_IS_table.tab", header=TRUE)
swembl.is$class[swembl.is$class==3] <- "3a"
swembl.is$class[swembl.is$class==4] <- "3b"
swembl.is.nb <- nrow(swembl.is)
type <- "peaks" ## Data type used as suffix later
# dim(swembl.is)
# View(swembl.is)
## Load a data table with one row per peak or region,
## and one column per mark, indicating how many marks each peak/region overlaps.
row.names(swembl.is)  <- swembl.is[,"feature_name"]
## Define mark order for drawing.
## Column names of the marks, in the order in which they are selected from
## the peak table. The former chained assignment
## `mark.order <- mark.order <- c(...)` was redundant and is now a single
## assignment; the resulting vector is unchanged.
mark.order <- c("H3K56ac",
                "H3k27ac",
                "p300",
                "TBP",
                "Pol2_Ser5P",
                "Pol2_global",
                "H3K4me2",
                "Dnase",
                "H3k09ac",
                "CpG",
                "H3K4me3",
                "ES_UMR",
                "H3k36me3",
                "X5hmC",
                "X5fC",
                "ES_LMR",
                "H3k04me1",
                "H2A.Z",
                "Ac_H2A.Z",
                "H3k09me3",
                "Sox2",
                "Nanog",
                "Ctcf",
                "Eset",
                "bivalent_domain",
                "H3K27me3",
                "Ezh2",
                "Ring1b",
                "Smc3",
                "Smc1",
                "Gquad",
                "Pol2_Ser2P",
                "Rest",
                "Smarc4",
                "Wdr5",
                "Klf4",
                "CHD4",
                "E2f1",
                "Med1",
                "Oct.04",
                "mof",
                "ES_FMR")
## Compute a Boolean table indicating if each mark (column)
## overlaps or not a given peak (row).
swembl.is.overlap <- data.frame(swembl.is[, mark.order] > 0)
swembl.is.overlap <- swembl.is.overlap + 0  ## Tricky way to obtaine 1/0 rather than  TRUE/FALSE
# View(swembl.is)
################################################################
## Compute correlation between marks in the peaks/regions
##
## Not used or shown here; it is just a visual check that
## I have the same dataset as for the heatmap figure of the article.
# ori.cor <- cor(na.omit(swembl.is[, mark.order]))
#
# ## Draw a heatmap of the correlation
# cols <- rev(colorRampPalette(brewer.pal(9,"RdBu"))(100))   ## Define color palette
# hm <- heatmap.2(as.matrix(ori.cor),  scale="none", trace="none",
#                 main=paste("Correlation between marks\nat origin", type),
#                 col=cols, breaks=seq(-1,1,2/length(cols)))
#
# write.table(ori.cor,
#             file=paste(sep="", "SWEMBL_IS_correlations.tab"),
#             sep="\t", quote=FALSE, row.names=TRUE, col.names=NA)
## Set global options for R chunks
knitr::opts_chunk$set(eval=TRUE, cache=TRUE, echo=FALSE)
setwd(dir.overlaps)
dir.overlaps
source('~/Documents/research/collaborations/marcel_mechali/scripts/R-scripts/kmer_occurrences_per_slice.R', echo=TRUE)
## Orphaned tail of an earlier plot() call whose opening lines are missing.
## Commented out because, on its own, it is a syntax error:
## panel.first=grid(col="#BBBBBB", lty="solid"))

## Scatter plot of the `upG` column against the `downC` column of `bed`,
## with points coloured by bed$color and a grid drawn behind the points.
## The title now names the plotted column (downC, it previously said downG)
## and the y-axis label typo "downstreal" is corrected.
plot(bed[, c("upG", "downC")], col=bed$color,
     main=paste(sep="", pos.ol, "nt; upG versus downC occurrences"),
     xlab="G-rich 6-mers upstream occurrences",
     ylab="C-rich 6-mers downstream occurrences",
     panel.first=grid(col="#BBBBBB", lty="solid"))

## One legend entry per element of class.colors
legend("bottomleft", legend=names(class.colors), col=class.colors, pch=20, cex=1.2, bg="white", bty="o")
## Scatter plot of the `upG` column against the `downC` column of `bed`,
## with points coloured by bed$color and a grid drawn behind the points.
## The title now names the plotted column (downC, it previously said downG)
## and the y-axis label typo "downstreal" is corrected.
plot(bed[, c("upG", "downC")], col=bed$color,
     main=paste(sep="", pos.ol, "nt; upG versus downC occurrences"),
     xlab="G-rich 6-mers upstream occurrences",
     ylab="C-rich 6-mers downstream occurrences",
     panel.first=grid(col="#BBBBBB", lty="solid"))

## One legend entry per element of class.colors, placed in the top-right corner
legend("topright", legend=names(class.colors), col=class.colors, pch=20, cex=1.2, bg="white", bty="o")
source('~/Documents/research/collaborations/marcel_mechali/scripts/R-scripts/kmer_occurrences_per_slice.R', echo=TRUE)
pos.ol
peaks.per.class <- data.frame(table(bed$class))
peaks.per.class
names(peaks.per.class) <- c("Polarity.class", "Peaks")
peaks.per.class <- peaks.per.class[,rev(names(peaks.per.class))]
peaks.per.class
write.table(peaks.per.class, file=file.path(dir.occ, paste(sep="", file.prefix, "_peaks_per_class.tab")), col.names = TRUE, row.names = FALSE, sep="\t", quote=FALSE)
source('~/Documents/research/collaborations/marcel_mechali/scripts/R-scripts/kmer_occurrences_per_slice.R', echo=TRUE)
names(to.export)
table(to.export$class)
