library("dplyr")
library("ggplot2")
# Run computeCpgCov from mHapSuite on the colon sample.
# The arguments are passed as a vector through system2() rather than as one
# multi-line string: inside an R string literal a backslash followed by a
# newline leaves a bare newline in the command, which the shell treats as the
# end of the command, so the options on the continuation lines would not be
# passed to java.
cpg_cov_args <- c(
  "-jar", "mHapSuite-2.0-jar-with-dependencies.jar", "computeCpgCov",
  "-bigwig", "colon_MM.bw",
  "-cpgPath", "hg19_CpG.gz",
  "-openChromatin", "colon_open.bed",
  "-tag", "colon_MHB",
  "-bedPath", "colon_MHB.bed",
  "-missingDataAsZero"
)
cpg_cov_status <- system2("java", args = cpg_cov_args)
# Stop on failure so the following steps do not run on a missing or stale
# output file.
if (cpg_cov_status != 0) {
  stop(
    "mHapSuite computeCpgCov exited with status ", cpg_cov_status,
    call. = FALSE
  )
}
# The output file lists the mean methylation (beta) of each CpG site and
# whether the site is covered by an open chromatin region (Open).
result <- read.table("colon_MHB.txt")
names(result) <- c("chr", "beta", "Open")
head(result)

# Assign CpG sites to 10 groups by ascending mean methylation.
result <- result[order(result$beta, decreasing = FALSE), ]
n_sites <- nrow(result)
# Group size is floor(n / 10) + 1; the last group takes whatever remains.
sites_per_cluster <- trunc(n_sites / 10) + 1
# seq_len() keeps this valid for an empty table (1:0 would index c(1, 0)).
cluster_id <- rep(seq_len(10), each = sites_per_cluster)[seq_len(n_sites)]
result$Cluster <- paste0("cluster_", cluster_id)
# Constant 1 per site so that summing it yields the number of sites per group.
result$total <- 1
head(result)
table(result$Cluster)

# Per-cluster number of sites and sum of the Open column.
# Plain summarise() replaces summarise_each(), deprecated since dplyr 0.7.0.
res <- result %>%
  group_by(Cluster) %>%
  summarise(total = sum(total), Open = sum(Open)) %>%
  as.data.frame()
rownames(res) <- res$Cluster
res$Cluster <- NULL

# Per-cluster median methylation.
cluster_res <- result %>%
  group_by(Cluster) %>%
  summarise(beta = median(beta)) %>%
  as.data.frame()
rownames(cluster_res) <- cluster_res$Cluster
cluster_res$Cluster <- NULL

# Both summaries are grouped by the same Cluster column, so their rows align.
res <- cbind(res, cluster_res)
# Percentage of sites per cluster overlapped by open chromatin
# (assumes Open is coded 0/1 -- TODO confirm against the mHapSuite output).
res$Freq <- 100 * round(res$Open / res$total, 4)
res$cluster <- rownames(res)
head(res)
dim(res)

# Select by name instead of position so the result does not depend on the
# column order produced above.
colon_MHB <- res[, c("beta", "Freq", "cluster")]
head(colon_MHB)
# Perform a similar analysis for UMR, LMR, PMD and HMR.
# colon_merge.txt is read with a header and must provide the columns beta,
# Freq and type used by the plot (presumably the per-region-type tables
# produced as above and merged -- verify against how the file is built).
input <- read.table("colon_merge.txt", header = TRUE)
head(input)
table(input$type)

# Plot size for notebook (repr) output.
options(repr.plot.width = 8, repr.plot.height = 6)

# Smoothed open-chromatin overlap against mean methylation, one curve per type.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept here so the script still runs on older ggplot2 versions.
ggplot(input, aes(x = beta, y = Freq, color = type)) +
  geom_smooth(se = FALSE, size = 1) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(limits = c(0, 1)) +
  ggtitle("Colon") +
  scale_color_manual(
    values = c('#56A902', '#2e409a', '#fcbe32', '#e97f02', '#f15c5c'),
    name = ""
  ) +
  labs(
    x = "Mean methylation",
    y = "The Percent of CpG sites \noverlapped by Open chromatin regions (%)"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5, vjust = 0.5),
    legend.text = element_text(size = 13),
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 13)
  ) +
  coord_cartesian(ylim = c(0, 100)) +
  # Fixed reference line: passing the constant outside aes() draws it once
  # instead of once per data row.
  geom_vline(xintercept = 0.25, colour = "black", linetype = "dashed")
# Merged table across tissues; must provide beta, Freq, cluster and tissue.
total <- read.table("merge.txt", header = TRUE)

# Split the tissue label: drop everything after the first ".", then take the
# piece before the first "_" as tissue and the second "_"-separated piece as
# type. as.character() guards against the column being read as a factor
# (the read.table default before R 4.0), which strsplit() rejects.
tissue_label <- vapply(
  strsplit(as.character(total$tissue), ".", fixed = TRUE),
  function(z) z[1],
  character(1)
)
label_parts <- strsplit(tissue_label, "_", fixed = TRUE)
total$type <- vapply(label_parts, function(z) z[2], character(1))
total$tissue <- vapply(label_parts, function(z) z[1], character(1))
head(total)
table(total$tissue, total$type)

# Average beta and Freq over tissues for every cluster/type combination.
# Columns are selected by name (the plot below needs exactly these two);
# the grouping columns keep aggregate()'s default names Group.1 / Group.2.
total_input <- aggregate(
  total[c("beta", "Freq")],
  by = list(total$cluster, total$type),
  mean
)
head(total_input)
dim(total_input)
table(total_input$`Group.2`)

# Plot size for notebook (repr) output.
options(repr.plot.width = 8, repr.plot.height = 6)

# Smoothed open-chromatin overlap against mean methylation, one curve per type.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept here so the script still runs on older ggplot2 versions.
ggplot(total_input, aes(x = beta, y = Freq, color = Group.2)) +
  geom_smooth(se = FALSE, size = 1) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(limits = c(0, 1)) +
  ggtitle("17 tissues") +
  scale_color_manual(
    values = c('#56A902', '#2e409a', '#fcbe32', '#e97f02', '#f15c5c'),
    name = ""
  ) +
  labs(
    x = "Mean methylation",
    y = "The Percent of CpG sites \noverlapped by Open chromatin regions (%)"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5, vjust = 0.5),
    legend.text = element_text(size = 13),
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 13)
  ) +
  coord_cartesian(ylim = c(0, 100)) +
  # Fixed reference line: passing the constant outside aes() draws it once
  # instead of once per data row.
  geom_vline(xintercept = 0.25, colour = "black", linetype = "dashed")