#!/bin/Rscript
# Identify genomic features (PMDs, UMRs, LMRs) from DNA methylation data using MethylSeekR
library(MethylSeekR)
library("BSgenome.Hsapiens.UCSC.hg19")
# Chromosome lengths of the hg19 BSgenome; handed to MethylSeekR as seqLengths.
sLengths <- seqlengths(Hsapiens)
library(regioneR)
library(parallel)
library(rtracklayer)
# ---- Reproducibility and output directories ----
# Fix the RNG seed so repeated runs use the same random stream.
set.seed(123)
# "train" receives the diagnostic PDFs, "res" the BED/RData results.
# Each directory is checked on its own: previously "res" was only created
# when "train" was missing, so a pre-existing "train" left "res" absent.
for (out_dir in c("train", "res")) {
  if (!dir.exists(out_dir)) {
    dir.create(out_dir)
  }
}
# ---- Select the samples that still need processing ----
# Input files are expected as <tag>_Merged_Count_CpG.bedGraph in data_path.
data_path <- "merged_CpG/"
samples <- list.files(data_path, pattern = "_Merged_Count_CpG\\.bedGraph$")
# A sample counts as finished once res/<tag>_PMDs.bed exists; keep only the
# tags that have no such file yet.
undone <- setdiff(
  sub("_Merged_Count_CpG\\.bedGraph$", "", samples),
  sub("_PMDs\\.bed$", "", list.files("res", pattern = "_PMDs\\.bed$"))
)
# paste0() turns a zero-length argument into "", which would yield the single
# bogus file name "_Merged_Count_CpG.bedGraph" when nothing is left to do;
# return an empty vector in that case so the sample loop is skipped.
samples <- if (length(undone) > 0) {
  paste0(undone, "_Merged_Count_CpG.bedGraph")
} else {
  character(0)
}
# ---- hg19 CpG islands from the UCSC table "cpgIslandExt" ----
# Open a UCSC browser session and pin it to the hg19 assembly.
session <- browserSession()
genome(session) <- "hg19"
# Query the CpG island table and fetch it as a track.
query <- ucscTableQuery(session, table = "cpgIslandExt")
CpGislands.gr <- track(query)
# Drop the genome label from the island ranges
# (presumably so they combine with the unlabelled sample ranges; confirm).
genome(CpGislands.gr) <- NA
# Replace every island by a 5000-bp window that keeps the island's centre;
# warnings raised by the resize are deliberately silenced.
CpGislands.gr <- suppressWarnings(
  resize(CpGislands.gr, width = 5000, fix = "center")
)
# ---- Per-sample MethylSeekR segmentation ----
# For every pending sample: load the CpG counts, segment PMDs, pick the nCpG
# cut-off from the FDR table, segment UMRs/LMRs, and write the results to
# res/ (diagnostic PDFs go to train/).
n_cores <- 10     # value passed as num.cores to every MethylSeekR call
FDR.cutoff <- 5   # upper bound applied to the entries of stats$FDRs
m.sel <- 0.5      # methylation cut-off; also selects the row of stats$FDRs

for (i in samples) {
  # Load the bedGraph as a GRanges object; its two metadata columns are
  # taken to be methylated (M) and unmethylated (Um) read counts.
  x <- toGRanges(paste0(data_path, i))
  if (ncol(mcols(x)) != 2L) {
    stop("Expected exactly 2 count columns in ", i, ", found ",
         ncol(mcols(x)), call. = FALSE)
  }
  names(mcols(x)) <- c("M", "Um")
  # Total coverage per CpG.
  mcols(x)[, "T"] <- mcols(x)[, 1] + mcols(x)[, 2]
  # Collapse every interval to the single position given by its end.
  ranges(x) <- end(x)
  # Keep only total (T) and methylated (M) counts, in that order.
  mcols(x) <- mcols(x)[, c("T", "M")]
  tag <- gsub("_Merged_Count_CpG.bedGraph", "", i, fixed = TRUE)

  # PMD segmentation (chr.sel = "chr22").
  PMDsegments <- segmentPMDs(
    m = x, chr.sel = "chr22", seqLengths = sLengths,
    pdfFilename = paste0("train/", tag, "_PMD.pdf"), num.cores = n_cores
  )

  # FDR table used to choose the nCpG cut-off.
  stats <- suppressWarnings(calculateFDRs(
    m = x, CGIs = CpGislands.gr, PMDs = PMDsegments, num.cores = n_cores,
    pdfFilename = paste0("train/", tag, "_FDR.pdf")
  ))
  # First nCpG value (column name) whose FDR at m.sel is below the cut-off.
  fdr_row <- stats$FDRs[as.character(m.sel), ]
  passing <- names(fdr_row)[which(fdr_row < FDR.cutoff)]
  if (length(passing) == 0) {
    # Previously this silently produced n.sel = NA and passed it on.
    stop("No nCpG cut-off reaches FDR < ", FDR.cutoff, " at m.sel = ", m.sel,
         " for sample ", tag, call. = FALSE)
  }
  n.sel <- as.integer(passing[1])

  # UMR / LMR segmentation.
  UMRLMRsegments <- segmentUMRsLMRs(
    m = x, meth.cutoff = m.sel, nCpG.cutoff = n.sel, PMDs = PMDsegments,
    num.cores = n_cores, myGenomeSeq = Hsapiens, minCover = 5,
    seqLengths = sLengths,
    pdfFilename = paste0("train/", tag, "_UMR_LMR.pdf")
  )

  # ---- Save results ----
  save(PMDsegments, file = paste0("res/", tag, "_PMDs.RData"))
  save(UMRLMRsegments, file = paste0("res/", tag, "_UMRsLMRs.RData"))
  # LMRs and UMRs, written as the tabular form of their genomic ranges.
  write.table(
    granges(UMRLMRsegments[UMRLMRsegments$type == "LMR", ]),
    file = paste0("res/", tag, "_LMRs.bed"),
    sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE
  )
  write.table(
    granges(UMRLMRsegments[UMRLMRsegments$type == "UMR", ]),
    file = paste0("res/", tag, "_UMRs.bed"),
    sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE
  )
  # PMDs: first three columns of the data frame (chromosome, start, end).
  # Written last on purpose: the sample-selection step treats an existing
  # res/<tag>_PMDs.bed as "sample finished", so it must not appear before
  # the other outputs are complete.
  write.table(
    as.data.frame(PMDsegments[PMDsegments$type == "PMD"])[1:3],
    file = paste0("res/", tag, "_PMDs.bed"),
    sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE
  )
}