#####################
#####################################
##########################################################
#library
    library(GenomicRanges)
    library(limma)
    library(edgeR)
    library(DESeq2)
options(stringsAsFactors = FALSE)
#
#####################
#####################################
##########################################################
######### loading data
    # Read the five tab-delimited input tables from the working directory.
    # The CpG island table is read with read.delim2(), the others with
    # read.delim().
    GeneTranscript <- read.delim(file = "gencode.v19.annotation.txt")
    CpG <- read.delim2(file = "hg19_cpg_island.txt", header = TRUE)
    K450 <- read.delim(file = "450k_methylation.txt", header = TRUE)
    RNAseq <- read.delim(file = "RNAseq.txt", header = TRUE)
    CNV <- read.delim(file = "CNV.txt", header = TRUE)
#
#####################
#####################################
##########################################################
####### identification of genes with CGI/promoter and covered by at least 2 HM450K probes
###############################################################################
        # Keep only the four gene types of interest, then separate gene rows from
        # transcript rows. %in% is used instead of == so that an NA gene_type/type
        # is simply excluded rather than producing an all-NA row.
        keptGeneTypes <- c("antisense", "lincRNA", "protein_coding", "pseudogene")
        GeneTranscriptSelec <- GeneTranscript[GeneTranscript$gene_type %in% keptGeneTypes, ]
        Gene <- GeneTranscriptSelec[GeneTranscriptSelec$type %in% "gene", ]
        Transcript <- GeneTranscriptSelec[GeneTranscriptSelec$type %in% "transcript", ]

        # Per-gene result columns, filled in later by the CpG island loop.
        annotationCols <- c("N_CpGi", "CpGi_N_probes", "CpGi_chr", "CpGi_Start",
                            "CpGi_End", "CpGi_ID", "PromStart", "TranscriptID")
        Gene[annotationCols] <- NA

        # Split by strand once; every range set below lists one strand's entries
        # first, then the other's.
        genePlus <- Gene[Gene$strand %in% "+", ]
        geneMinus <- Gene[Gene$strand %in% "-", ]
        trPlus <- Transcript[Transcript$strand %in% "+", ]
        trMinus <- Transcript[Transcript$strand %in% "-", ]

        # Width-1 ranges at the strand-aware 5' end: `start` on "+", `end` on "-".
        grGene <- GRanges(
            seqnames = c(genePlus$chr, geneMinus$chr),
            ranges = IRanges(start = c(genePlus$start, geneMinus$end), width = 1),
            ID = c(genePlus$ID, geneMinus$ID)
        )
        grTranscript <- GRanges(
            seqnames = c(trPlus$chr, trMinus$chr),
            ranges = IRanges(start = c(trPlus$start, trMinus$end), width = 1),
            ID = c(trPlus$ID, trMinus$ID),
            GeneID = c(trPlus$gene_id, trMinus$gene_id)
        )

        # Promoter windows: +/- promoterFlank bp around column 5 ("-" strand) or
        # column 4 ("+" strand) of each transcript.
        # NOTE(review): columns are addressed by position (1, 4, 5, 6, 8, 9);
        # presumably chr, start, end, strand, gene id and gene name - confirm
        # against the annotation file layout.
        promoterFlank <- 1000
        promoteurAS <- data.frame(
            name = trMinus[, 9],
            name2 = trMinus[, 8],
            strand = trMinus[, 6],
            chr = trMinus[, 1],
            start = trMinus[, 5] - promoterFlank,
            end = trMinus[, 5] + promoterFlank,
            ID = trMinus$ID
        )
        promoteurS <- data.frame(
            name = trPlus[, 9],
            name2 = trPlus[, 8],
            strand = trPlus[, 6],
            chr = trPlus[, 1],
            start = trPlus[, 4] - promoterFlank,
            end = trPlus[, 4] + promoterFlank,
            ID = trPlus$ID
        )

        # Drop windows that are identical in everything except the transcript ID
        # (column 7), keeping the first occurrence.
        promoteurAS <- promoteurAS[!duplicated(promoteurAS[, -7]), ]
        promoteurS <- promoteurS[!duplicated(promoteurS[, -7]), ]

        # Combined promoter ranges: "-" strand windows first, then "+" strand.
        grpromoteur <- GRanges(
            seqnames = c(as.character(promoteurAS$chr), as.character(promoteurS$chr)),
            ranges = IRanges(
                start = c(promoteurAS$start, promoteurS$start),
                end = c(promoteurAS$end, promoteurS$end)
            ),
            strand = c(as.character(promoteurAS$strand), as.character(promoteurS$strand)),
            gene1 = c(as.character(promoteurAS[["name"]]), as.character(promoteurS[["name"]])),
            gene2 = c(as.character(promoteurAS$name2), as.character(promoteurS$name2)),
            transcript = c(as.character(promoteurAS$ID), as.character(promoteurS$ID))
        )
#################################################################################
######     CpG islands info
##################################################################################
    # CpG island ranges built from columns 2 (sequence name), 3 (start) and
    # 4 (end) of the CpG table. Each island gets an ID made of columns 2-5
    # joined with "/".
    grCpG <- GRanges(
        seqnames = CpG[, 2],
        ranges = IRanges(start = CpG[, 3], end = CpG[, 4]),
        ID = paste(CpG[, 2], CpG[, 3], CpG[, 4], CpG[, 5], sep = "/")
    )
###############################################################################
######     HM450K info
##################################################################################
        # Index the methylation table by probe ID and build one width-1 range per
        # probe: column 2 (prefixed with "chr") is the sequence name, column 3 the
        # position, column 1 the probe identifier.
        rownames(K450) <- K450$TargetID
        grprobes <- GRanges(
            seqnames = paste0("chr", K450[, 2]),
            ranges = IRanges(start = K450[, 3], width = 1),
            TargetID = K450[, 1]
        )
        # For every gene, count the distinct CpG islands overlapped by its promoter
        # windows. Genes with exactly one such island additionally get the island
        # coordinates/ID, the number of HM450K probes inside it, and a
        # representative transcript 5' end (PromStart / TranscriptID).
        #
        # The promoter/island overlaps are computed once for all promoters and
        # grouped by gene, instead of re-scanning the promoter set for each gene.
        promCpGHits <- findOverlaps(grpromoteur, grCpG)
        cpgByGene <- split(subjectHits(promCpGHits),
                           grpromoteur$gene2[queryHits(promCpGHits)])
        # seq_len() so that an empty Gene table yields zero iterations.
        for (i in seq_len(nrow(Gene)))
            {
            geneId <- as.character(Gene$gene_id[i])
            # Distinct island indices hit by this gene's promoters
            # (NULL, i.e. length 0, when the gene has no overlap).
            cpgIdx <- unique(cpgByGene[[geneId]])
            Gene$N_CpGi[i] <- length(cpgIdx)
            if (length(cpgIdx) != 1)
                {
                next
                }
            CpGselec <- grCpG[cpgIdx]
            Gene$CpGi_Start[i] <- start(CpGselec)
            Gene$CpGi_End[i] <- end(CpGselec)
            Gene$CpGi_chr[i] <- as.character(seqnames(CpGselec))
            Gene$CpGi_ID[i] <- CpGselec$ID
            Gene$CpGi_N_probes[i] <- length(unique(subjectHits(findOverlaps(CpGselec, grprobes))))
            # Representative transcript: among the gene's transcripts keep those
            # whose 5' end is closest to a CpG island, then those closest to the
            # gene's own 5' end; the first remaining one is recorded.
            # NOTE(review): the first distance is measured against all islands
            # in grCpG, not only the selected one - confirm this is intended.
            grTrselec <- grTranscript[grTranscript$GeneID == Gene$gene_id[i]]
            dis <- as.data.frame(distanceToNearest(grTrselec, grCpG))$distance
            grTrselec2 <- grTrselec[dis == min(dis)]
            dis2 <- as.data.frame(distanceToNearest(grTrselec2, grGene[grGene$ID == Gene$gene_id[i]]))$distance
            grTrselec3 <- grTrselec2[dis2 == min(dis2)]
            Gene$PromStart[i] <- start(grTrselec3)[1]
            Gene$TranscriptID[i] <- grTrselec3$ID[1]
            rm(grTrselec3, dis2, grTrselec2, dis, grTrselec)
            }
    # Genes with a single promoter CpG island covered by at least 2 HM450K
    # probes. CpGi_N_probes is NA for genes without exactly one island, so
    # those are excluded.
    selec <- !is.na(Gene$CpGi_N_probes) & Gene$CpGi_N_probes > 1
    Gene1CpG <- Gene[selec, ]
####################################################################################
#####################################
#####################
#

#
#####
###########
#### methylation statistical analysis
#####################
# Two-group differential analysis with limma (lmFit / contrasts.fit / eBayes).
#
# Arguments:
#   gr1    column indices of `datax` forming the first group (labelled "CTRL")
#   gr2    column indices of `datax` forming the second group (labelled "glioma")
#   datax  table whose selected columns hold the values to compare, one row
#          per feature
#
# Returns a two-column matrix with one row per feature: the CTRL - glioma
# contrast coefficient (column "ctrl/glioma") and the FDR-adjusted p-value
# (column "fdr").
linear<-function(gr1,gr2,datax)
    {
    # An empty group would leave the design with a single column and fail
    # later with an unrelated-looking dimnames error.
    if (length(gr1) == 0 || length(gr2) == 0)
        {
        stop("linear(): both 'gr1' and 'gr2' must select at least one column",
             call. = FALSE)
        }
    name1 <- "CTRL"
    name2 <- "glioma"
    groupvalues <- datax[, c(gr1, gr2)]
    # No-intercept design: one indicator column per group.
    group <- factor(c(rep(1, length(gr1)), rep(2, length(gr2))))
    design <- model.matrix(~ 0 + group)
    colnames(design) <- c(name1, name2)
    fit1 <- lmFit(groupvalues, design)
    cont.matrix <- makeContrasts("ctrl/glioma" = CTRL - glioma, levels = design)
    fit2 <- contrasts.fit(fit1, cont.matrix)
    fit3 <- eBayes(fit2)
    fdr <- p.adjust(fit3$p.value, method = "fdr")
    delta <- fit3$coefficients
    cbind(delta, fdr)
    }
# Methylation statistics: columns whose name contains "ctrl" versus columns
# whose name contains "IDHwt".
Stat_Meth <- linear(
    gr1 = grep("ctrl", colnames(K450)),
    gr2 = grep("IDHwt", colnames(K450)),
    datax = K450
)
#
#####
######################
# data integration
    # Probe-level calls. Stat_Meth column 1 is the CTRL - glioma coefficient and
    # column 2 the FDR, so a coefficient below -0.1 is flagged as hyper and one
    # above 0.1 as hypo, both requiring FDR < 0.05.
    grStatMeth <- GRanges(
        seqnames = paste0("chr", K450[, 2]),
        ranges = IRanges(start = K450[, 3], width = 1),
        TargetID = K450[, 1],
        hyper_All = (Stat_Meth[, 1] < -0.1) & Stat_Meth[, 2] < 0.05,
        hypo_All = (Stat_Meth[, 1] > 0.1) & Stat_Meth[, 2] < 0.05
    )
    # One range per selected gene: its promoter CpG island.
    grCGiProm <- GRanges(
        seqnames = Gene1CpG$CpGi_chr,
        ranges = IRanges(start = Gene1CpG$CpGi_Start, end = Gene1CpG$CpGi_End)
    )
    # Per-island counts of all probes, hyper probes and hypo probes.
    # which() drops probes whose flag is NA (no usable statistic); a logical
    # subscript containing NA would otherwise abort the GRanges subsetting.
    grCGiProm$probes <- as.numeric(countOverlaps(grCGiProm, grStatMeth))
    grCGiProm$hyper_All <- as.numeric(
        countOverlaps(grCGiProm, grStatMeth[which(grStatMeth$hyper_All)])
    )
    grCGiProm$hypo_All <- as.numeric(
        countOverlaps(grCGiProm, grStatMeth[which(grStatMeth$hypo_All)])
    )
#
#####
######################
# Mean methylation CpG islands per samples
# Mean value per CpG island and per sample, over the probes located in the
# island. Sample columns are those whose name contains "ctrl" or "IDH".
# NOTE(review): "IDH" is broader than the "IDHwt" pattern used for Stat_Meth -
# confirm that including every IDH* column here is intended.
sampleCols <- c(grep("ctrl", colnames(K450)), grep("IDH", colnames(K450)))
nIslands <- length(grCGiProm)
M <- matrix(NA_real_, ncol = length(sampleCols), nrow = nIslands)
pos <- as.data.frame(findOverlaps(grprobes, grCGiProm))
# Probe row indices grouped by island index, computed once; the explicit
# levels keep list position i aligned with island i.
probesByIsland <- split(pos[, 1], factor(pos[, 2], levels = seq_len(nIslands)))

for (i in seq_len(nIslands))
    {
    M[i, ] <- colMeans(K450[probesByIsland[[i]], sampleCols], na.rm = TRUE)
    print(i)
    }
M <- as.data.frame(M)
colnames(M) <- paste("Mean_Bval_", colnames(K450[, sampleCols]), sep = "")
mcols(grCGiProm) <- data.frame(as.data.frame(mcols(grCGiProm)), M)
Gene1CpG_Meth <- as.data.frame(grCGiProm) ##### Methylation data for genes with 1 CGI/promoter and at least 2 HM450K probes
####################################################################################
#####################################
#####################
#

#
#####
###########
#### RNAseq statistical analysis
#####################
    # Sample sheet for the RNAseq table. Column 1 holds the gene identifiers,
    # the remaining columns are samples. Each sample's group is derived from
    # its own column name, so the labels stay correct even when control and
    # IDHwt columns are interleaved in the file.
    sampleNames <- colnames(RNAseq)[-1]
    sampleGroup <- rep(NA_character_, length(sampleNames))
    sampleGroup[grepl("ctrl", sampleNames)] <- "ctrl"
    sampleGroup[grepl("IDHwt", sampleNames)] <- "IDHwt"
    if (anyNA(sampleGroup))
        {
        stop("RNAseq sample columns matching neither 'ctrl' nor 'IDHwt': ",
             paste(sampleNames[is.na(sampleGroup)], collapse = ", "),
             call. = FALSE)
        }
    # Controls first, then IDHwt; the same order is applied to the sample
    # sheet and to the count columns.
    sampleOrder <- c(which(sampleGroup == "ctrl"), which(sampleGroup == "IDHwt"))
    targets <- data.frame(
        FileName = sampleNames[sampleOrder],
        Group = sampleGroup[sampleOrder],
        stringsAsFactors = FALSE
    )
    genes <- RNAseq[, 1]
    RNAseq <- RNAseq[, sampleOrder + 1]
    # Discard genes with zero reads across all samples.
    allTotal <- rowSums(RNAseq)
    RNAseq <- RNAseq[allTotal != 0, ]
    genes <- genes[allTotal != 0]
    library.sizes <- colSums(RNAseq)
###########################################
######## EdgeR 
######## EdgeR 
######## EdgeR 
###########################################
        # edgeR: exact test IDHwt versus ctrl using common then tagwise
        # dispersion estimates; `wt` is the full, unfiltered result table.
        rownames(RNAseq) <- genes
        d <- DGEList(
            counts = as.matrix(RNAseq),
            lib.size = library.sizes,
            group = targets$Group
        )
        d <- calcNormFactors(d)
        d <- estimateTagwiseDisp(estimateCommonDisp(d), trend = "none")
        de.com1 <- exactTest(d, pair = c("ctrl", "IDHwt"))
        wt <- topTags(de.com1, n = Inf)$table
###########################################
###########################################
############### DESeq2
############### DESeq2
############### DESeq2
##########################################
                # DESeq2: same comparison (IDHwt versus ctrl) with "ctrl" as
                # the reference level of the single design factor.
                cts <- RNAseq
                rownames(cts) <- genes
                Patient <- relevel(as.factor(targets[, 2]), ref = "ctrl")
                coldata <- data.frame(Patient)
                rownames(coldata) <- colnames(RNAseq)
                dds <- DESeq(
                    DESeqDataSetFromMatrix(
                        countData = cts,
                        colData = coldata,
                        design = ~ Patient
                    )
                )
                reswt <- results(dds, contrast = c("Patient", "IDHwt", "ctrl"))
##########################################
##### combine RNAseq analyses and data integration
    # Side-by-side table of both differential expression results, one row per
    # gene of `wt`, with the matching Gene1CpG name/ID and the matching `reswt`
    # row. The unnamed argument expressions are kept unchanged because
    # data.frame() derives the resulting column names from them.
    combineWT <- data.frame(
        wt,
        rownames(wt),
        as.character(Gene1CpG$gene_name[match(rownames(wt),Gene1CpG$ID)]),
        as.character(Gene1CpG$ID[match(rownames(wt),Gene1CpG$ID)]),
        rownames(reswt[match(rownames(wt),rownames(reswt)),]),
        reswt[match(rownames(wt),rownames(reswt)),]
    )
    # A missing adjusted p-value is treated as non-significant.
    combineWT$padj[is.na(combineWT$padj)] <- 1
    # A gene is called UP/DOWN only when both result sets agree: FDR and padj
    # below 0.05, and logFC and log2FoldChange beyond +2 / -2.
    combineWT$commonUP <- combineWT$FDR < 0.05 & combineWT$logFC > 2 &
        combineWT$padj < 0.05 & combineWT$log2FoldChange > 2
    combineWT$commonDOWN <- combineWT$FDR < 0.05 & combineWT$logFC < -2 &
        combineWT$padj < 0.05 & combineWT$log2FoldChange < -2
    combineWT <- combineWT[order(rownames(combineWT)), ]
    # Transfer the calls to the selected genes; genes without a row in
    # combineWT are set to FALSE.
    Gene1CpG$WTcommonUP <- combineWT$commonUP[match(Gene1CpG$ID, rownames(combineWT))]
    Gene1CpG$WTcommonUP[is.na(Gene1CpG$WTcommonUP)] <- FALSE
    Gene1CpG$WTcommonDOWN <- combineWT$commonDOWN[match(Gene1CpG$ID, rownames(combineWT))]
    Gene1CpG$WTcommonDOWN[is.na(Gene1CpG$WTcommonDOWN)] <- FALSE ##### RNAseq data for genes with 1 CGI/promoter and at least 2 HM450K probes
##########################################
#####################################
#####################
#

#
#####################
#####################################
##########################################################
# Correlation RNaseq and CNV 
######## RNAseq data formatting
    # Attach the edgeR pseudo-counts to the selected genes, matched by gene ID.
    # Two copies are built: Gene1CpG_Exp keeps the original sample column names
    # and is indexed by gene ID; Gene1CpG_ExpV2 carries the same values with an
    # "Exp_" prefix on the sample columns.
    counts <- d$pseudo.counts
    Gene1CpG_Exp <- data.frame(Gene1CpG, counts[match(Gene1CpG$ID, rownames(counts)), ])
    rownames(Gene1CpG_Exp) <- Gene1CpG$ID

    colnames(counts) <- paste0("Exp_", colnames(counts))
    Gene1CpG_ExpV2 <- data.frame(Gene1CpG, counts[match(Gene1CpG$ID, rownames(counts)), ])
######## CNV data formatting
        # Gene-by-sample CNV table: for every CNV sample, each selected gene
        # receives the Mean.Log2Ratio of a segment overlapping the gene body,
        # or 0 when no segment overlaps it.
        grdataWG <- GRanges(
            seqnames = Gene1CpG$chr,
            ranges = IRanges(start = Gene1CpG$start, end = Gene1CpG$end),
            ID = Gene1CpG$ID
        )
        # Sample list computed once; seq_along() yields zero iterations when
        # the CNV table is empty.
        cnvSamples <- levels(as.factor(CNV$Sample))
        for (i in seq_along(cnvSamples))
            {
            sampl <- cnvSamples[i]
            selec <- CNV[CNV$Sample == sampl, ]
            grselec <- GRanges(
                seqnames = paste0("chr", selec$Chromosome),
                ranges = IRanges(start = selec$Min, end = selec$Max),
                CNV = selec$Mean.Log2Ratio
            )
            pos <- as.data.frame(findOverlaps(grdataWG, grselec))
            # Add the column under a temporary name, then rename it to the
            # sample. Metadata column 1 is ID, so sample i is column i + 1.
            # When several segments overlap one gene, the last hit wins.
            grdataWG$samp <- 0
            grdataWG$samp[pos[, 1]] <- grselec$CNV[pos[, 2]]
            names(mcols(grdataWG))[i + 1] <- sampl
            }
        # Drop the five coordinate columns and ID; drop = FALSE keeps a data
        # frame even when there is a single sample column.
        Gene1CpG_CNV <- as.data.frame(grdataWG)[, -c(1:6), drop = FALSE]
        rownames(Gene1CpG_CNV) <- grdataWG$ID
        # Default p-value; overwritten by the correlation step.
        Gene1CpG_CNV$Pval_cor_EXP_CNV <- 1
##### correlation CNV and EXP
    # Per-gene Pearson correlation test between expression and CNV across the
    # samples present in BOTH tables. Restricting both sides to the shared
    # samples keeps the two vectors the same length; otherwise a CNV sample
    # without expression data makes every test fail and leaves every p-value
    # at 1.
    cnvSampleCols <- colnames(Gene1CpG_CNV)[-ncol(Gene1CpG_CNV)] # last column is the p-value
    sharedSamples <- cnvSampleCols[cnvSampleCols %in% colnames(Gene1CpG_Exp)]
    if (length(sharedSamples) < length(cnvSampleCols))
        {
        warning(length(cnvSampleCols) - length(sharedSamples),
                " CNV sample(s) without expression data were ignored",
                call. = FALSE)
        }
    expMat <- as.matrix(Gene1CpG_Exp[, sharedSamples, drop = FALSE])
    cnvMat <- as.matrix(Gene1CpG_CNV[, sharedSamples, drop = FALSE])
    for (i in seq_len(nrow(Gene1CpG_CNV)))
        {
        # A failed test (e.g. too few finite observations) is reported as p = 1.
        Gene1CpG_CNV$Pval_cor_EXP_CNV[i] <- tryCatch(
            cor.test(expMat[i, ], cnvMat[i, ], method = "pearson")$p.value,
            error = function(e) 1
        )
        print(i)
        }
    # An undefined p-value (e.g. constant input) is also treated as p = 1.
    Gene1CpG_CNV$Pval_cor_EXP_CNV[is.na(Gene1CpG_CNV$Pval_cor_EXP_CNV)] <- 1
    Gene1CpG_CNV$cor_CNV_EXP <- Gene1CpG_CNV$Pval_cor_EXP_CNV < 0.05 ##### CNV data for genes with 1 CGI/promoter and at least 2 HM450K probes
##########################################
#####################################
#####################
#


#
#####################
#####################################
##########################################################
#### build the integrative results file
# Final table: the first 19 columns of Gene1CpG, the methylation calls (more
# than one hyper / hypo probe in the island), the expression calls, the
# CNV/expression correlation flag, then the per-sample mean methylation and
# "Exp" columns. Written as a tab-separated file without row names or quoting.
result <- data.frame(
    Gene1CpG[, 1:19],
    Meth_Hyper = Gene1CpG_Meth$hyper_All > 1,
    Meth_Hypo = Gene1CpG_Meth$hypo_All > 1,
    Exp_Down = Gene1CpG_Exp$WTcommonDOWN,
    Exp_Up = Gene1CpG_Exp$WTcommonUP,
    Cor_CNV_Exp = Gene1CpG_CNV$cor_CNV_EXP,
    Gene1CpG_Meth[, grep("Mean", colnames(Gene1CpG_Meth))],
    Gene1CpG_ExpV2[, grep("Exp", colnames(Gene1CpG_ExpV2))]
)

write.table(
    result,
    file = "result.txt",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
)
