# Remove every object from the current workspace before the script runs.
rm(list = ls())

# Set these two paths for your machine and check that the data can be loaded.
# dataPath is the folder that holds one sub-folder per dataset; resultPath is
# the folder that receives the clustering results and the survival plots.
dataPath <- "~/Desktop/Projects/SubTyping/DataTCGA/"
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/CCResult/"

# dataPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/DataTCGA/"
# resultPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/CCResult/"

# IMPORTANT: restart R after installing ConsensusClusterPlus; otherwise the
# script crashes with a strange message:
#   Error: lazy-load database 'P' is corrupt
library(preprocessCore)
library(ConsensusClusterPlus)

# Column-wise standardization, applied (as for SNF) before the data types are
# concatenated.
#
# x: matrix, or anything as.matrix() accepts.
#
# Each column has its mean subtracted and is divided by its standard
# deviation. A standard deviation of exactly zero is replaced by 1, so such a
# column comes out as all zeros instead of NaN.
#
# Returns a matrix with the same dimensions as as.matrix(x).
standardNormalization <- function(x) {
    mat <- as.matrix(x)
    colCenter <- apply(mat, 2, mean)
    colScale <- apply(mat, 2, sd)
    # Guard against division by zero for constant columns.
    colScale[colScale == 0] <- 1
    # After transposing, columns become rows, so the per-column vectors recycle
    # down each column of t(mat) and line up with the right feature.
    centered <- t(mat) - colCenter
    scaled <- centered / colScale
    t(scaled)
}

# Candidate numbers of clusters; only the maximum is handed to
# ConsensusClusterPlus as maxK.
clusRange <- 2:10

# Run consensus clustering on one data matrix.
#
# dataMatrix: numeric matrix with patients in rows and features in columns.
# outDir:     directory passed to ConsensusClusterPlus as `title`; it is
#             created (including missing parent directories) if necessary.
# maxK:       largest number of clusters to evaluate.
#
# Returns the value of ConsensusClusterPlus().
runConsensusClustering <- function(dataMatrix, outDir, maxK = max(clusRange)) {
    # The matrix is transposed so that patients are in columns before it is
    # handed to ConsensusClusterPlus.
    d <- t(dataMatrix)
    # Median-center every row (feature).
    d <- sweep(d, 1, apply(d, 1, median, na.rm = TRUE))
    # recursive = TRUE so that a missing resultPath or dataset folder does not
    # make the directory creation fail.
    if (!dir.exists(outDir)) {
        dir.create(outDir, recursive = TRUE)
    }
    ConsensusClusterPlus(d, maxK = maxK, reps = 1000, pItem = 0.8, pFeature = 1,
                         title = outDir, clusterAlg = "hc", distance = "pearson",
                         plot = "png", seed = 888)
}

# For every dataset: cluster each data type on its own and all three combined,
# then save the results together with the timings and the survival table.
for (dataset in c("LUSC", "GBM", "BRCA", "LAML", "KIRC", "COAD")) {
    # The RData file is expected to provide survival, mydatGE, mydatME and
    # mydatMI, each with patient identifiers as row names.
    dataFile <- paste0(dataPath, dataset, "/", dataset, "_ProcessedData.RData")
    load(dataFile)

    # Keep only the patients present in the survival table and in all three
    # data types, in the order of the survival table.
    patients <- Reduce(intersect, list(rownames(survival), rownames(mydatGE),
                                       rownames(mydatME), rownames(mydatMI)))
    if (length(patients) == 0) {
        stop("No patient is shared by all data types of dataset ", dataset,
             call. = FALSE)
    }

    # drop = FALSE keeps the two-dimensional shape even for a single patient.
    mydatGE <- as.matrix(mydatGE[patients, , drop = FALSE])
    mydatME <- as.matrix(mydatME[patients, , drop = FALSE])
    mydatMI <- as.matrix(mydatMI[patients, , drop = FALSE])
    survival <- survival[patients, , drop = FALSE]

    datasetDir <- paste0(resultPath, dataset, "/")

    # Gene expression
    t1_GE <- Sys.time()
    result_GE <- runConsensusClustering(mydatGE, paste0(datasetDir, "GE"))
    t2_GE <- Sys.time()

    # Methylation
    t1_ME <- Sys.time()
    result_ME <- runConsensusClustering(mydatME, paste0(datasetDir, "ME"))
    t2_ME <- Sys.time()

    # MicroRNA
    t1_MI <- Sys.time()
    result_MI <- runConsensusClustering(mydatMI, paste0(datasetDir, "MI"))
    t2_MI <- Sys.time()

    # All data types concatenated; every feature is standardized first so that
    # the three data types are on a comparable scale.
    t1_All <- Sys.time()
    combined <- standardNormalization(cbind(mydatGE, mydatMI, mydatME))
    result_All <- runConsensusClustering(combined, paste0(datasetDir, "All"))
    t2_All <- Sys.time()

    resultFile <- paste0(resultPath, "CC_", dataset, ".RData")
    save(result_GE, result_ME, result_MI, result_All, dataset, survival,
         t1_GE, t2_GE, t1_ME, t2_ME, t1_MI, t2_MI, t1_All, t2_All,
         file = resultFile)
}

library(survival)
# Number of clusters selected for every dataset and data type; the values were
# read off the CDF plots.
selectedK <- list(
    KIRC = c(GE = 6, ME = 6, MI = 5, All = 6),
    GBM  = c(GE = 5, ME = 6, MI = 6, All = 7),
    LAML = c(GE = 5, ME = 7, MI = 6, All = 8),
    LUSC = c(GE = 5, ME = 9, MI = 7, All = 6),
    BRCA = c(GE = 8, ME = 8, MI = 5, All = 7),
    COAD = c(GE = 8, ME = 8, MI = 7, All = 5)
)

# Wording used in the plot titles for each data type, in plotting order.
dataTypeLabels <- c(GE = "gene expression", ME = "methylation",
                    MI = "microRNA", All = "combined data")

# Draw the survival curves of one clustering on the current graphics device.
#
# consensusClass: cluster label of every patient, in the row order of
#                 `survival`.
# survival:       data frame with the columns Survival (time) and Death
#                 (event indicator) and patient identifiers as row names.
# dataLabel:      data type wording used in the plot title.
# dataset:        dataset name used in the plot title.
#
# The plot carries two legends: the p-value of the Cox score test and the
# size of every group.
plotSurvivalByCluster <- function(consensusClass, survival, dataLabel, dataset) {
    stopifnot(length(consensusClass) == nrow(survival))
    groups <- factor(consensusClass)
    names(groups) <- rownames(survival)

    coxFit <- coxph(Surv(time = Survival, event = Death) ~ groups,
                    data = survival, ties = "exact")
    mfit <- survfit(Surv(Survival, Death == 1) ~ factor(groups), data = survival)

    # One colour per factor level, in level order. The same vector is used for
    # the curves and for the legend so that the two cannot disagree, whatever
    # order the labels appear in among the patients.
    groupLevels <- levels(groups)
    groupColors <- seq_along(groupLevels)

    plot(mfit, col = groupColors,
         main = paste0("Survival curves for ", dataLabel, " of ", dataset, " (CC)"),
         xlab = "Days", ylab = "Survival", lwd = 2)
    legend("top",
           legend = paste("Cox p-value:",
                          round(summary(coxFit)$sctest[3], digits = 5), sep = ""))
    legend("topright", fill = groupColors,
           legend = paste0("Group ", groupLevels, ": ", table(groups)[groupLevels]))
}

# For every dataset: load the saved clustering results and write one PDF with
# a survival plot per data type.
for (dataset in names(selectedK)) {
    # Restores result_GE, result_ME, result_MI, result_All and survival.
    resultFile <- paste0(resultPath, "CC_", dataset, ".RData")
    load(resultFile)

    k <- selectedK[[dataset]]
    if (is.null(k)) {
        stop("No cluster numbers defined for dataset ", dataset, call. = FALSE)
    }
    results <- list(GE = result_GE, ME = result_ME, MI = result_MI,
                    All = result_All)

    pdfFile <- paste0(resultPath, "CC_", dataset, ".pdf")
    pdf(pdfFile)
    # Close the PDF device even when a model fit or a plot fails, so that no
    # device is left open and the next dataset writes to its own file.
    tryCatch({
        for (type in names(dataTypeLabels)) {
            clusters <- results[[type]][[k[[type]]]]$consensusClass
            plotSurvivalByCluster(clusters, survival, dataTypeLabels[[type]],
                                  dataset)
        }
    }, finally = dev.off())
}

