## Start from an empty global workspace so nothing left over from an earlier
## session can leak into this run.
rm(list = ls())

library(iClusterPlus)

## Input / output locations.
## Desktop paths, kept for reference:
# dataPath="~/Desktop/Projects/SubTyping/METABRIC/"
# resultPath="~/Desktop/Projects/SubTyping/PackageAndTesting/iClusterPlusResult/"

## Cluster paths (active):
dataPath <- "/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/METABRIC/"
resultPath <- "/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/iClusterPlusResult/"

## Analysis parameters:
##   maxGeneNo - cap on the number of features kept per data type
##   kRange    - values of K handed to tune.iClusterPlus (would be 2:10 clusters)
##   nThread   - number of CPUs handed to tune.iClusterPlus
maxGeneNo <- 2000
kRange <- 1:9
nThread <- 60


## For each METABRIC cohort: load the data, keep the patients common to the
## survival table, the expression matrix and the copy-number matrix, reduce
## each data type to its most variable features, then tune iClusterPlus on
## (1) expression only, (2) copy number only and (3) both jointly, once per
## value in kRange. Fits, timings and the clinical tables are saved to
## <resultPath>iClusterPlus_<dataset>_<maxGeneNo>.RData.
for (dataset in c("METABRIC_discovery", "METABRIC_validation")) {
    set.seed(1)
    file <- paste0(dataPath, dataset, ".RData")
    loadedObjs <- load(file)

    ## Fail fast: every object below is needed either for tuning or by the
    ## final save(). Without this check a missing object would only surface
    ## after all tuning has finished, or the object left over from the
    ## previous cohort would silently be reused.
    neededObjs <- c("mydatGE", "mydatCNV", "survival", "survivalDFS", "clinical")
    missingObjs <- setdiff(neededObjs, loadedObjs)
    if (length(missingObjs) > 0) {
        stop("Object(s) missing from ", file, ": ",
             paste(missingObjs, collapse = ", "), call. = FALSE)
    }

    ## Drop the first five columns (presumably annotation columns -- TODO
    ## confirm) and transpose so that rows are matched against patient ids.
    mydatCNV <- t(mydatCNV[, -c(1:5)])

    ## Patients present in the survival table and in both data types.
    patients <- rownames(survival)
    patients <- intersect(patients, rownames(mydatGE))
    patients <- intersect(patients, rownames(mydatCNV))

    mydatGE <- mydatGE[patients, ]
    mydatCNV <- mydatCNV[patients, ]

    ## Keep only the maxGeneNo features with the largest median absolute
    ## deviation across patients.
    if (ncol(mydatGE) > maxGeneNo) {
        mads <- apply(mydatGE, MARGIN = 2, FUN = mad)
        mydatGE <- mydatGE[, order(mads, decreasing = TRUE)[seq_len(maxGeneNo)]]
    }

    if (ncol(mydatCNV) > maxGeneNo) {
        mads <- apply(mydatCNV, MARGIN = 2, FUN = mad)
        mydatCNV <- mydatCNV[, order(mads, decreasing = TRUE)[seq_len(maxGeneNo)]]
    }

    ## The fit lists are indexed by position in kRange (element i holds the
    ## fit for K = kRange[i]), so a kRange that does not start at 1 cannot
    ## leave NULL holes in the lists. For kRange = 1:9 this is identical to
    ## indexing by K itself.

    ## Expression only.
    t1_GE <- Sys.time()
    cv.fitGE <- vector("list", length(kRange))
    for (i in seq_along(kRange)) {
        cv.fitGE[[i]] <- tune.iClusterPlus(cpus = nThread, dt1 = mydatGE,
                                           K = kRange[i], type = c("gaussian"))
    }
    t2_GE <- Sys.time()
    print(t2_GE - t1_GE)

    ## Copy number only.
    t1_CNV <- Sys.time()
    cv.fitCNV <- vector("list", length(kRange))
    for (i in seq_along(kRange)) {
        cv.fitCNV[[i]] <- tune.iClusterPlus(cpus = nThread, dt1 = mydatCNV,
                                            K = kRange[i], type = c("gaussian"))
    }
    t2_CNV <- Sys.time()
    print(t2_CNV - t1_CNV)

    ## Expression and copy number jointly; t1/t2 time this stage only.
    t1 <- Sys.time()
    cv.fit <- vector("list", length(kRange))
    for (i in seq_along(kRange)) {
        cv.fit[[i]] <- tune.iClusterPlus(cpus = nThread, dt1 = mydatGE, dt2 = mydatCNV,
                                         K = kRange[i], type = c("gaussian", "gaussian"))
    }
    t2 <- Sys.time()
    print(t2 - t1)

    resultFile <- paste0(resultPath, "iClusterPlus_", dataset, "_", maxGeneNo, ".RData")
    save(cv.fitGE, t1_GE, t2_GE, cv.fitCNV, t1_CNV, t2_CNV, cv.fit, t1, t2,
         patients, survival, survivalDFS, clinical, file = resultFile)
}


## Model-selection summary. For each cohort and each analysis (expression,
## copy number, combined) take, for every fit in the list, the deviance ratio
## at the row where BIC is smallest, and plot those values against the fit
## index. All plots go into a single PDF.
library(iClusterPlus)
maxGeneNo <- 2000
pdfFile <- paste0(resultPath, "iClusterPlus_METABRIC_Summary_", maxGeneNo, ".pdf")
pdf(pdfFile)
for (dataset in c("METABRIC_discovery", "METABRIC_validation")) {
    resultFile <- paste0(resultPath, "iClusterPlus_", dataset, "_", maxGeneNo, ".RData")
    load(resultFile)

    ## Number of fits; the same value is used for all three analyses.
    nK <- length(cv.fit)

    ## Expression only.
    BIC_GE <- getBIC(cv.fitGE)
    devR_GE <- getDevR(cv.fitGE)
    minBICid_GE <- apply(BIC_GE, 2, which.min)
    devRatMinBIC_GE <- rep(NA, nK)
    for (i in seq_len(nK)) {
        devRatMinBIC_GE[i] <- devR_GE[minBICid_GE[i], i]
    }
    ## Plot only when a value was obtained for every fit.
    if (!anyNA(devRatMinBIC_GE)) {
        plot(devRatMinBIC_GE, xlab = "k",
             main = paste0("iClusterPlus result, ", dataset, ", mRNA, ", maxGeneNo))
    }

    ## Copy number only. The deviance ratios must come from the CNV fits:
    ## the row indices in minBICid_CNV refer to the CNV BIC matrix, so
    ## pairing them with the expression fits would plot unrelated values.
    BIC_CNV <- getBIC(cv.fitCNV)
    devR_CNV <- getDevR(cv.fitCNV)
    minBICid_CNV <- apply(BIC_CNV, 2, which.min)
    devRatMinBIC_CNV <- rep(NA, nK)
    for (i in seq_len(nK)) {
        devRatMinBIC_CNV[i] <- devR_CNV[minBICid_CNV[i], i]
    }
    if (!anyNA(devRatMinBIC_CNV)) {
        plot(devRatMinBIC_CNV, xlab = "k",
             main = paste0("iClusterPlus result, ", dataset, ", CNV, ", maxGeneNo))
    }

    ## Expression and copy number combined.
    BIC <- getBIC(cv.fit)
    devR <- getDevR(cv.fit)
    minBICid <- apply(BIC, 2, which.min)
    devRatMinBIC <- rep(NA, nK)
    for (i in seq_len(nK)) {
        devRatMinBIC[i] <- devR[minBICid[i], i]
    }
    if (!anyNA(devRatMinBIC)) {
        plot(devRatMinBIC, xlab = "k",
             main = paste0("iClusterPlus result, ", dataset, ", ", maxGeneNo))
    }
}
dev.off()


## Survival analysis of the chosen subtypes. For each cohort one PDF is
## written with six Kaplan-Meier panels: disease-free survival and overall
## survival, each for the expression-only, copy-number-only and combined
## clusterings.
maxGeneNo <- 2000
library(iClusterPlus)  # getClusters(); lets this section run on its own
library(survival)

## Draw one Kaplan-Meier panel on the current graphics device.
##
## survData : data frame with columns Survival (time) and Death (event
##            indicator), with rows in the same order as `groups`
## groups   : cluster label per patient
## title    : main title of the panel
## pDigits  : number of decimals kept for the Cox p-value in the legend
##
## The legend shows the score-test p-value of a Cox model fitted with exact
## tie handling and the concordance of a Cox model fitted with the default
## tie handling. Curve i (the i-th factor level of `groups`) is drawn in
## palette colour i, so colour and cluster id correspond.
plotSubtypeSurvival <- function(survData, groups, title, pDigits = 10) {
    groups <- factor(groups)

    exactFit <- coxph(Surv(time = Survival, event = Death) ~ groups,
                      data = survData, ties = "exact")
    coxp <- round(summary(exactFit)$sctest[3], pDigits)

    defaultFit <- coxph(Surv(time = Survival, event = Death) ~ groups,
                        data = survData)
    CI <- round(summary(defaultFit)$concordance[1], 3)

    kmFit <- survfit(Surv(Survival, Death == 1) ~ groups, data = survData)
    plot(kmFit, col = seq_len(nlevels(groups)), main = title,
         xlab = "Days", ylab = "Survival", lwd = 2)
    legend("top", legend = paste0("Cox p-value=", coxp, ", CI=", CI))
    invisible(NULL)
}

for (dataset in c("METABRIC_discovery", "METABRIC_validation")) {
    resultFile <- paste0(resultPath, "iClusterPlus_", dataset, "_", maxGeneNo, ".RData")
    load(resultFile)
    clusters_GE <- getClusters(cv.fitGE)
    clusters_CNV <- getClusters(cv.fitCNV)
    clusters <- getClusters(cv.fit)

    ## these number of subtypes were determined using the BIC curves, the devRatMinBIC plots
    ## (the values are column indices into the getClusters() matrices)
    if (dataset == "METABRIC_discovery") {
        kGE <- 9
        kCNV <- 9
        k <- 9
    } else if (dataset == "METABRIC_validation") {
        kGE <- 9
        kCNV <- 9
        k <- 8
    } else {
        stop("No subtype numbers chosen for dataset: ", dataset, call. = FALSE)
    }

    groups_GE <- factor(clusters_GE[, kGE])
    groups_CNV <- factor(clusters_CNV[, kCNV])
    groups_All <- factor(clusters[, k])

    pdfFile <- paste0(resultPath, "iClusterPlus_", dataset, "_", maxGeneNo, ".pdf")
    pdf(pdfFile)

    ## Close the PDF device even if one of the model fits fails, so a failed
    ## run does not leave a dangling device and a half-written file open.
    tryCatch({
        ## Disease-free survival.
        survi <- survivalDFS[patients, ]
        plotSubtypeSurvival(survi, groups_GE,
                            paste0("DFS survival for mRNA, ", dataset, " (iClusterPlus), mRNA"),
                            pDigits = 15)
        plotSubtypeSurvival(survi, groups_CNV,
                            paste0("DFS survival curves for CNV, ", dataset, " (iClusterPlus), CNV"))
        plotSubtypeSurvival(survi, groups_All,
                            paste0("DFS survival curves for combined data, ", dataset, " (iClusterPlus)"))

        ## Overall survival.
        survi <- survival[patients, ]
        plotSubtypeSurvival(survi, groups_GE,
                            paste0("Overall survival for mRNA, ", dataset, " (iClusterPlus), mRNA"))
        plotSubtypeSurvival(survi, groups_CNV,
                            paste0("Overall survival curves for CNV, ", dataset, " (iClusterPlus), CNV"))
        plotSubtypeSurvival(survi, groups_All,
                            paste0("Overall survival curves for combined data, ", dataset, " (iClusterPlus)"))
    }, finally = dev.off())
}


