## Clear every (non-hidden) object from the workspace before the run starts.
rm(list = ls())

## Alternative desktop locations, kept for reference:
# dataPath="~/Desktop/Projects/SubTyping/DataTCGA/"
# resultPath="~/Desktop/Projects/SubTyping/PackageAndTesting/iClusterPlusResult/"

## Input data directory and output directory used by every section below.
dataPath <- "/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/DataTCGA/"
resultPath <- "/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/iClusterPlusResult/"

library(iClusterPlus)

## Run parameters.
## maxGeneNo: upper bound on the number of features kept per data type.
maxGeneNo <- 2000
## kRange: values passed as K to tune.iClusterPlus (author's note: K = 1:9
## would be 2:10 clusters).
kRange <- 1:9
## nThread: value passed as `cpus` to tune.iClusterPlus.
nThread <- 60


## Keep only the `nKeep` columns of `mat` with the largest median absolute
## deviation (stats::mad). A matrix with at most `nKeep` columns is returned
## unchanged.
keepTopMadColumns <- function(mat, nKeep) {
  if (ncol(mat) <= nKeep) {
    return(mat)
  }
  columnMad <- apply(mat, 2, mad)
  mat[, order(columnMad, decreasing = TRUE)[seq_len(nKeep)]]
}

## For every feature budget and every TCGA dataset: load the processed data,
## restrict all tables to the shared patients, filter features by MAD, run
## tune.iClusterPlus for each K in kRange on each data type separately and on
## the combined data, and save fits, timings and filtered inputs to one .RData.
for (maxGeneNo in c(2000, 3000, 4000)) {
  for (dataset in c("LUSC", "GBM", "BRCA", "LAML", "KIRC", "COAD")) {
    ## Same seed for every dataset / feature budget combination.
    set.seed(1)

    ## load() is expected to define survival, mydatGE, mydatME and mydatMI
    ## in the workspace (these are the objects used right below).
    inputFile <- paste0(dataPath, dataset, "/", dataset, "_ProcessedData.RData")
    load(inputFile)

    ## Patients present in the survival table and in all three data types.
    sharedPatients <- Reduce(
      intersect,
      list(rownames(survival), rownames(mydatGE),
           rownames(mydatME), rownames(mydatMI))
    )

    mydatGE <- as.matrix(mydatGE[sharedPatients, ])
    mydatME <- as.matrix(mydatME[sharedPatients, ])
    mydatMI <- as.matrix(mydatMI[sharedPatients, ])
    survival <- survival[sharedPatients, ]

    ## Feature selection: top `maxGeneNo` columns by MAD, per data type.
    mydatGE <- keepTopMadColumns(mydatGE, maxGeneNo)
    mydatME <- keepTopMadColumns(mydatME, maxGeneNo)
    mydatMI <- keepTopMadColumns(mydatMI, maxGeneNo)

    ## --- mRNA only -------------------------------------------------------
    t1_GE <- Sys.time()
    cv.fitGE <- list()
    for (k in kRange) {
      cv.fitGE[[k]] <- tune.iClusterPlus(cpus = nThread, dt1 = mydatGE,
                                         K = k, type = c("gaussian"))
    }
    t2_GE <- Sys.time()
    print(t2_GE - t1_GE)

    ## --- methylation only ------------------------------------------------
    t1_ME <- Sys.time()
    cv.fitME <- list()
    for (k in kRange) {
      cv.fitME[[k]] <- tune.iClusterPlus(cpus = nThread, dt1 = mydatME,
                                         K = k, type = c("gaussian"))
    }
    t2_ME <- Sys.time()
    print(t2_ME - t1_ME)

    ## --- miRNA only ------------------------------------------------------
    t1_MI <- Sys.time()
    cv.fitMI <- list()
    for (k in kRange) {
      cv.fitMI[[k]] <- tune.iClusterPlus(cpus = nThread, dt1 = mydatMI,
                                         K = k, type = c("gaussian"))
    }
    t2_MI <- Sys.time()
    print(t2_MI - t1_MI)

    ## --- combined data ---------------------------------------------------
    ## The listed datasets are integrated on mRNA + methylation only; any
    ## other dataset (GBM in this loop) also includes miRNA.
    twoLayerDatasets <- c("LUSC", "BRCA", "LAML", "KIRC", "COAD")
    t1 <- Sys.time()
    cv.fit <- list()
    if (!(dataset %in% twoLayerDatasets)) {
      for (k in kRange) {
        cv.fit[[k]] <- tune.iClusterPlus(
          cpus = nThread, dt1 = mydatGE, dt2 = mydatME, dt3 = mydatMI,
          K = k, type = c("gaussian", "gaussian", "gaussian")
        )
      }
    } else {
      for (k in kRange) {
        cv.fit[[k]] <- tune.iClusterPlus(
          cpus = nThread, dt1 = mydatGE, dt2 = mydatME,
          K = k, type = c("gaussian", "gaussian")
        )
      }
    }
    t2 <- Sys.time()
    print(t2 - t1)

    ## Persist fits, timings, and the filtered inputs for the later sections.
    resultFile <- paste0(resultPath, "iClusterPlus_", dataset, "_",
                         maxGeneNo, ".RData")
    save(cv.fitGE, t1_GE, t2_GE,
         cv.fitME, t1_ME, t2_ME,
         cv.fitMI, t1_MI, t2_MI,
         cv.fit, t1, t2,
         mydatGE, mydatME, mydatMI, survival,
         file = resultFile)
  }
}


library(iClusterPlus)
maxGeneNo <- 2000

## For a list of tune.iClusterPlus fits (one entry per K), return for each K
## the deviance ratio taken at the row where that K's BIC is smallest.
## getBIC() and getDevR() are indexed here as [row, K]; the row is presumably
## the lambda setting (per the iClusterPlus manual - confirm if in doubt).
devRatioAtMinBIC <- function(fits) {
  bic <- getBIC(fits)
  devRatio <- getDevR(fits)
  bestRow <- apply(bic, 2, which.min)
  nK <- length(fits)
  atMin <- rep(NA, nK)
  for (i in seq_len(nK)) {
    atMin[i] <- devRatio[bestRow[i], i]
  }
  atMin
}

## Draw one "deviance ratio at minimum BIC vs. k" panel on the current device.
## Nothing is drawn when any value is NA. `ylab` is passed explicitly so the
## axis label does not depend on the name of a local variable.
plotDevRatioAtMinBIC <- function(fits, main, ylab) {
  atMin <- devRatioAtMinBIC(fits)
  if (!anyNA(atMin)) {
    plot(atMin, xlab = "k", ylab = ylab, main = main)
  }
  invisible(atMin)
}

## One summary PDF: for every dataset, one panel per data type plus one panel
## for the combined fit.
pdfFile <- paste0(resultPath, "iClusterPlus_TCGA_Summary_", maxGeneNo, ".pdf")
pdf(pdfFile)
for (dataset in c("KIRC", "GBM", "LAML", "LUSC", "BRCA", "COAD")) {
  resultFile <- paste0(resultPath, "iClusterPlus_", dataset, "_",
                       maxGeneNo, ".RData")
  ## Restores cv.fitGE, cv.fitME, cv.fitMI and cv.fit (among others).
  load(resultFile)

  plotDevRatioAtMinBIC(
    cv.fitGE,
    main = paste("iClusterPlus result, ", dataset, ", mRNA, ", maxGeneNo, sep = ""),
    ylab = "devRatMinBIC_GE"
  )

  ## Each data type uses its OWN fits for both BIC and deviance ratio.
  ## (Previously the methylation and miRNA panels read the deviance ratio
  ## from the mRNA fits.)
  plotDevRatioAtMinBIC(
    cv.fitME,
    main = paste("iClusterPlus result, ", dataset, ", Methylation, ", maxGeneNo, sep = ""),
    ylab = "devRatMinBIC_ME"
  )

  plotDevRatioAtMinBIC(
    cv.fitMI,
    main = paste("iClusterPlus result, ", dataset, ", miRNA, ", maxGeneNo, sep = ""),
    ylab = "devRatMinBIC_MI"
  )

  plotDevRatioAtMinBIC(
    cv.fit,
    main = paste("iClusterPlus result, ", dataset, ", ", maxGeneNo, sep = ""),
    ylab = "devRatMinBIC"
  )
}
dev.off()



maxGeneNo <- 2000
library(survival)

## Draw one survival panel on the current device for a given grouping:
## survfit curves per group, the score-test p-value of a Cox model with the
## group as covariate, and a legend with the size of every group.
##   groups   : cluster label per patient, in the same row order as `survData`
##   survData : data frame providing the `Survival` and `Death` columns
##   title    : main title of the panel
## Curves and legend share one colour vector in factor-level order, so a
## legend entry always has the colour of its curve.
plotSubtypeSurvival <- function(groups, survData, title) {
  groups <- factor(groups)
  groupLevels <- levels(groups)
  groupColours <- seq_along(groupLevels)

  coxFit <- coxph(Surv(time = Survival, event = Death) ~ groups,
                  data = survData, ties = "exact")
  mfit <- survfit(Surv(Survival, Death == 1) ~ groups, data = survData)

  plot(mfit, col = groupColours, main = title,
       xlab = "Days", ylab = "Survival", lwd = 2)
  legend("top",
         legend = paste("Cox p-value:",
                        round(summary(coxFit)$sctest[3], digits = 5),
                        sep = ""))
  legend("topright", fill = groupColours,
         legend = paste("Group ", groupLevels, ": ",
                        table(groups)[groupLevels], sep = ""))
  invisible(NULL)
}

## Column of the getClusters() matrix to use for each dataset and data type.
## These numbers of subtypes were determined using the BIC curves, the
## devRatMinBIC plots. MI is only defined for GBM.
chosenK <- list(
  KIRC = c(GE = 8, ME = 9, MI = NA, All = 5),
  GBM  = c(GE = 9, ME = 9, MI = 9,  All = 4),
  LAML = c(GE = 5, ME = 9, MI = NA, All = 4),
  LUSC = c(GE = 6, ME = 9, MI = NA, All = 3),
  BRCA = c(GE = 8, ME = 9, MI = NA, All = 9),
  COAD = c(GE = 5, ME = 9, MI = NA, All = 9)
)

## One PDF per dataset with a survival panel per data type and one for the
## combined clustering.
for (dataset in c("KIRC", "GBM", "LAML", "LUSC", "BRCA", "COAD")) {
  resultFile <- paste0(resultPath, "iClusterPlus_", dataset, "_",
                       maxGeneNo, ".RData")
  ## Restores the fits and the `survival` table saved by the tuning section.
  load(resultFile)

  hasMiRNA <- dataset == "GBM"
  kSel <- chosenK[[dataset]]

  clusters_GE <- getClusters(cv.fitGE)
  clusters_ME <- getClusters(cv.fitME)
  clusters_MI <- if (hasMiRNA) getClusters(cv.fitMI) else NA
  clusters <- getClusters(cv.fit)

  groups_GE <- factor(clusters_GE[, kSel[["GE"]]])
  names(groups_GE) <- rownames(survival)

  groups_ME <- factor(clusters_ME[, kSel[["ME"]]])
  names(groups_ME) <- rownames(survival)

  if (hasMiRNA) {
    groups_MI <- factor(clusters_MI[, kSel[["MI"]]])
    names(groups_MI) <- rownames(survival)
  } else {
    groups_MI <- NA
  }

  groups_All <- factor(clusters[, kSel[["All"]]])
  names(groups_All) <- rownames(survival)

  pdfFile <- paste0(resultPath, "iClusterPlus_", dataset, "_",
                    maxGeneNo, ".pdf")
  pdf(pdfFile)

  plotSubtypeSurvival(
    groups_GE, survival,
    paste("Survival curves for combined data of ", dataset, " (iClusterPlus), mRNA", sep = "")
  )

  plotSubtypeSurvival(
    groups_ME, survival,
    paste("Survival curves for combined data of ", dataset, " (iClusterPlus), Methylation", sep = "")
  )

  if (hasMiRNA) {
    plotSubtypeSurvival(
      groups_MI, survival,
      paste("Survival curves for combined data of ", dataset, " (iClusterPlus), miRNA", sep = "")
    )
  }

  plotSubtypeSurvival(
    groups_All, survival,
    paste("Survival curves for combined data of ", dataset, " (iClusterPlus)", sep = "")
  )

  ## Close this dataset's PDF. Each pdf() is paired with exactly one
  ## dev.off(); no device remains open after the loop.
  dev.off()
}


