library(flexclust)
library(scales)
library(cluster)
library(survival)

### TCGA counter example
# Start from an empty global workspace.
# NOTE(review): this also deletes every object the caller had defined before
# running the script; confirm that is intended.
rm(list = ls())

# ---- Configuration ----
# Input and output directories; adjust both to the local layout. Each string
# must end in "/" because file names are appended by plain concatenation.
dataPath <- "~/Desktop/Projects/SubTyping/DataTCGA/"
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/SilhouetteResult/"

# Data sets to process, in this order.
datasets <- c("GBM", "KIRC", "LUSC", "BRCA", "COAD", "LAML")

# Largest number of clusters tried for each data type.
Kmax <- 10

# Fixed seed so that the random k-means starts are reproducible.
set.seed(800)
# Choose the k-means partition of the PCA scores of `x` that has the highest
# mean silhouette width.
#
# x:    numeric matrix or data frame with one row per patient.
# Kmax: largest number of clusters to try. Candidates are 2..Kmax, capped at
#       nrow(x) - 1 because a silhouette is only defined for 2 <= k <= n - 1.
# Returns list(group = cluster label per row, sil = mean silhouette width).
bestSilhouetteClustering <- function(x, Kmax) {
  scores <- prcomp(x)$x
  dissim <- daisy(scores)
  kUpper <- min(Kmax, nrow(scores) - 1)
  if (kUpper < 2) {
    stop("Need at least 3 samples and Kmax >= 2 to search for clusters",
         call. = FALSE)
  }
  # Start from -Inf rather than 0 so that a partition is always returned, even
  # when every candidate has a non-positive mean silhouette width.
  best <- list(group = NULL, sil = -Inf)
  for (k in 2:kUpper) {
    g <- kmeans(scores, centers = k, iter.max = 100, nstart = 20)$cluster
    width <- mean(silhouette(g, dissim)[, "sil_width"])
    # Strict ">" keeps the smallest k when two candidates tie.
    if (width > best$sil) {
      best <- list(group = g, sil = width)
    }
  }
  best
}

# Draw the survival curves of the clusters in `group` on the current device
# and annotate them with the Cox score-test p-value and the silhouette value.
#
# survData:   data frame with columns `Survival` (time) and `Death` (event).
# group:      cluster label per row of survData.
# sil:        mean silhouette width shown in the legend.
# title:      main title of the plot.
# statsPos:   legend position keyword for the p-value / silhouette box.
# showGroups: if TRUE, add a "topright" legend with the size of each group.
plotSurvivalByGroup <- function(survData, group, sil, title,
                                statsPos = "topright", showGroups = FALSE) {
  coxFit <- coxph(Surv(time = Survival, event = Death) ~ as.factor(group),
                  data = survData, ties = "exact")
  mfit <- survfit(Surv(Survival, Death == 1) ~ as.factor(group),
                  data = survData)
  # survfit orders its strata by sorted factor level, so the colours must be
  # in sorted order as well for group i to be drawn in colour i on every plot.
  groupIds <- sort(unique(group))
  plot(mfit, col = groupIds, main = title, xlab = "Days", ylab = "Survival",
       lwd = 2)
  pValue <- round(summary(coxFit)$sctest[3], digits = 5)
  legend(statsPos, legend = c(paste0("Cox p-value:", pValue),
                              paste0("silhouette:", sil)))
  if (showGroups) {
    groupSizes <- table(group)[as.character(groupIds)]
    legend("topright", fill = groupIds,
           legend = paste0("Group ", groupIds, ": ", groupSizes))
  }
  invisible(coxFit)
}

for (dataset in datasets) {
  # Silhouette-based subtyping of one data set ---------------------------->
  # The .RData file is expected to provide `survival`, `mydatGE`, `mydatME`
  # and `mydatMI`, each with patient identifiers as row names.
  dataFile <- paste0(dataPath, dataset, "/", dataset, "_ProcessedData.RData")
  load(dataFile)

  t1 <- Sys.time()

  # Keep only the patients present in the survival table and all three
  # data types, in the order of the survival table.
  patients <- Reduce(intersect, list(rownames(survival), rownames(mydatGE),
                                     rownames(mydatME), rownames(mydatMI)))

  # drop = FALSE keeps the two-dimensional shape for single-column inputs.
  mydatGE <- mydatGE[patients, , drop = FALSE]
  mydatME <- mydatME[patients, , drop = FALSE]
  mydatMI <- mydatMI[patients, , drop = FALSE]
  dataAll <- cbind(mydatGE, mydatME, mydatMI)
  survival <- survival[patients, , drop = FALSE]

  # The order GE, ME, MI, All is kept so that the random k-means starts
  # consume the seeded random number stream in a fixed sequence.
  bestGE <- bestSilhouetteClustering(mydatGE, Kmax)
  bestME <- bestSilhouetteClustering(mydatME, Kmax)
  bestMI <- bestSilhouetteClustering(mydatMI, Kmax)
  bestAll <- bestSilhouetteClustering(dataAll, Kmax)

  # The names below are the ones stored in the result file.
  groupGE <- bestGE$group
  silGE <- bestGE$sil
  groupME <- bestME$group
  silME <- bestME$sil
  groupMI <- bestMI$group
  silMI <- bestMI$sil
  groupAll <- bestAll$group
  silAll <- bestAll$sil

  t2 <- Sys.time()
  print(t2 - t1)

  resultFile <- paste0(resultPath, "Silhouette_", dataset, ".RData")
  save(dataset, Kmax, survival, groupGE, groupME, groupMI, groupAll,
       silGE, silME, silMI, silAll, t1, t2, file = resultFile)

  pdfFile <- paste0(resultPath, "Silhouette_", dataset, ".pdf")
  pdf(pdfFile)
  # Close the PDF device even if a model fit or a plot call fails, so that
  # later plots are not written into a stale device.
  tryCatch({
    plotSurvivalByGroup(
      survival, groupGE, silGE,
      paste0("Survival curves for gene expression of ", dataset,
             " (Silhouette)")
    )
    plotSurvivalByGroup(
      survival, groupME, silME,
      paste0("Survival curves for methylation of ", dataset, " (Silhouette)")
    )
    plotSurvivalByGroup(
      survival, groupMI, silMI,
      paste0("Survival curves for microRNA of ", dataset, " (Silhouette)")
    )
    plotSurvivalByGroup(
      survival, groupAll, silAll,
      paste0("Survival curves for ", dataset, ", Silhouette method"),
      statsPos = "top", showGroups = TRUE
    )
  }, finally = dev.off())
}