# Start from an empty workspace.
# NOTE(review): this removes every (non-hidden) object in the environment the
# script is run in, so anything already defined there is lost.
rm(list = ls())

# Input and output locations -- adjust both before running the script.
dataPath <- "~/Desktop/Projects/SubTyping/DataTCGA/"
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/PINSResult/"

# Alternative locations, kept for reference:
# dataPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/DataTCGA/"
# resultPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/PINSResult/"


library(PINS)
library(survival)

# Settings forwarded to SubtypingOmicsData() by the main loop below.
Kmax <- 10
iter <- 200
kmIter <- 200
noisePercent <- "med"

# Datasets to process; each name is used both as a sub-directory of dataPath
# and as a prefix of the input/output file names.
datasets <- c("KIRC", "LUSC", "BRCA", "LAML", "GBM", "COAD")

# Main analysis: run SubtypingOmicsData() for every agreement cutoff and every
# dataset. For each (agree, dataset) pair the loop
#   1. loads <dataset>_ProcessedData.RData and keeps only the patients present
#      in the survival table and in all three omics tables,
#   2. reads the clinical table and re-keys it to the survival row names,
#   3. runs SubtypingOmicsData() and saves inputs + result to an .RData file,
#   4. writes a PDF with AUC plots, survival curves and age box plots.

# Objects every <dataset>_ProcessedData.RData file must provide.
requiredObjects <- c("survival", "mydatGE", "mydatME", "mydatMI")
# Display names of the data types, in the same order as dataList (GE, ME, MI).
dataTypeLabels <- c("gene expression", "methylation", "microRNA")

for (agree in c(0.4, 0.45, 0.5, 0.6, 0.7)) {
  for (dataset in datasets) {
    set.seed(1)

    #### Using Perturbation -------------------------->
    dataFile <- paste0(dataPath, dataset, "/", dataset, "_ProcessedData.RData")

    # Load into a private environment so that objects stored in the file can
    # neither overwrite script variables (dataset, agree, Kmax, ...) nor be
    # silently replaced by stale objects left over from the previous dataset.
    loaded <- new.env()
    load(dataFile, envir = loaded)
    missingObjects <- setdiff(requiredObjects, ls(loaded))
    if (length(missingObjects) > 0) {
      stop("Missing object(s) in ", dataFile, ": ",
           paste(missingObjects, collapse = ", "), call. = FALSE)
    }
    survival <- loaded$survival
    mydatGE <- loaded$mydatGE
    mydatME <- loaded$mydatME
    mydatMI <- loaded$mydatMI
    rm(loaded)

    t1 <- Sys.time()

    # Patients available in the survival table and in every omics table.
    patients <- Reduce(intersect, list(rownames(survival), rownames(mydatGE),
                                       rownames(mydatME), rownames(mydatMI)))
    if (length(patients) == 0) {
      stop("No patient is shared by the survival and omics tables of ",
           dataset, call. = FALSE)
    }

    # drop = FALSE keeps the two-dimensional shape even for a single column.
    mydatGE <- mydatGE[patients, , drop = FALSE]
    mydatME <- mydatME[patients, , drop = FALSE]
    mydatMI <- mydatMI[patients, , drop = FALSE]
    survival <- survival[patients, , drop = FALSE]

    clinical <- read.table(file = paste0(dataPath, dataset, "/", dataset, "_Clinical.txt"),
                           sep = "\t", header = TRUE, row.names = 1,
                           stringsAsFactors = FALSE, fill = TRUE)
    # Discard the first two rows after the header.
    clinical <- clinical[-(1:2), , drop = FALSE]
    # Rebuild the row names from characters 1-4, 6-7 and 9-12 of the original
    # ids, joined with "." so they can be matched against rownames(survival).
    barcodes <- rownames(clinical)
    rownames(clinical) <- paste(substr(barcodes, 1, 4), substr(barcodes, 6, 7),
                                substr(barcodes, 9, 12), sep = ".")
    clinical <- clinical[rownames(survival), , drop = FALSE]

    # Kmax keeps its top-level value here: load() targets a private
    # environment, so an object of the same name in the .RData file cannot
    # overwrite it any more.
    dataList <- list(GE = mydatGE, ME = mydatME, MI = mydatMI)
    result <- SubtypingOmicsData(dataList = dataList, Kmax = Kmax,
                                 noisePercent = noisePercent, iter = iter,
                                 kmIter = kmIter, agreementCutoff = agree)

    t2 <- Sys.time()
    print(t2 - t1)

    resultFile <- paste0(resultPath, "PINS_", dataset, "_agree", agree, ".RData")
    save(dataset, Kmax, dataList, survival, clinical, result, t1, t2, file = resultFile)

    pdfFile <- paste0(resultPath, "PINS_", dataset, "_agree", agree, ".pdf")
    pdf(pdfFile)

    # Close the PDF device even when a plotting step fails, so that no
    # graphics device is left open behind an error.
    tryCatch({
      groups <- result$groups
      groups2 <- result$groups2

      # One AUC plot per data type; the selected k is highlighted in red.
      for (i in seq_along(dataTypeLabels)) {
        typeResult <- result$dataTypeResult[[i]]
        auc <- typeResult$Discrepancy$AUC
        plot(auc, ylab = "Area under the curve", xlab = "Cluster number",
             main = paste0("AUC of ", dataTypeLabels[i], " for ", dataset, " data"))
        lines(seq_along(auc), auc)
        points(typeResult$k, auc[typeResult$k], col = "red")
      }

      # One survival plot per data type. Curves are drawn in factor-level
      # order, so the colours are taken from the levels (not from unique(),
      # which follows first appearance); this matches the level-1 plot below.
      for (i in seq_along(dataTypeLabels)) {
        typeGroups <- as.factor(result$dataTypeResult[[i]]$groups)
        coxFit <- coxph(Surv(time = Survival, event = Death) ~ typeGroups,
                        data = survival, ties = "exact")
        mfit <- survfit(Surv(Survival, Death == 1) ~ typeGroups, data = survival)
        plot(mfit, col = levels(typeGroups),
             main = paste0("Survival curves for ", dataTypeLabels[i], " of ", dataset, " (PertCluster)"),
             xlab = "Days", ylab = "Survival", lwd = 2)
        legend("topright", legend = paste0("Cox p-value:", round(summary(coxFit)$sctest[3], digits = 5)))
      }

      # Age in years, derived from the absolute value of birth_days_to.
      ageCol <- abs(as.numeric(clinical$birth_days_to)) / 365
      names(ageCol) <- rownames(clinical)

      # ---- Level 1 grouping (result$groups) ----
      levels1 <- levels(factor(groups))
      age <- setNames(lapply(levels1, function(g) ageCol[names(groups[groups == g])]), levels1)
      boxplot(age, main = paste0("Age distribution, ", dataset), xlab = "Groups", ylab = "Age")

      coxFit <- coxph(Surv(time = Survival, event = Death) ~ as.factor(groups),
                      data = survival[names(groups), ], ties = "exact")
      mfit <- survfit(Surv(Survival, Death == 1) ~ as.factor(groups),
                      data = survival[names(groups), ])
      plot(mfit, col = levels1,
           main = paste0("Survival curves for ", dataset, ", level 1 (PertCluster)"),
           xlab = "Days", ylab = "Survival", lwd = 2)
      legend("top", legend = paste0("Cox p-value:", round(summary(coxFit)$sctest[3], digits = 5)))
      legend("topright", fill = levels1,
             legend = paste0("Group ", levels1, ": ", table(groups)[levels1]))

      # ---- Level 2 grouping (result$groups2) ----
      levels2 <- levels(factor(groups2))
      age <- setNames(lapply(levels2, function(g) ageCol[names(groups2[groups2 == g])]), levels2)
      boxplot(age, main = paste0("Age distribution, ", dataset), xlab = "Groups", ylab = "Age")

      coxFit <- coxph(Surv(time = Survival, event = Death) ~ as.factor(groups2),
                      data = survival, ties = "exact")
      mfit <- survfit(Surv(Survival, Death == 1) ~ as.factor(groups2), data = survival)

      # Colour map for level 2: labels that also exist at level 1 keep their
      # own label as colour; labels that only exist at level 2 get the next
      # free palette indices after max(groups). Labels are handled as
      # character so the lookup is always by name (never positional), and
      # seq_along() is used so a single new label cannot expand into 1:label.
      labels1 <- as.character(unique(groups))
      labels2 <- as.character(unique(groups2))
      sharedLabels <- intersect(labels2, labels1)
      addedLabels <- setdiff(labels2, labels1)
      colorMap <- c(sharedLabels, as.character(seq_along(addedLabels) + max(groups)))
      names(colorMap) <- c(sharedLabels, addedLabels)
      colors <- colorMap[levels2]

      plot(mfit, col = colors,
           main = paste0("Survival curves for ", dataset, ", level 2 (PertCluster)"),
           xlab = "Days", ylab = "Survival", lwd = 2)
      legend("top", legend = paste0("Cox p-value:", round(summary(coxFit)$sctest[3], digits = 5)))
      legend("topright", fill = colors,
             legend = paste0("Group ", levels2, ": ", table(groups2)[levels2]))
    }, finally = dev.off())
  }
}


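### Disabled code: test of IDH mutation status against the GBM subtypes.
### NOTE: the main loop saves its results as "PINS_<dataset>_agree<cutoff>.RData",
### so the load() path below must be adapted before this section is re-enabled.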
# resultPath="~/Desktop/Projects/SubTyping/PackageAndTesting/PINSResult/"
# load(paste(resultPath,"PINS_GBM.RData",sep = "")); groups=result$groups
# X=read.csv(file=paste(resultPath, "IDH1-Sample-Status.txt", sep=""), header=T, sep = "\t", stringsAsFactors=FALSE)
# X$CLID=gsub("[-]", ".", X$CLID); X=X
# IDH1=X$IDH1mut; names(IDH1)=X$CLID
# foundST=X$Subtype; names(foundST)=X$CLID
# 
# confTable=confusionMatrix(IDH1, groups); confTable
# 
# fisher.test(confTable[1:2,1:2])



