# Clear the workspace and fix the RNG seed before anything else runs.
rm(list = ls())
set.seed(1)

# Input directory (one sub-folder per dataset) and output directory for results.
dataPath <- "~/Desktop/Projects/SubTyping/DataTCGA/"
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/SNFResult/"

# dataPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/DataTCGA/"
# resultPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/SNFResult/"

library(survival)
# Load SNFtool last so that its distance functions are not masked by other packages.
# Please restart R before running this script.
# For unknown reasons, loading some other packages may change the SNF results slightly.
library(SNFtool)


## SNF parameters used for every dataset below
K <- 20       # number of neighbors, usually 10~30
alpha <- 0.5  # hyperparameter, usually 0.3~0.8
NIT <- 10     # number of iterations, usually 10~20

# For every dataset: load the processed data, build one affinity matrix per
# data type, fuse them with SNF, cluster the patients, write survival plots to
# a PDF and save the cluster assignments to an .RData file.
for (dataset in c("LUSC", "GBM", "BRCA", "LAML", "KIRC", "COAD")) {
  prefix <- paste0(dataPath, dataset, "/")

  #### Using SNF -------------------------->
  # The .RData file must provide mydatGE, mydatME, mydatMI and survival;
  # they are used below and are not created anywhere else in this script.
  file <- paste0(prefix, dataset, "_ProcessedData.RData")
  load(file)

  t1 <- Sys.time()

  # Keep only the patients present in the survival table and all three data types.
  patients <- rownames(survival)
  patients <- intersect(patients, rownames(mydatGE))
  patients <- intersect(patients, rownames(mydatME))
  patients <- intersect(patients, rownames(mydatMI))

  mydatGE <- mydatGE[patients, ]
  mydatME <- mydatME[patients, ]
  mydatMI <- mydatMI[patients, ]
  survival <- survival[patients, ]

  clinicalFile <- paste0(
    dataPath, dataset,
    "/Clinical/Biotab//nationwidechildrens.org_clinical_patient_",
    tolower(dataset), ".txt"
  )
  clinical <- read.table(
    file = clinicalFile, sep = "\t", header = TRUE, row.names = 1,
    stringsAsFactors = FALSE, fill = TRUE
  )
  # Drop the first two rows (presumably extra header rows of the Biotab file
  # -- TODO confirm).
  clinical <- clinical[-(1:2), ]
  # Rebuild the row names from characters 1-4, 6-7 and 9-12 joined by "." so
  # that they can be matched against rownames(survival).
  a <- rownames(clinical)
  rownames(clinical) <- paste(substr(a, 1, 4), substr(a, 6, 7), substr(a, 9, 12), sep = ".")
  clinical <- clinical[rownames(survival), ]
  #View(clinical[order(groups),])

  gender <- clinical[, "gender"]
  names(gender) <- rownames(clinical)
  #confusionMatrix(groups, gender)

  # Normalization
  datGE <- standardNormalization(mydatGE)
  datME <- standardNormalization(mydatME)
  datMI <- standardNormalization(mydatMI)

  # Pairwise patient distances for each data type.
  PSMgeneE <- dist2(as.matrix(datGE), as.matrix(datGE))
  PSMmethy <- dist2(as.matrix(datME), as.matrix(datME))
  PSMmir <- dist2(as.matrix(datMI), as.matrix(datMI))

  # One affinity matrix per data type, then the fused network.
  W1 <- affinityMatrix(PSMgeneE, K, alpha)
  W2 <- affinityMatrix(PSMmethy, K, alpha)
  W3 <- affinityMatrix(PSMmir, K, alpha)
  W <- SNF(list(W1, W2, W3), K, NIT)

  ##### Clustering
  # The number of clusters is the first estimate returned for the range 2:10.
  # Groups with just geneExpression
  CGE <- estimateNumberOfClustersGivenGraph(W1, 2:10)[[1]]
  groupGE <- spectralClustering(W1, CGE)
  # Groups with methylation
  CME <- estimateNumberOfClustersGivenGraph(W2, 2:10)[[1]]
  groupME <- spectralClustering(W2, CME)
  # Groups with microRNA
  CMI <- estimateNumberOfClustersGivenGraph(W3, 2:10)[[1]]
  groupMI <- spectralClustering(W3, CMI)
  # Groups with SNF
  C <- estimateNumberOfClustersGivenGraph(W, 2:10)[[1]]
  groupSNF <- spectralClustering(W, C) # the final subtypes information

  pdfFile <- paste0(resultPath, "SNF_", dataset, ".pdf")
  pdf(pdfFile)

  # Everything drawn between pdf() and dev.off() goes into pdfFile; the
  # `finally` clause closes the device even if a model fit or plot fails.
  tryCatch({
    # Cox log rank test GE: Gene Expression
    groups <- factor(groupGE)
    names(groups) <- rownames(survival)
    coxFit <- coxph(Surv(time = Survival, event = Death) ~ groups, data = survival, ties = "exact")
    coxSummary <- summary(coxFit)
    # Values are not auto-printed inside a loop, so print explicitly.
    print(coxSummary)

    mfit <- survfit(Surv(Survival, Death == 1) ~ factor(groups), data = survival)
    # Curves are drawn in factor-level order, so colour them by level index.
    plot(mfit, col = seq_len(nlevels(groups)), main = paste0("Survival curves for gene expression of ", dataset, " (SNF)"), xlab = "Days", ylab = "Survival", lwd = 2)
    legend("topright", legend = paste0("Cox p-value:", round(coxSummary$sctest[3], digits = 5)))

    # Cox log rank test ME: Methylation
    groups <- factor(groupME)
    names(groups) <- rownames(survival)
    coxFit <- coxph(Surv(time = Survival, event = Death) ~ groups, data = survival, ties = "exact")
    coxSummary <- summary(coxFit)
    print(coxSummary)

    mfit <- survfit(Surv(Survival, Death == 1) ~ factor(groups), data = survival)
    plot(mfit, col = seq_len(nlevels(groups)), main = paste0("Survival curves for methylation of ", dataset, " (SNF)"), xlab = "Days", ylab = "Survival", lwd = 2)
    legend("topright", legend = paste0("Cox p-value:", round(coxSummary$sctest[3], digits = 5)))

    # Cox log rank test MI: MicroRNA
    groups <- factor(groupMI)
    names(groups) <- rownames(survival)
    coxFit <- coxph(Surv(time = Survival, event = Death) ~ groups, data = survival, ties = "exact")
    coxSummary <- summary(coxFit)
    print(coxSummary)

    mfit <- survfit(Surv(Survival, Death == 1) ~ factor(groups), data = survival)
    plot(mfit, col = seq_len(nlevels(groups)), main = paste0("Survival curves for microRNA of ", dataset, " (SNF)"), xlab = "Days", ylab = "Survival", lwd = 2)
    legend("topright", legend = paste0("Cox p-value:", round(coxSummary$sctest[3], digits = 5)))

    # Cox SNF
    groups <- factor(groupSNF)
    names(groups) <- rownames(survival)
    coxFit <- coxph(Surv(time = Survival, event = Death) ~ groups, data = survival, ties = "exact")
    coxSummary <- summary(coxFit)
    print(coxSummary)

    # Age per SNF group: |birth_days_to| / 365. Entries that are not numeric
    # become NA (as.numeric warns about the coercion).
    ageCol <- abs(as.numeric(clinical$"birth_days_to")) / 365
    names(ageCol) <- rownames(clinical)
    age <- split(ageCol[names(groups)], groups)
    boxplot(age, main = paste0("Age distribution, ", dataset), xlab = "Groups", ylab = "Age")

    mfit <- survfit(Surv(Survival, Death == 1) ~ factor(groups), data = survival)
    # Use the same level-indexed colours for curves and legend so they agree.
    groupCols <- seq_len(nlevels(groups))
    plot(mfit, col = groupCols, main = paste0("Survival curves for combined data of ", dataset, " (SNF)"), xlab = "Days", ylab = "Survival", lwd = 2)
    legend("top", legend = paste0("Cox p-value:", round(coxSummary$sctest[3], digits = 5)))
    legend("topright", fill = groupCols, legend = paste0("Group ", levels(groups), ": ", table(groups)[levels(groups)]))
    #a<-confusionMatrix(groups, gender)
    #a <- cbind(Groups=rownames(a), a, Age=round(sapply(age, median, simplify = T)))
    #addtable2plot(x=0, y=0, a)
  }, finally = dev.off())
  #### Using SNF <--------------------------

  t2 <- Sys.time()
  print(t2 - t1)

  resultFile <- paste0(resultPath, "SNF_", dataset, ".RData")
  save(mydatGE, mydatME, mydatMI, groupGE, groupME, groupMI, groupSNF, t2, t1, file = resultFile)
}
