# Start from an empty global workspace and fix the RNG seed so that repeated
# runs of this script are reproducible.
rm(list = ls())
set.seed(1)

# Input directory holding one <dataset>.RData file per GEO dataset, and the
# output directory for per-dataset SNF results and the summary PDF.
# Both keep their trailing slash because file names are appended directly.
dataPath <- "~/Desktop/Projects/SubTyping/DataGEO/"
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/SNFResult/"

library("flexclust")
# Load SNFtool last to preserve its distance functions (a package attached
# later could mask them).
# Please restart R before running this script.
# For unknown reasons, loading some other packages may change the SNF results slightly.
library(SNFtool)

## SNF parameters, shared by every dataset processed in this script.
K <- 20       # number of neighbors, usually 10-30
alpha <- 0.5  # hyperparameter passed to the affinity matrix, usually 0.3-0.8
NIT <- 10     # number of iterations, usually 10-20

# Cluster every GEO dataset with single-data-type SNF and cache the outcome.
#
# For each dataset this loop:
#   * loads <dataPath><dataset>.RData, which must define gene_<dataset>
#     (expression values, one row per sample) and group_<dataset> (sample
#     annotation whose second column is the condition label);
#   * drops the samples labelled "healthy";
#   * runs PCA (for plotting) and the SNF pipeline (normalise -> pairwise
#     distance -> affinity graph -> estimate cluster count -> spectral
#     clustering), timing the SNF part;
#   * saves the result to <resultPath>SNF_<dataset>.RData together with the
#     plotting metadata (conditions, pch) that belongs to this dataset.
for (dataset in c("AML2004", "GSE10245", "GSE19188", "Brain2002",
                  "GSE43580", "Lung2001", "GSE15061")) {
  load(paste0(dataPath, dataset, ".RData"))

  data <- get(paste0("gene_", dataset))
  group <- get(paste0("group_", dataset))

  # Remove healthy samples. drop = FALSE keeps the two-dimensional structure
  # even when a single sample remains, so later group[, 2] / nrow() calls
  # keep working.
  healthyIds <- rownames(group)[which(group[, 2] == "healthy")]
  data <- data[!rownames(data) %in% healthyIds, , drop = FALSE]
  group <- group[!rownames(group) %in% healthyIds, , drop = FALSE]

  # PCA is only used for the 2-D scatter plots in the summary PDF.
  pca <- prcomp(data)

  t1 <- Sys.time()

  # 1. Normalization
  geneEData <- SNFtool::standardNormalization(data)

  # 2. Pair-wise distance. The call is namespace-qualified because other
  #    packages (e.g. flexclust) export a different function also named
  #    dist2, and the result must not depend on attach order.
  PSMgeneE <- SNFtool::dist2(as.matrix(geneEData), as.matrix(geneEData))

  # 3. Similarity graph
  W1 <- SNFtool::affinityMatrix(PSMgeneE, K, alpha)

  # 4. Clusters from gene expression alone; the first element of the
  #    estimate list is used as the number of clusters.
  nClusters <- SNFtool::estimateNumberOfClustersGivenGraph(W1)
  groupSNF <- SNFtool::spectralClustering(W1, nClusters[[1]])

  t2 <- Sys.time()
  print(t2 - t1)

  # Plot symbol per sample: index of the sample's condition within
  # `conditions`; samples without a matching annotation row keep symbol 1.
  # This is computed BEFORE save() so the markers and legend labels are
  # stored with the dataset they describe, instead of leaking the values of
  # the last processed dataset into every later plot.
  conditions <- unique(group[, 2])
  pch <- rep(1, nrow(data))
  for (i in seq_along(conditions)) {
    pch[rownames(data) %in% rownames(group)[group[, 2] == conditions[i]]] <- i
  }

  resultFile <- paste0(resultPath, "SNF_", dataset, ".RData")
  save(dataset, data, groupSNF, group, pca, t1, t2, conditions, pch,
       file = resultFile)
}

# Write one PCA scatter plot per dataset into a single summary PDF.
#
# Each page shows the samples in PCA space, coloured by SNF cluster and
# marked by true condition, with the adjusted and unadjusted Rand index
# between the SNF clusters and the true conditions in the title.
pdfFile <- paste0(resultPath, "SNF_GEO_Summary.pdf")
pdf(pdfFile)
# tryCatch(..., finally = ) guarantees the PDF device is closed even when one
# dataset fails to load or plot, so the file is never left open/locked.
tryCatch({
  for (dataset in c("AML2004", "GSE10245", "GSE19188", "Brain2002",
                    "GSE43580", "Lung2001", "GSE15061")) {
    resultFile <- paste0(resultPath, "SNF_", dataset, ".RData")
    # Restores (at least) data, group, groupSNF and pca for this dataset.
    load(resultFile)

    # Rebuild the plot symbols and legend labels from THIS dataset's
    # annotation. Relying on `pch`/`conditions` left over from an earlier
    # loop would apply the last clustered dataset's markers to every plot.
    conditions <- unique(group[, 2])
    pch <- rep(1, nrow(data))
    for (i in seq_along(conditions)) {
      pch[rownames(data) %in% rownames(group)[group[, 2] == conditions[i]]] <- i
    }

    # Agreement between SNF clusters and the true conditions.
    ARI <- randIndex(groupSNF, group[, 2], correct = TRUE)
    RI <- randIndex(groupSNF, group[, 2], correct = FALSE)

    plot(pca$x, pch = pch, col = groupSNF,
         main = paste0("SNF result, ", dataset,
                       ", ARI=", round(ARI, 3),
                       ", RI=", round(RI, 3)))
    legend("bottomleft", legend = conditions, pch = seq_along(conditions))
  }
}, finally = dev.off())


