# Session setup: RNG seed and input/output locations.
#
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# deletes the caller's objects but leaves attached packages and options in
# place, so it does not provide a clean session. Run this script in a freshly
# started R process instead.

# Fixed seed so that any random-number use further down is repeatable.
set.seed(1)

# Directory containing the "<dataset>.RData" input files. The trailing slash is
# required because file names are appended by plain string concatenation.
dataPath <- "~/Desktop/Projects/SubTyping/DataGEO/"

# Directory intended for results (not referenced in this part of the script).
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/SNFResult/"

library("flexclust")
# Load SNFtool last so that its distance functions are not masked by other packages.
# Please restart R before running this script.
# For unknown reasons, loading some other packages may change the SNF results slightly.
library(SNFtool)

## First, set all the parameters:
K <- 20       # number of neighbors, usually (10~30)
alpha <- 0.5  # hyperparameter, usually (0.3~0.8)
NIT <- 10     # number of iterations, usually (10~20)

# Names of the datasets to process, in the order they are reported.
datasets <- c(
  "AML2004", "GSE10245", "GSE19188", "Brain2002",
  "GSE43580", "Lung2001", "GSE15061"
)

# Sensitivity sweep over the affinity hyperparameter.
#
# For every dataset, cluster the non-healthy samples from gene expression alone
# and score the clustering against the known labels with the adjusted Rand
# index. The first entry of alphaRange (0.5) is the default setting; the other
# entries are small perturbations around it.
#
# Each dataset is loaded, filtered, normalized and turned into a distance matrix
# exactly once; only the alpha-dependent steps are repeated.
alphaRange <- c(0.5, 0.45, 0.47, 0.53, 0.55)

# ARI[i, j] holds the adjusted Rand index of datasets[i] at alphaRange[j].
# Preallocated so that column j always corresponds to alphaRange[j].
ARI <- data.frame(
  matrix(NA_real_, nrow = length(datasets), ncol = length(alphaRange)),
  row.names = datasets
)

for (i in seq_along(datasets)) {
  dataset <- datasets[i]
  message(dataset)

  # Each .RData file is expected to define gene_<dataset> and group_<dataset>.
  load(paste0(dataPath, dataset, ".RData"))
  data <- get(paste0("gene_", dataset))
  group <- get(paste0("group_", dataset))

  # Remove healthy samples. Column 2 of `group` carries the class label and the
  # row names identify the samples in both objects. drop = FALSE keeps the
  # two-dimensional shape even if a single sample remains.
  healthyIds <- rownames(group)[group[, 2] == "healthy"]
  data <- data[!rownames(data) %in% healthyIds, , drop = FALSE]
  group <- group[!rownames(group) %in% healthyIds, , drop = FALSE]

  # 1. Normalization (use `geneEData <- data` instead to skip it)
  geneEData <- standardNormalization(data)

  # 2. Calculate the pair-wise distance. The namespace is spelled out so the
  #    result does not depend on which attached package's dist2 comes first.
  exprMatrix <- as.matrix(geneEData)
  PSMgeneE <- SNFtool::dist2(exprMatrix, exprMatrix)

  for (j in seq_along(alphaRange)) {
    # 3. Construct the similarity graph for this alpha
    W1 <- affinityMatrix(PSMgeneE, K, alphaRange[j])

    # Groups with just gene expression: take the first estimate of the number
    # of clusters and run spectral clustering with it.
    C <- estimateNumberOfClustersGivenGraph(W1)
    groupSNF <- spectralClustering(W1, C[[1]])

    ARI[i, j] <- randIndex(groupSNF, group[, 2], correct = TRUE)
  }
}

# Print the ARI table as LaTeX tabular rows: a header line listing the alpha
# values, then one line per dataset with the scores rounded to 3 digits.

# Label the columns with their alpha value, then sort the columns by the
# numeric alpha (not by the character form of the label). drop = FALSE keeps
# ARI a data frame even when there is only one alpha.
colnames(ARI) <- alphaRange
ARI <- ARI[, order(alphaRange), drop = FALSE]

headerLine <- paste0("&", paste(colnames(ARI), collapse = "&"), "\\\\\n")

# One "<dataset>&v1&v2...\\" line per row; seq_len() yields no lines for an
# empty table instead of iterating over c(1, 0).
bodyLines <- vapply(
  seq_len(nrow(ARI)),
  function(i) {
    paste0(
      rownames(ARI)[i], "&",
      paste(round(ARI[i, ], digits = 3), collapse = "&"),
      "\\\\\n"
    )
  },
  character(1)
)

str <- paste0(headerLine, paste(bodyLines, collapse = ""))
cat(str)


