# Start from an empty workspace: removes every object listed by ls().
rm(list = ls())

# Input and output locations -- adjust both before running.
# Both strings are concatenated directly with file names further down,
# so each one has to end with a path separator.
dataPath <- "~/Desktop/Projects/SubTyping/DataGEO/"
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/PINSResult/"

# dataPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/DataGEO/"
# resultPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/PINSResult/"

library("flexclust")
library(PINS)


# Clustering settings shared by the loops below.
Kmax <- 10              # forwarded as `Kmax` to PerturbationClustering
iter <- 200             # forwarded as `iter` to PerturbationClustering
noisePercent <- "med"   # not read by any active statement in the visible script
kmIter <- 200           # only mentioned in the commented-out clustering call



# Run PerturbationClustering on every dataset for a range of noisePercent
# values, report RI/ARI against the known groups, and store each result in
# "<resultPath>PINS_<dataset>_percentile<percentile>.RData".
percentiles <- c(0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75)

# Make sure the output directory exists before any clustering starts, so a
# finished run cannot be lost to a failing save(). A no-op if it already exists.
dir.create(resultPath, recursive = TRUE, showWarnings = FALSE)

for (dataset in c("AML2004", "GSE10245", "GSE15061", "GSE19188", "GSE14924", "Brain2002", "GSE43580", "Lung2001")) {
  # The input does not depend on the percentile: load and filter it once per
  # dataset instead of once per (dataset, percentile) pair.
  load(paste0(dataPath, dataset, ".RData"))

  # The .RData file is expected to define gene_<dataset> and group_<dataset>.
  data <- get(paste0("gene_", dataset))
  group <- get(paste0("group_", dataset))

  # remove healthy samples (rows whose second `group` column equals "healthy");
  # drop = FALSE keeps the two-dimensional shape even if a single row is left
  healthyIds <- rownames(group)[group[, 2] == "healthy"]
  data <- data[!rownames(data) %in% healthyIds, , drop = FALSE]
  group <- group[!rownames(group) %in% healthyIds, , drop = FALSE]

  for (percentile in percentiles) {
    # Re-seed right before each run so every (dataset, percentile) result is
    # reproducible on its own.
    set.seed(1)

    print(paste0("SUBTYPING ", dataset))

    #result = PerturbationClustering(data=data, Kmax = Kmax, noisePercent = 0.5, iter = iter, kmIter = kmIter)

    result <- PerturbationClustering(data = data, Kmax = Kmax, noisePercent = percentile, iter = iter)

    # Agreement between result$groups and group$Group:
    # plain Rand index (correct = FALSE) and adjusted Rand index (correct = TRUE).
    ri <- randIndex(result$groups, group$Group, correct = FALSE)
    ari <- randIndex(result$groups, group$Group, correct = TRUE)
    message(dataset, ": percentile ", percentile, "; RI ", ri, "; ARI ", ari)

    resultFile <- paste0(resultPath, "PINS_", dataset, "_percentile", percentile, ".RData")
    save(result, file = resultFile)
  }
}


## show the results
# For every dataset, reload the stored clustering result of each percentile and
# print the adjusted Rand indices (rounded to 2 digits) as one "&"-separated line.
percentiles <- c(0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75)

for (dataset in c("GSE10245", "GSE19188", "GSE43580", "GSE15061", "GSE14924", "Lung2001", "AML2004", "Brain2002")) {
  # Only the group table is needed here, and it is the same for every
  # percentile, so load and filter it once per dataset.
  load(paste0(dataPath, dataset, ".RData"))
  group <- get(paste0("group_", dataset))

  # remove healthy samples (rows whose second `group` column equals "healthy")
  healthyIds <- rownames(group)[group[, 2] == "healthy"]
  group <- group[!rownames(group) %in% healthyIds, , drop = FALSE]

  # Preallocate one slot per percentile instead of growing the vector.
  ret <- numeric(length(percentiles))
  for (i in seq_along(percentiles)) {
    percentile <- percentiles[i]

    # Restores the object `result` that the clustering loop saved to this file.
    resultFile <- paste0(resultPath, "PINS_", dataset, "_percentile", percentile, ".RData")
    load(file = resultFile)

    ret[i] <- round(randIndex(result$groups, group$Group, correct = TRUE), digits = 2)
  }

  message(dataset, ": ", paste0(ret, collapse = "&"))
}