# Start from an empty workspace: every object in the current environment is
# removed before anything else runs.
rm(list = ls())

# Input and output locations. Both end in "/" because file and directory names
# are appended to them by plain string concatenation further down.
dataPath <- "~/Desktop/Projects/SubTyping/DataGEO/"
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/CCResult/"

# Alternative locations; uncomment these two lines to use them instead.
# dataPath <- "/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/DataGEO/"
# resultPath <- "/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/CCResult/"

# IMPORTANT: restart R after installing ConsensusClusterPlus. Otherwise the
# script crashes with a confusing message:
#   Error: lazy-load database 'P' is corrupt
library(ConsensusClusterPlus)

# Candidate numbers of clusters. Only the upper bound is used by the loop
# below, where it is passed to ConsensusClusterPlus as maxK.
clusRange <- 2:10

# Run consensus clustering on every dataset and store the outcome.
#
# For each dataset name the loop
#   1. loads <dataPath><dataset>.RData and picks up the objects
#      gene_<dataset> and group_<dataset> that the file is expected to define,
#   2. drops every row whose label in the second column of the group table
#      is "healthy",
#   3. computes a PCA of the remaining data (kept only to be saved alongside
#      the clustering result),
#   4. transposes the data, median-centres each row and runs
#      ConsensusClusterPlus (hierarchical clustering, Pearson distance),
#      writing PNG plots into <resultPath><dataset>/,
#   5. saves everything to <resultPath>CC_<dataset>.RData.
for (dataset in c("AML2004", "GSE10245", "GSE19188", "GSE14924",
                  "Brain2002", "GSE43580", "Lung2001", "GSE15061")) {
  load(paste0(dataPath, dataset, ".RData"))

  data <- get(paste0("gene_", dataset))
  group <- get(paste0("group_", dataset))

  # remove healthy samples
  # The set of healthy row names is computed once from the unfiltered group
  # table. drop = FALSE keeps both objects two-dimensional even if only a
  # single row (or column) is left.
  healthy <- rownames(group)[group[, 2] == "healthy"]
  data <- data[!rownames(data) %in% healthy, , drop = FALSE]
  group <- group[!rownames(group) %in% healthy, , drop = FALSE]

  pca <- prcomp(data)

  # t1/t2 bracket the clustering step only; the PCA above is not timed.
  t1 <- Sys.time()

  # ConsensusClusterPlus clusters the columns of its input, so the rows of
  # `data` (the ones matched against the group table) become columns here.
  d <- t(data)
  #   mads=apply(d,1,mad)
  #   d=d[rev(order(mads))[1:5000],]
  # Centre every row of d on its median.
  d <- sweep(d, 1, apply(d, 1, median, na.rm = TRUE))

  # Per-dataset output directory. recursive = TRUE also creates a missing
  # resultPath, so a wrong or absent parent does not surface only when
  # save() fails after the long clustering run.
  path <- paste0(resultPath, dataset)
  if (!file.exists(path)) {
    dir.create(path, recursive = TRUE)
  }
  results <- ConsensusClusterPlus(
    d,
    maxK = max(clusRange),
    reps = 1000,
    pItem = 0.8,
    pFeature = 1,
    title = path,
    clusterAlg = "hc",
    distance = "pearson",
    plot = "png",
    seed = 888
  )
  #icl = calcICL(results,title=path,plot="png")

  t2 <- Sys.time()

  resultFile <- paste0(resultPath, "CC_", dataset, ".RData")
  save(results, dataset, data, group, pca, t1, t2, file = resultFile)
}



