# Remove every (non-hidden) object from the current environment before the
# analysis starts.
rm(list = ls())

# Directory holding the input data sets. Like all paths below it ends with "/",
# so file names are appended to it directly.
dataPath <- "~/Desktop/Projects/SubTyping/DataGEO/"

# Directories holding the saved clustering results of each method.
SNFPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/SNFResult/"
PINSPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/PINSResult/"
CCPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/CCResult/"
iClusterPlusPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/iClusterPlusResult/"

# Directory the PDF figures are written to.
pdfPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/Plots/Figures/"

library("flexclust")


#-------Perturbation clustering
clusRange <- 2:10
datasets <- c("GSE10245", "GSE19188", "GSE43580", "GSE15061", "GSE14924", "Lung2001", "AML2004", "Brain2002")
# Adjusted Rand index per dataset for each method; filled in element by element.
PINS_ARIs <- SNF_ARIs <- CC_ARIs <- NULL

# For every dataset: load the data and sample labels, drop the healthy samples,
# run a PCA, load the saved PINS result, score it against the labels and write
# a PDF scatter plot of the first two principal components.
for (i in seq_along(datasets)) {
  dataset <- datasets[i]
  load(paste0(dataPath, dataset, ".RData"))

  # The .RData file is expected to define gene_<dataset> and group_<dataset>.
  data <- get(paste0("gene_", dataset))
  group <- get(paste0("group_", dataset))

  # remove healthy samples (column 2 of `group` holds the condition label)
  healthy <- rownames(group)[group[, 2] == "healthy"]
  data <- data[!rownames(data) %in% healthy, ]
  group <- group[!rownames(group) %in% healthy, ]

  # get shapes: the j-th condition is drawn with plotting symbol j
  conditions <- unique(group[, 2])
  pch <- rep(1, nrow(data))
  for (j in seq_along(conditions)) {
    pch[rownames(data) %in% rownames(group)[group[, 2] == conditions[j]]] <- j
  }

  pca <- prcomp(data)

  # Axis limits are padded beyond the data range (presumably to leave room for
  # the two legends).
  xlimit <- c(min(pca$x[, 1]), max(pca$x[, 1]) + 10)
  ylimit <- c(min(pca$x[, 2] - 20), max(pca$x[, 2]) + 10)

  resultFile <- paste0(PINSPath, "PINS_", dataset, ".RData")
  load(resultFile)

  memb <- result$groups

  # Agreement between clusters and known conditions, with (ARI) and without
  # (RI) correction. TRUE/FALSE are spelled out because T/F are ordinary
  # variables that a load()ed file could overwrite.
  ARI <- randIndex(memb, group[, 2], correct = TRUE)
  PINS_ARIs[i] <- ARI
  RI <- randIndex(memb, group[, 2], correct = FALSE)

  # Open the device only once everything needed for the figure is available,
  # and always close it, even if drawing fails.
  pdfFile <- paste0(pdfPath, "PINS_GEO_", dataset, ".pdf")
  pdf(pdfFile)
  tryCatch({
    par(tcl = 0.3, mgp = c(1.4, 0.2, 0), mar = c(3, 3, 3, 1), xpd = TRUE)
    plot(pca$x[, 1:2], pch = pch, col = memb,
         main = paste0("PINS, ", dataset, ", ARI=", round(ARI, 2), ", RI=", round(RI, 2)),
         ylim = ylimit, xlim = xlimit, lwd = 2, cex = 2, cex.lab = 1.7, cex.axis = 1.5, cex.main = 2)
    legend("topright", legend = conditions, pch = seq_along(conditions), cex = 1.7)
    legend("bottomright", legend = paste0("Cluster ", sort(unique(memb))),
           fill = sort(unique(memb)), ncol = ceiling(length(unique(memb)) / 4), cex = 1.7)
  }, finally = dev.off())
}





# Similarity network fusion
# Same figure as for PINS, using the cluster assignment `groupSNF` from the
# saved SNF result of each dataset.
# NOTE(review): `data`, `group` and `pca` are not recomputed per dataset in this
# loop. Presumably SNF_<dataset>.RData provides them; otherwise the values left
# over from the previous section are used for every dataset - confirm.
for (i in seq_along(datasets)) {
  set.seed(1)

  dataset <- datasets[i]
  if (dataset == "GSE14924") {
    # No SNF result is loaded for this dataset; a small placeholder value is
    # stored just to make the bar appear in the ARI bar plot.
    SNF_ARIs[i] <- 0.005
  } else {
    resultFile <- paste0(SNFPath, "SNF_", dataset, ".RData")
    load(resultFile)

    # get shapes: the j-th condition is drawn with plotting symbol j
    conditions <- unique(group[, 2])
    pch <- rep(1, nrow(data))
    for (j in seq_along(conditions)) {
      pch[rownames(data) %in% rownames(group)[group[, 2] == conditions[j]]] <- j
    }

    xlimit <- c(min(pca$x[, 1]), max(pca$x[, 1]) + 10)
    ylimit <- c(min(pca$x[, 2] - 20), max(pca$x[, 2]) + 10)

    memb <- groupSNF

    # TRUE/FALSE are spelled out because T/F are ordinary variables that a
    # load()ed file could overwrite.
    ARI <- randIndex(memb, group[, 2], correct = TRUE)
    SNF_ARIs[i] <- ARI
    RI <- randIndex(memb, group[, 2], correct = FALSE)

    # Open the device only once the scores are computed, and always close it,
    # even if drawing fails.
    pdfFile <- paste0(pdfPath, "SNF_GEO_", dataset, ".pdf")
    pdf(pdfFile)
    tryCatch({
      par(tcl = 0.3, mgp = c(1.4, 0.2, 0), mar = c(3, 3, 3, 1), xpd = TRUE)
      plot(pca$x[, 1:2], pch = pch, col = memb,
           main = paste0("SNF, ", dataset, ", ARI=", round(ARI, 2), ", RI=", round(RI, 2)),
           ylim = ylimit, xlim = xlimit, lwd = 2, cex = 2, cex.lab = 1.7, cex.axis = 1.5, cex.main = 2)
      legend("topright", legend = conditions, pch = seq_along(conditions), cex = 1.7)
      legend("bottomright", legend = paste0("Cluster ", sort(unique(memb))),
             fill = sort(unique(memb)), ncol = ceiling(length(unique(memb)) / 4), cex = 1.7)
    }, finally = dev.off())
  }
}
# SNF figure for GSE14924, the dataset skipped in the loop above: the samples
# are drawn without cluster colours and the title reports ARI and RI as NA.
dataset <- "GSE14924"
load(paste0(dataPath, dataset, ".RData"))
data <- get(paste0("gene_", dataset))
group <- get(paste0("group_", dataset))

# remove healthy samples (column 2 of `group` holds the condition label)
healthy <- rownames(group)[group[, 2] == "healthy"]
data <- data[!rownames(data) %in% healthy, ]
group <- group[!rownames(group) %in% healthy, ]

pca <- prcomp(data)

# get shapes: the j-th condition is drawn with plotting symbol j
conditions <- unique(group[, 2])
pch <- rep(1, nrow(data))
for (j in seq_along(conditions)) {
  pch[rownames(data) %in% rownames(group)[group[, 2] == conditions[j]]] <- j
}

# Note: the lower y padding here is 10, not the 20 used for the other figures.
xlimit <- c(min(pca$x[, 1]), max(pca$x[, 1]) + 10)
ylimit <- c(min(pca$x[, 2] - 10), max(pca$x[, 2]) + 10)

pdfFile <- paste0(pdfPath, "SNF_GEO_", dataset, ".pdf")
pdf(pdfFile)
ARI <- NA
RI <- NA
par(tcl = 0.3, mgp = c(1.4, 0.2, 0), mar = c(3, 3, 3, 1), xpd = TRUE)
plot(pca$x[, 1:2], pch = pch,
     main = paste0("SNF result, ", dataset, ", ARI=", round(ARI, 2), ", RI=", round(RI, 2)),
     ylim = ylimit, xlim = xlimit, lwd = 2, cex = 2, cex.lab = 1.7, cex.axis = 1.5, cex.main = 2)
legend("topright", legend = conditions, pch = seq_along(conditions), cex = 1.7)
dev.off()






# Consensus clustering
datasets <- c("GSE10245", "GSE19188", "GSE43580", "GSE15061", "GSE14924", "Lung2001", "AML2004", "Brain2002")
CC_ARIs <- NULL
# Number of clusters used for each dataset (same order as `datasets`);
# we get this from the CDF plots.
CC_ClusterNos <- c(6, 4, 3, 6, 7, 8, 5, 5)

# Same figure as for PINS, using the consensus classes stored in element k of
# `results` from the saved CC result of each dataset.
# NOTE(review): `data`, `group` and `pca` are not recomputed per dataset in this
# loop. Presumably CC_<dataset>.RData provides them; otherwise the values left
# over from the previous section are used for every dataset - confirm.
for (i in seq_along(datasets)) {
  dataset <- datasets[i]
  k <- CC_ClusterNos[i]

  file <- paste0(CCPath, "CC_", dataset, ".RData")
  load(file)

  # get shapes: the j-th condition is drawn with plotting symbol j
  conditions <- unique(group[, 2])
  pch <- rep(1, nrow(data))
  for (j in seq_along(conditions)) {
    pch[rownames(data) %in% rownames(group)[group[, 2] == conditions[j]]] <- j
  }

  xlimit <- c(min(pca$x[, 1]), max(pca$x[, 1]) + 10)
  ylimit <- c(min(pca$x[, 2] - 20), max(pca$x[, 2]) + 10)

  memb <- results[[k]]$consensusClass

  # TRUE/FALSE are spelled out because T/F are ordinary variables that a
  # load()ed file could overwrite.
  ARI <- randIndex(memb, group[, 2], correct = TRUE)
  CC_ARIs[i] <- ARI
  RI <- randIndex(memb, group[, 2], correct = FALSE)

  # Open the device only once the scores are computed, and always close it,
  # even if drawing fails.
  pdfFile <- paste0(pdfPath, "CC_GEO_", dataset, ".pdf")
  pdf(pdfFile)
  tryCatch({
    par(tcl = 0.3, mgp = c(1.4, 0.2, 0), mar = c(3, 3, 3, 1), xpd = TRUE)
    plot(pca$x[, 1:2], pch = pch, col = memb,
         main = paste0("CC, ", dataset, ", ARI=", round(ARI, 2), ", RI=", round(RI, 2)),
         ylim = ylimit, xlim = xlimit, lwd = 2, cex = 2, cex.lab = 1.7, cex.axis = 1.5, cex.main = 2)
    legend("topright", legend = conditions, pch = seq_along(conditions), cex = 1.7)
    legend("bottomright", legend = paste0("Cluster ", sort(unique(memb))),
           fill = sort(unique(memb)), ncol = ceiling(length(unique(memb)) / 4), cex = 1.7)
  }, finally = dev.off())
}


#iClusterPlus
library(iClusterPlus)
# `datasets` is reused from the consensus clustering section:
# datasets = c("GSE10245", "GSE19188", "GSE43580", "GSE15061", "GSE14924", "Lung2001", "AML2004", "Brain2002")
iCluster_ARIs <- NULL
# Column of the cluster matrix used for each dataset (same order as
# `datasets`); we get this from the devRatMinBIC plots.
iCluster_Ks <- c(2, 5, 6, 9, 2, 5, 3, 2)

# Same figure as for PINS, using one column of the cluster matrix derived from
# `cv.fit` in the saved iClusterPlus result of each dataset.
# NOTE(review): `data`, `group` and `pca` are not recomputed per dataset in this
# loop. Presumably iClusterPlus_<dataset>.RData provides them; otherwise the
# values left over from the previous section are used - confirm.
for (i in seq_along(datasets)) {
    dataset <- datasets[i]
    k <- iCluster_Ks[i]

    file <- paste0(iClusterPlusPath, "iClusterPlus_", dataset, ".RData")
    load(file)

    # get shapes: the j-th condition is drawn with plotting symbol j
    conditions <- unique(group[, 2])
    pch <- rep(1, nrow(data))
    for (j in seq_along(conditions)) {
        pch[rownames(data) %in% rownames(group)[group[, 2] == conditions[j]]] <- j
    }

    xlimit <- c(min(pca$x[, 1]), max(pca$x[, 1]) + 10)
    ylimit <- c(min(pca$x[, 2] - 20), max(pca$x[, 2]) + 10)

    # One column of cluster assignments per fitted model; columns are labelled
    # "K=2", "K=3", ... in order.
    clusters <- getClusters(cv.fit)
    rownames(clusters) <- rownames(data)
    colnames(clusters) <- paste0("K=", 2:(length(cv.fit) + 1))
    # NOTE(review): column k carries the label "K=<k+1>". Confirm that
    # iCluster_Ks holds column indices rather than numbers of clusters.
    memb <- clusters[, k]

    # TRUE/FALSE are spelled out because T/F are ordinary variables that a
    # load()ed file could overwrite.
    ARI <- randIndex(memb, group[, 2], correct = TRUE)
    iCluster_ARIs[i] <- ARI
    RI <- randIndex(memb, group[, 2], correct = FALSE)

    # Open the device only once the scores are computed, and always close it,
    # even if drawing fails.
    pdfFile <- paste0(pdfPath, "iClusterPlus_GEO_", dataset, ".pdf")
    pdf(pdfFile)
    tryCatch({
        par(tcl = 0.3, mgp = c(1.4, 0.2, 0), mar = c(3, 3, 3, 1), xpd = TRUE)
        plot(pca$x[, 1:2], pch = pch, col = memb,
             main = paste0("iClusterPlus, ", dataset, ", ARI=", round(ARI, 2), ", RI=", round(RI, 2)),
             ylim = ylimit, xlim = xlimit, lwd = 2, cex = 2, cex.lab = 1.7, cex.axis = 1.5, cex.main = 2)
        legend("topright", legend = conditions, pch = seq_along(conditions), cex = 1.7)
        legend("bottomright", legend = paste0("Cluster ", sort(unique(memb))),
               fill = sort(unique(memb)), ncol = ceiling(length(unique(memb)) / 4), cex = 1.7)
    }, finally = dev.off())
}


# Label every ARI vector with the dataset names so that the values can be
# reordered by name below.
names(PINS_ARIs) <- datasets
names(CC_ARIs) <- datasets
names(SNF_ARIs) <- datasets
names(iCluster_ARIs) <- datasets

# Order in which the datasets appear along the x axis of the bar plot.
newNames <- c("Lung2001", "AML2004", "Brain2002", "GSE10245", "GSE19188", "GSE43580", "GSE15061", "GSE14924")

# One row per method, one column per dataset. Column names are dropped so that
# barplot() draws no default labels; rotated labels are added with text().
A <- rbind(PINS_ARIs[newNames], CC_ARIs[newNames], SNF_ARIs[newNames], iCluster_ARIs[newNames])
colnames(A) <- NULL

# Bar colours, in the same row order as `A` and the legend entries.
methodCols <- c("green", "blue", "cyan", "red")

pdfFile <- paste0(pdfPath, "ARI.pdf")
pdf(pdfFile)
par(tcl = 0.3, mgp = c(1.7, 0.2, 0), mar = c(5, 3, 3, 1), xpd = FALSE)
x <- barplot(A, beside = TRUE, col = methodCols, border = FALSE,
             ylab = "Adjusted Rand Index", main = "Comparison using 8 gene expression datasets",
             ylim = c(0, 1), cex = 2, cex.lab = 1.6, cex.axis = 1.4, cex.main = 1.8, font.main = 1)
# Rotated dataset labels below the axis, positioned relative to the second bar
# of each group.
text(x = x[2, ] - 1, y = par("usr")[3] - 0.06, labels = newNames, srt = 45, pos = 1, xpd = TRUE, cex = 1.4)
legend("topleft", legend = c("PINS", "CC", "SNF", "iClusterPlus"), fill = methodCols,
       cex = 1.7, bty = "n", border = FALSE)
box(bty = "l")
dev.off()



