# Remove every (non-hidden) object from the workspace before the figures are
# rebuilt, so nothing left over from an earlier session is picked up by name.
rm(list = ls())

# Directories holding the saved results of each clustering method, and the
# directory the PDF figures are written to. File names are appended directly to
# these strings, so the trailing "/" is required.
PINSPath <- "~/Desktop/Projects/Subtyping/PackageAndTesting/PINSResult/"
CCPath <- "~/Desktop/Projects/Subtyping/PackageAndTesting/CCResult/"
SNFPath <- "~/Desktop/Projects/Subtyping/PackageAndTesting/SNFResult/"
iClusterPlusPath <- "~/Desktop/Projects/Subtyping/PackageAndTesting/iClusterPlusResult/"
pdfPath <- "~/Desktop/Projects/Subtyping/PackageAndTesting/Plots/Figures/"

# scales provides alpha(), used below for semi-transparent plotting colours.
library(scales)

# Dimensions of a simulated data matrix (rows x columns; the columns are
# plotted on the "Genes" axis) and the cluster counts k shown in the figures.
nrow <- 100
ncol <- 1000
clusRange <- 2:10

# Figure: expression profiles of the first few rows of the Gaussian1 data ----
# The objects used below (dataG here; resultG and AUC further down) are
# expected to be provided by the loaded .RData file.
resultFile <- paste0(PINSPath, "PINS_Gaussian1.RData")
load(resultFile)

pdfFile <- paste0(pdfPath, "PINS_Data_Gaussian1.pdf")
pdf(pdfFile, height = 1.9 * 3, width = 7)
par(tcl = 0.3, mgp = c(1.3, 0.3, 0), mar = c(2.5, 2.5, 2.5, 1), xpd = TRUE)
# Set up the axes only: the single placeholder point is drawn too small to see.
plot(
  1, 0,
  xlim = c(1, ncol), ylim = c(-6, 6), cex = 0.00001,
  xlab = "Genes", ylab = "Expression",
  main = "Gene expression of Gaussian1",
  cex.lab = 1.4, cex.axis = 1.3, cex.main = 1.7
)
# Overlay rows 1 .. nrow/20 of dataG as semi-transparent dots, one dot per gene.
for (i in seq_len(nrow / 20)) {
  points(seq(ncol), dataG[i, ], cex = 0.3, col = alpha(1, 0.5), pch = 19)
}
dev.off()

# Figure: original and perturbed connectivity matrices of Gaussian1 ----
my_palette <- colorRampPalette(c("white", "blue"))(n = 200)

# Write one connectivity heatmap to the PDF `file`.
# `mat` must already be in display order: Rowv = NA asks heatmap() not to
# reorder it again. `title` becomes the plot title.
drawConnectivityPdf <- function(mat, file, title) {
  pdf(file, height = 7.6, width = 7)
  par(cex.main = 2.3, oma = c(1.5, 3, 0, 0))
  heatmap(
    mat,
    col = my_palette, labRow = NA, labCol = NA, symm = TRUE, Rowv = NA,
    add.expr = box(which = "plot", lty = "solid", lwd = 2, bty = "o"),
    margins = c(0.5, 0.5), main = title
  )
  mtext("Patients", side = 2, cex = 3, line = 5)
  mtext("Patients", side = 1, cex = 3, line = 5.5, at = 0.45)
  invisible(dev.off())
}

for (i in c(2, 4, 10)) {
  # First pass on the current device: only the row/column ordering
  # (rowInd / colInd) computed by heatmap() is kept.
  h <- heatmap(
    resultG$orig[[i]],
    col = my_palette, labRow = NA, labCol = NA, symm = TRUE,
    add.expr = box(which = "plot", lty = "solid", lwd = 2, bty = "o"),
    margins = c(0.5, 0.5), main = "Connectivity"
  )
  # NOTE(review): the ordering comes from resultG$orig but the matrix drawn is
  # resultG$origS, whereas the multi-dataset section further down draws
  # resultG$orig. Confirm which element is intended.
  origOrdered <- resultG$origS[[i]][h$rowInd, h$colInd]
  pdfFile <- paste0(pdfPath, "PINS_Data_Gaussian1_orig_", i, ".pdf")
  drawConnectivityPdf(origOrdered, pdfFile, paste0("Original connectivity, k=", i))

  # Same two-pass procedure for the perturbed connectivity matrix.
  h <- heatmap(
    resultG$pert[[i]],
    col = my_palette, labRow = NA, labCol = NA, symm = TRUE,
    add.expr = box(which = "plot", lty = "solid", lwd = 2, bty = "o"),
    margins = c(0.5, 0.5), main = "Connectivity"
  )
  pertOrdered <- resultG$pert[[i]][h$rowInd, h$colInd]
  pdfFile <- paste0(pdfPath, "PINS_Data_Gaussian1_pert_", i, ".pdf")
  drawConnectivityPdf(pertOrdered, pdfFile, paste0("Perturbed connectivity, k=", i))
}

# Figure: the CDF curves stored in resultG$Discrepancy, one per k ----
pdfFile <- paste0(pdfPath, "PINS_CDF_Gaussian1.pdf")
pdf(pdfFile, width = 7, height = 6.5)
par(tcl = 0.3, mgp = c(1.7, 0.4, 0), mar = c(3, 3, 3, 3), xpd = TRUE)
# The k=2 curve is used to set up the axes; its points are drawn too small to
# see and every curve is then added as a line.
plot(
  resultG$Discrepancy$Entry[[2]], resultG$Discrepancy$CDF[[2]],
  cex = 0.00001, ylim = c(0, 1),
  xlab = "Entries", ylab = "CDF",
  main = "Cumulative distribution functions, Gaussian1",
  cex.lab = 1.4, cex.axis = 1.3, cex.main = 1.7
)
for (clus in clusRange) {
  lines(
    resultG$Discrepancy$Entry[[clus]], resultG$Discrepancy$CDF[[clus]],
    col = rainbow(10)[clus - 1], lwd = 2
  )
}
# Legend labels are stored at position k, so position 1 stays NA and is
# dropped again by the [clusRange] subset below.
legend <- NULL
legend[clusRange] <- paste0(
  "k=", clusRange,
  ", AUC=", round(resultG$Discrepancy$AUC[clusRange], digits = 3)
)
legend("bottomright", legend = legend[clusRange], fill = rainbow(10), cex = 1.5)
dev.off()

# Figure: AUC against number of clusters, Gaussian1 versus Uniform1 ----
# Column means of AUC with a band of two standard deviations on either side.
M <- apply(AUC, 2, mean)
V <- apply(AUC, 2, sd) * 2
LI <- M - V
UI <- M + V

pdfFile <- paste0(pdfPath, "PINS_AUC_Gaussian1.pdf")
pdf(pdfFile, width = 7, height = 6.5)
par(tcl = 0.3, mgp = c(1.7, 0.4, 0), mar = c(3, 3, 3, 3), xpd = TRUE)
plot(
  c(1, clusRange), M,
  xlim = c(2, 10), ylim = c(0.5, 1), cex = 0.5,
  xlab = "Number of clusters", ylab = "AUC",
  main = "Area under the curve, Gaussian1",
  cex.lab = 1.4, cex.axis = 1.3, cex.main = 1.7
)

# Legend entries and their colour indices are collected as the series are
# drawn; each new series takes the next integer colour.
legend <- "Gaussian1"
fill <- 1
lines(M, lwd = 2, col = max(fill))
# Vertical bar from LI to UI at every k in clusRange.
segments(
  x0 = clusRange, y0 = LI[clusRange],
  x1 = clusRange, y1 = UI[clusRange],
  lwd = 4
)

# The Uniform1 results (resultU) come from a separate .RData file.
resultFile <- paste0(PINSPath, "PINS_Uniform1.RData")
load(resultFile)
legend <- c(legend, "Uniform1")
fill <- c(fill, max(fill) + 1)
points(c(1, clusRange), resultU$Discrepancy$AUC, cex = 0.5, col = max(fill))
lines(resultU$Discrepancy$AUC, lwd = 2, col = max(fill))

legend("bottomright", legend = legend, fill = fill, cex = 1.5)
dev.off()
# FINISHED PLOTTING FIGURES FOR GAUSSIAN1 DATASET------------------------------------------




# PLOT FIGURES FOR GAUSSIAN2, GAUSSIAN3, GAUSSIAN5, and GAUSSIAN9 DATASETs------------------------------------------
# One PDF per (dataset, class) pair, showing the expression values of the first
# three samples of that class. The four datasets differ only in which classes
# are drawn, the page height and the dot opacity, so those settings are listed
# once and a single loop does the plotting.
panelSpecs <- list(
  list(classes = 2, shown = 1:2,        height = 3,   opacity = 0.5),
  list(classes = 3, shown = 1:3,        height = 2.2, opacity = 0.5),
  list(classes = 5, shown = c(1, 2, 5), height = 1.9, opacity = 0.4),
  list(classes = 9, shown = c(1, 2, 9), height = 1.9, opacity = 0.4)
)

for (panelSpec in panelSpecs) {
  # Read the settings before load() so that objects restored from the .RData
  # file cannot interfere with them.
  classes <- panelSpec$classes
  shownClasses <- panelSpec$shown
  pdfHeight <- panelSpec$height
  dotOpacity <- panelSpec$opacity

  resultFile <- paste0(PINSPath, "PINS_Gaussian", classes, ".RData")
  load(resultFile)

  for (i in shownClasses) {
    pdfFile <- paste0(pdfPath, "PINS_Data_Gaussian", classes, "_", i, ".pdf")
    pdf(pdfFile, height = pdfHeight, width = 7)
    par(tcl = 0.3, mgp = c(1.3, 0.3, 0), mar = c(2.5, 2.5, 2.5, 1), xpd = TRUE)
    # Axes only; the placeholder point is too small to be visible.
    plot(
      1, 0,
      xlim = c(1, ncol), ylim = c(-6, 6), cex = 0.00001,
      xlab = "Genes", ylab = "Expression",
      main = paste0("Gene expression of class ", i, ", Dataset", classes),
      cex.lab = 1.4, cex.axis = 1.3, cex.main = 1.7
    )
    # Row names of the samples whose second `group` column equals this class.
    A <- rownames(group)[group[, 2] == i]
    for (j in A[1:3]) {
      points(seq(ncol), dataG[j, ], cex = 0.2, col = alpha(1, dotOpacity), pch = 19)
    }
    dev.off()
  }
}



# Connectivity heatmaps (original and perturbed) for datasets 2, 3, 5 and 9 ----
for (classes in c(2, 3, 5, 9)) {
  clusRange <- 2:10

  resultFile <- paste0(PINSPath, "PINS_Gaussian", classes, ".RData")
  load(resultFile)

  my_palette <- colorRampPalette(c("white", "blue"))(n = 200)

  # Values of k for which a heatmap is produced, per dataset.
  kShown <- switch(
    as.character(classes),
    "2" = c(2, 3, 10),
    "3" = c(2, 3, 4, 5, 10),
    "5" = c(3, 5, 10),
    "9" = c(2, 9, 10)
  )

  for (i in kShown) {
    # --- original connectivity ---
    origMat <- resultG$orig[[i]]
    # First pass on the current device: only the ordering (rowInd / colInd)
    # computed by heatmap() is kept.
    h <- heatmap(
      origMat,
      col = my_palette, labRow = NA, labCol = NA, symm = TRUE,
      add.expr = box(which = "plot", lty = "solid", lwd = 2, bty = "o"),
      margins = c(0.5, 0.5), main = "Connectivity"
    )

    pdfFile <- paste0(pdfPath, "PINS_Data_Gaussian", classes, "_orig_", i, ".pdf")
    pdf(pdfFile, height = 7.6, width = 7)
    par(cex.main = 2.3, oma = c(1.5, 3, 0, 0))
    # Second pass into the PDF: the matrix is already ordered, so Rowv = NA.
    heatmap(
      origMat[h$rowInd, h$colInd],
      col = my_palette, labRow = NA, labCol = NA, symm = TRUE, Rowv = NA,
      add.expr = box(which = "plot", lty = "solid", lwd = 2, bty = "o"),
      margins = c(0.5, 0.5), main = paste0("Original connectivity, k=", i)
    )
    mtext("Patients", side = 2, cex = 3, line = 5)
    mtext("Patients", side = 1, cex = 3, line = 5.5, at = 0.45)
    dev.off()

    # --- perturbed connectivity ---
    pertMat <- resultG$pert[[i]]
    h <- heatmap(
      pertMat,
      col = my_palette, labRow = NA, labCol = NA, symm = TRUE,
      add.expr = box(which = "plot", lty = "solid", lwd = 2, bty = "o"),
      margins = c(0.5, 0.5), main = "Connectivity"
    )

    pdfFile <- paste0(pdfPath, "PINS_Data_Gaussian", classes, "_pert_", i, ".pdf")
    pdf(pdfFile, height = 7.6, width = 7)
    par(cex.main = 2.3, oma = c(1.5, 3, 0, 0))
    heatmap(
      pertMat[h$rowInd, h$colInd],
      col = my_palette, labRow = NA, labCol = NA, symm = TRUE, Rowv = NA,
      add.expr = box(which = "plot", lty = "solid", lwd = 2, bty = "o"),
      margins = c(0.5, 0.5), main = paste0("Perturbed connectivity, k=", i)
    )
    mtext("Patients", side = 2, cex = 3, line = 5)
    mtext("Patients", side = 1, cex = 3, line = 5.5, at = 0.45)
    dev.off()
  }
}


# CDF figure for datasets 2, 3, 5 and 9: one curve per k in clusRange ----
for (classes in c(2, 3, 5, 9)) {
  resultFile <- paste0(PINSPath, "PINS_Gaussian", classes, ".RData")
  load(resultFile)

  pdfFile <- paste0(pdfPath, "PINS_CDF_Gaussian", classes, ".pdf")
  pdf(pdfFile, width = 7, height = 6.5)
  par(tcl = 0.3, mgp = c(1.7, 0.4, 0), mar = c(3, 3, 3, 3), xpd = TRUE)
  # The k=2 curve sets up the axes; all curves are then added as lines.
  plot(
    resultG$Discrepancy$Entry[[2]], resultG$Discrepancy$CDF[[2]],
    cex = 0.01, ylim = c(0, 1),
    xlab = "Entries", ylab = "CDF",
    main = paste0("Cumulative distribution functions, Dataset", classes),
    cex.lab = 1.4, cex.axis = 1.3, cex.main = 1.7
  )
  for (clus in clusRange) {
    lines(
      resultG$Discrepancy$Entry[[clus]], resultG$Discrepancy$CDF[[clus]],
      col = rainbow(10)[clus - 1], lwd = 2
    )
  }

  # Labels are stored at position k (position 1 stays NA and is dropped by the
  # [clusRange] subset in the legend call).
  legend <- NULL
  legend[clusRange] <- paste0(
    "k=", clusRange,
    ", AUC=", round(resultG$Discrepancy$AUC[clusRange], digits = 3)
  )
  # Bold face (font 2) for the entry whose k equals this dataset's `classes`.
  font <- replace(rep(1, 10), classes, 2)
  legend(
    "bottomright",
    legend = legend[clusRange], fill = rainbow(10),
    text.font = font[clusRange], cex = 1.5
  )
  dev.off()
}

# AUC figures for datasets 2, 3, 5 and 9, each drawn against the Gaussian1
# ("Random") reference ----
# Reference curve: column means of AUC from the Gaussian1 results, with a band
# of two standard deviations on either side.
load(paste0(PINSPath, "PINS_Gaussian1.RData"))
M <- apply(AUC, 2, mean)
V <- apply(AUC, 2, sd) * 2
LI <- M - V
UI <- M + V

for (classes in c(2, 3, 5, 9)) {
  resultFile <- paste0(PINSPath, "PINS_Gaussian", classes, ".RData")
  load(resultFile)
  pdfFile <- paste0(pdfPath, "PINS_AUC_Gaussian", classes, ".pdf")
  pdf(pdfFile, width = 7, height = 6.5)
  par(tcl = 0.3, mgp = c(1.7, 0.4, 0), mar = c(3, 3, 3, 3), xpd = TRUE)
  # Empty frame (the placeholder point has zero size).
  plot(
    2, 1,
    xlim = c(2, 10), ylim = c(0.5, 1), cex = 0,
    xlab = "Number of clusters", ylab = "AUC",
    main = paste0("Area under the curve, Dataset", classes),
    cex.lab = 1.4, cex.axis = 1.3, cex.main = 1.7
  )

  # Black: reference curve with its LI..UI bars.
  points(c(1, clusRange), M, cex = 0.5, col = "black")
  lines(M, lwd = 2)
  segments(
    x0 = clusRange, y0 = LI[clusRange],
    x1 = clusRange, y1 = UI[clusRange],
    lwd = 4
  )

  # Red: AUC of this dataset.
  datasetAUC <- resultG$Discrepancy$AUC
  points(c(1, clusRange), datasetAUC, cex = 0.5, col = "red")
  lines(datasetAUC, lwd = 2, col = "red")

  # Blue: difference to the reference, shifted up by 0.5 so it shares the
  # plotting range; the right-hand axis labels undo that shift.
  shiftedDelta <- datasetAUC - M + 0.5
  points(c(1, clusRange), shiftedDelta, cex = 0.5, col = "blue")
  lines(shiftedDelta, col = "blue", lwd = 2)
  axis(side = 4, at = c(0.5, 0.6, 0.7, 0.8, 0.9, 1), labels = FALSE, col = "blue", cex.axis = 1.4)
  tickPos <- axTicks(4)
  mtext(side = 4, text = tickPos - 0.5, at = tickPos, col = "blue", line = 0.4, cex = 1.4)
  mtext(side = 4, expression(Delta ~ "AUC"), cex = 1.7, col = "blue", line = 2)

  # Vertical marker at k == classes.
  lines(c(classes, classes), c(0.48, 1.02))

  legend <- c(paste0("Dataset", classes), "Random", expression(Delta ~ "AUC"))
  # Datasets 5 and 9 get the legend top-left, the others bottom-right. For
  # dataset 9 the arrow and its caption sit to the left of the marker, for the
  # others to the right.
  legendPos <- if (classes %in% c(5, 9)) "topleft" else "bottomright"
  side <- if (classes == 9) -1 else 1
  legend(legendPos, legend = legend, fill = c("red", "black", "blue"), cex = 1.5)
  arrows(classes + side, 0.5, classes, 0.5, lwd = 2, col = "red")
  text(classes + side * 2.8, 0.5, "true number of classes", cex = 1.4, col = "red")

  dev.off()
}



# Plot AUC for all 10 simulation datasets in one figure ----
pdfFile <- paste0(pdfPath, "PINS_AUC_Simulation.pdf")
pdf(pdfFile)
par(tcl = 0.3, mgp = c(1.7, 0.4, 0), mar = c(3, 3, 2.5, 1), xpd = TRUE)
# Empty frame (zero-size placeholder point) with a hand-made x axis at k=2..10.
plot(
  2, 1,
  xlim = c(2, 10), xaxt = "n", ylim = c(0.5, 1), cex = 0,
  xlab = "Number of clusters", ylab = "AUC",
  main = "Area under the curve",
  cex.lab = 1.4, cex.axis = 1.3, cex.main = 1.7
)
axis(side = 1, at = 2:10, labels = 2:10, cex.axis = 1.3)

# One colour per dataset, shared by its points, its line and its legend box.
# Dataset 1 (the "Random" reference) is black, dataset j > 1 uses the j-th of
# ten rainbow colours.
# The previous code coloured the points with max(fill). Once a rainbow colour
# had been stored, `fill` was a character vector, so max() returned the
# lexicographically largest colour string and the points no longer matched
# their own line.
fill <- c("black", rainbow(10)[-1])
legend <- c("Random", paste0("Dataset", 2:10))

for (classes in 1:10) {
  resultFile <- paste0(PINSPath, "PINS_Gaussian", classes, ".RData")
  load(resultFile)
  if (classes == 1) {
    # Reference curve: column means of AUC. V, LI and UI are not drawn in this
    # figure; they are kept because other sections of the script compute the
    # same quantities.
    M <- apply(AUC, 2, mean)
    V <- apply(AUC, 2, sd) * 2
    LI <- M - V
    UI <- M + V
    curve <- M
  } else {
    curve <- resultG$Discrepancy$AUC
  }
  points(1:10, curve, cex = 0.5, col = fill[classes])
  lines(curve, lwd = 2, col = fill[classes])
}
legend("bottomright", legend = legend, fill = fill, cex = 1.5)
dev.off()


# PINS results ----
# For datasets 2..10, print flexclust::randIndex() between group$Group and the
# partition stored in resultG$groups.
library(flexclust)
for (classes in 2:10) {
  rdataFile <- paste0(PINSPath, "PINS_Gaussian", classes, ".RData")
  load(rdataFile)
  pinsAgreement <- randIndex(group$Group, resultG$groups)
  message(pinsAgreement)
}

# CC results ----
# For datasets 2..10, print flexclust::randIndex() between group$Group and the
# consensus classes stored in `results`.
library(flexclust)
for (classes in 2:10) {
  rdataFile <- paste0(CCPath, "CC_Gaussian", classes, ".RData")
  load(rdataFile)

  # CC identifies the right number of clusters, so the entry of `results` at
  # position `classes` is the one evaluated.
  memb <- results[[classes]]$consensusClass

  ccAgreement <- randIndex(group$Group, memb)
  message(ccAgreement)
}


# SNF results ----
# For datasets 2..10, print randIndex() between group$Group and the SNF
# partition groupSNF (both expected to be provided by the loaded file).
for (classes in 2:10) {
  rdataFile <- paste0(SNFPath, "SNF_Gaussian", classes, ".RData")
  load(rdataFile)

  snfAgreement <- randIndex(group$Group, groupSNF)
  message(snfAgreement)
}

# iClusterPlus results ----
# For datasets 2..10: plot the deviance ratio at the minimum-BIC fit of every
# entry of cv.fit (a diagnostic for choosing the number of clusters), then
# print randIndex() between group$Group and the iClusterPlus partition.
library(iClusterPlus)
# Hand-picked entry of cv.fit to evaluate for each dataset, in the order
# classes = 2, 3, ..., 10.
# NOTE(review): this vector was defined but never used. The Rand index was
# computed against `groupSNF`, a stale object left over from the SNF section.
# The fix below assumes k holds positions in cv.fit and that cv.fit is a list
# of tune.iClusterPlus() outputs (the form getBIC()/getDevR() take) -- confirm.
k <- c(1, 3, 4, 5, 6, 7, 8, 9, 9)
for (classes in 2:10) {
  # Pick the entry before load() so an object named `k` inside the .RData file
  # cannot change the selection.
  kSelected <- k[classes - 1]

  file <- paste0(iClusterPlusPath, "iClusterPlus_Gaussian", classes, ".RData")
  load(file)

  nK <- length(cv.fit)
  BIC <- getBIC(cv.fit)
  devR <- getDevR(cv.fit)
  # Row index of the smallest BIC within each column of BIC.
  minBICid <- apply(BIC, 2, which.min)
  devRatMinBIC <- rep(NA, nK)
  for (i in seq_len(nK)) {
    devRatMinBIC[i] <- devR[minBICid[i], i]
  }
  plot(devRatMinBIC)

  # Cluster labels of the minimum-BIC fit of the selected cv.fit entry.
  memb <- cv.fit[[kSelected]]$fit[[minBICid[kSelected]]]$clusters
  message(randIndex(group$Group, memb))
}





# Plot the figures for testing noise sensitivity ----
# For each shift M, simulate a standard-normal matrix, add M to one block of
# columns per class (for the rows of that class only), and overlay the
# histograms of the unchanged and the shifted values.
pdfPath <- "~/Desktop/Projects/Subtyping/PackageAndTesting/Plots/Figures/"
library(scales)
ncol <- 1000
nrow <- 100
classes <- 9
blockWidth <- 100 # number of consecutive columns shifted for each class

# Class labels: floor(nrow / classes) rows per class, with any remaining rows
# assigned to the last class. This does not depend on M, so it is built once.
classLabels <- rep(seq_len(classes), each = floor(nrow / classes))
if (length(classLabels) < nrow) {
  classLabels <- c(classLabels, rep(classes, nrow - length(classLabels)))
}
group <- data.frame(row.names = seq(nrow), Sample = seq(nrow), Group = classLabels)

for (M in c(4, 3, 2, 1, 0.6)) {
  dataG <- matrix(rnorm(nrow * ncol, 0, 1), nrow = nrow, ncol = ncol)
  rownames(dataG) <- seq(nrow)

  # Shift block i of columns by M for the rows of class i.
  for (i in seq_len(classes)) {
    classRows <- rownames(group)[group[, 2] == i]
    blockCols <- (blockWidth * (i - 1) + 1):(blockWidth * i)
    dataG[classRows, blockCols] <- dataG[classRows, blockCols] + M
  }

  # X: values of each class outside its own shifted block.
  # Y: values inside it.
  # The excluded range used to start one column early, at (i-1)*100 instead of
  # (i-1)*100+1, so for i > 1 one unchanged column was dropped from X.
  X <- unlist(lapply(seq_len(classes), function(i) {
    blockCols <- (blockWidth * (i - 1) + 1):(blockWidth * i)
    as.numeric(dataG[group$Group == i, -blockCols])
  }))
  Y <- unlist(lapply(seq_len(classes), function(i) {
    blockCols <- (blockWidth * (i - 1) + 1):(blockWidth * i)
    as.numeric(dataG[group$Group == i, blockCols])
  }))

  pdfFile <- paste0(pdfPath, "PINS_Simulation_density_", M, ".pdf")
  pdf(pdfFile)
  par(tcl = 0.3, mgp = c(1.4, 0.2, 0), mar = c(3, 3, 3, 1), xpd = TRUE)
  hist(
    X,
    xlab = "Expression values", breaks = 50,
    xlim = c(-3, 7), ylim = c(0, 0.5), probability = TRUE,
    main = bquote(paste("Histogram of expression values (", mu, "=", .(M), ")", sep = "")),
    cex.main = 2, cex.lab = 1.5, col = "blue"
  )
  # Draw the second histogram over the first, using the same axis limits.
  par(new = TRUE)
  hist(
    Y,
    breaks = 50, xlim = c(-3, 7), ylim = c(0, 0.5),
    col = alpha("red", 0.7), probability = TRUE,
    main = "", xlab = "", ylab = ""
  )
  legend(
    x = "topright",
    legend = c("Unchanged genes", "Differential expressed genes"),
    fill = c("blue", "red"), cex = 1.3
  )
  box(bty = "o")
  dev.off()
}

