# Start from an empty workspace and a fixed RNG seed so that runs are repeatable.
rm(list = ls())
set.seed(1)

# Input directory (one <dataset>.RData file per dataset) and output directory
# for the SNF results. Both are used as string prefixes, so the trailing slash
# is required.
dataPath <- "~/Desktop/Projects/SubTyping/METABRIC/"
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/SNFResult/"

# dataPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/DataTCGA/"
# resultPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/SNFResult/"

# Load SNFtool last to preserve its distance functions.
# Please restart R before running this script:
# for unknown reasons, loading some other packages may change the SNF results slightly.
library(SNFtool)


## First, set all the parameters:
K <- 20       # number of neighbors, usually (10~30)
alpha <- 0.5  # hyperparameter, usually (0.3~0.8)
NIT <- 10     # number of iterations, usually (10~20)

# Base names of the datasets to process; each maps to <dataPath><name>.RData.
datasets <- c("METABRIC_discovery", "METABRIC_validation")

# For every dataset: load the data, align gene-expression and CNV matrices on
# the patients shared with the survival table, build one affinity network per
# data type, fuse them with SNF, cluster each network spectrally, and save the
# cluster assignments together with the survival/clinical tables.
for (dataset in datasets) {
    file <- paste0(dataPath, dataset, ".RData")

    # load() writes into the current environment, so an object missing from
    # this file would silently be taken from the previous iteration. Fail
    # loudly instead of saving results that mix two datasets.
    loadedObjects <- load(file)
    requiredObjects <- c("mydatGE", "mydatCNV", "survival", "survivalDFS", "clinical")
    missingObjects <- setdiff(requiredObjects, loadedObjects)
    if (length(missingObjects) > 0) {
        stop("Objects missing from ", file, ": ",
             paste(missingObjects, collapse = ", "), call. = FALSE)
    }

    # Timing starts after loading, so it covers preprocessing and clustering only.
    t1 <- Sys.time()

    # Keep only gene-expression columns without missing values.
    mydatGE <- mydatGE[, colSums(is.na(mydatGE)) == 0, drop = FALSE]
    # Drop the first five CNV columns and transpose so that the remaining
    # columns become rows (rows are then matched against patient IDs below).
    mydatCNV <- t(mydatCNV[, -c(1:5), drop = FALSE])

    # Patients present in the survival table and in both data types.
    patients <- Reduce(intersect,
                       list(rownames(survival), rownames(mydatGE), rownames(mydatCNV)))
    # The K-nearest-neighbour steps need more than K samples; stop with a
    # readable message rather than an indexing error deep inside SNFtool.
    if (length(patients) <= K) {
        stop("Dataset ", dataset, " has only ", length(patients),
             " patients common to all inputs; more than K = ", K, " are required.",
             call. = FALSE)
    }

    mydatGE <- mydatGE[patients, , drop = FALSE]
    mydatCNV <- mydatCNV[patients, , drop = FALSE]

    # Normalization
    mydatGE <- standardNormalization(mydatGE)
    mydatCNV <- standardNormalization(mydatCNV)

    # Pairwise patient distance matrices, one per data type.
    # NOTE(review): dist2() is documented as returning *squared* Euclidean
    # distances and newer SNFtool examples take the square root before
    # affinityMatrix() - confirm which convention is intended here.
    PSM_GE <- dist2(as.matrix(mydatGE), as.matrix(mydatGE))
    PSM_CNV <- dist2(as.matrix(mydatCNV), as.matrix(mydatCNV))

    # Per-data-type affinity networks and their fusion.
    W1 <- affinityMatrix(PSM_GE, K, alpha)
    W2 <- affinityMatrix(PSM_CNV, K, alpha)
    W <- SNF(list(W1, W2), K, NIT)

    ##### Clustering
    # For each network, take the first cluster-number estimate over 2..10 and
    # run spectral clustering with it.
    # Groups with just gene expression
    CGE <- estimateNumberOfClustersGivenGraph(W1, 2:10)[[1]]
    groupGE <- spectralClustering(W1, CGE)
    # Groups with CNV
    CCNV <- estimateNumberOfClustersGivenGraph(W2, 2:10)[[1]]
    groupCNV <- spectralClustering(W2, CCNV)
    # Groups with SNF
    C <- estimateNumberOfClustersGivenGraph(W, 2:10)[[1]]
    groupSNF <- spectralClustering(W, C)  # the final subtypes information

    t2 <- Sys.time()
    print(t2 - t1)

    resultFile <- paste0(resultPath, "SNF_", dataset, ".RData")
    save(patients, groupGE, groupCNV, groupSNF, survival, survivalDFS, clinical,
         t2, t1, file = resultFile)
}





library(survival)
# Draw one Kaplan-Meier plot for a cluster assignment and annotate it with the
# Cox score-test p-value and the concordance index.
#
# survi    : table with columns `Survival` (time) and `Death` (event
#            indicator), one row per patient.
# clusters : cluster label per patient, in the same row order as `survi`.
# title    : main title of the plot.
#
# Returns (invisibly) the rounded p-value and concordance that were printed in
# the legend.
plotSubtypeSurvival <- function(survi, clusters, title) {
    survi <- as.data.frame(survi)
    stopifnot(length(clusters) == nrow(survi))
    survi$groups <- factor(clusters)

    # Score (log-rank) test p-value from a Cox model with exact tie handling.
    exactFit <- coxph(Surv(time = Survival, event = Death) ~ groups,
                      data = survi, ties = "exact")
    coxp <- round(summary(exactFit)$sctest[3], digits = 10)

    # Concordance comes from a second fit with the default tie handling, so
    # the reported value is the same as in the earlier copy-pasted version.
    defaultFit <- coxph(Surv(time = Survival, event = Death) ~ groups, data = survi)
    CI <- round(summary(defaultFit)$concordance[1], 3)

    mfit <- survfit(Surv(Survival, Death == 1) ~ groups, data = survi)
    # Curves are drawn in factor-level order, so colour k always belongs to
    # the k-th group. (unique(groups) gave first-appearance order, which made
    # the colour/group pairing vary from plot to plot.)
    plot(mfit, col = seq_len(nlevels(survi$groups)), main = title,
         xlab = "Days", ylab = "Survival", lwd = 2)
    legend("top", legend = paste("Cox p-value:", coxp, ", CI=", CI, sep = ""))

    invisible(c(coxp = unname(coxp), CI = unname(CI)))
}

# For every dataset, reload the saved clustering results and write a PDF with
# six survival plots: disease-free survival (DFS) and overall survival, each
# for the gene-expression, CNV and fused (SNF) clusterings.
for (dataset in datasets) {
    resultFile <- paste0(resultPath, "SNF_", dataset, ".RData")

    # Make sure every object used below really comes from this file and is
    # not a leftover from a previous iteration.
    loadedObjects <- load(file = resultFile)
    requiredObjects <- c("patients", "groupGE", "groupCNV", "groupSNF",
                         "survival", "survivalDFS")
    missingObjects <- setdiff(requiredObjects, loadedObjects)
    if (length(missingObjects) > 0) {
        stop("Objects missing from ", resultFile, ": ",
             paste(missingObjects, collapse = ", "), call. = FALSE)
    }

    pdfFile <- paste0(resultPath, "SNF_", dataset, ".pdf")
    pdf(pdfFile)
    # Close the PDF device even when a model fit or plot fails; otherwise the
    # device stays open and later plots end up in the wrong file.
    tryCatch({
        # Disease-free survival
        survi <- survivalDFS[patients, ]
        plotSubtypeSurvival(survi, groupGE, "DFS survival curves for gene expression (SNF)")
        plotSubtypeSurvival(survi, groupCNV, "DFS survival curves for CNV (SNF)")
        plotSubtypeSurvival(survi, groupSNF, "DFS survival curves for combined data (SNF)")

        # Overall survival
        survi <- survival[patients, ]
        plotSubtypeSurvival(survi, groupGE, "Overall survival curves for gene expression (SNF)")
        plotSubtypeSurvival(survi, groupCNV, "Overall survival curves for CNV (SNF)")
        plotSubtypeSurvival(survi, groupSNF, "Overall survival curves for combined data (SNF)")
    }, finally = dev.off())
}
  
  
  