# NOTE: the workspace is deliberately NOT cleared with rm(list = ls()) any
# more. Sourcing this script must not destroy objects the caller already has;
# every object this section needs is (re)assigned below before it is used.

# Set these paths before running and check that the data can be loaded.
# Both must end with "/" because file and folder names are appended to them
# by plain string concatenation.
dataPath <- "~/Desktop/Projects/SubTyping/DataTCGA/"
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/CCResult/No_miRNA/"

# dataPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/DataTCGA/"
# resultPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/CCResult/2type/"

# IMPORTANT: restart R after installing ConsensusClusterPlus. Otherwise the
# script crashes with a strange message:
# Error: lazy-load database 'P' is corrupt
library(preprocessCore)
library(ConsensusClusterPlus)

# Standardize every column of x to zero mean and unit standard deviation
# (the normalization used for SNF when data types are concatenated).
#   x: matrix-like object; it is coerced with as.matrix().
# Returns a matrix with the same dimensions and dimnames as as.matrix(x).
# Columns whose standard deviation is exactly 0 are only centered, because
# their divisor is replaced by 1 to avoid dividing by zero.
standardNormalization <- function(x) {
    x <- as.matrix(x)
    colCenter <- apply(x, 2, mean)
    colSpread <- apply(x, 2, sd)
    colSpread[colSpread == 0] <- 1
    centered <- sweep(x, 2, colCenter, FUN = "-")
    sweep(centered, 2, colSpread, FUN = "/")
}

# Candidate cluster counts; only the maximum is used below (as maxK).
clusRange <- 2:10

# Consensus clustering of the concatenated mydatGE + mydatME data, one run per
# dataset. For every dataset the clustering result, the matching survival
# table and the start/end time stamps are saved to
# <resultPath>CC_<dataset>.RData.
for (dataset in c("LUSC", "GBM", "BRCA", "LAML", "KIRC", "COAD")) {
    # Loads the per-dataset objects used below: survival, mydatGE, mydatME
    # and mydatMI.
    load(paste0(dataPath, dataset, "/", dataset, "_ProcessedData.RData"))

    # Keep only patients present in the survival table and in all three data
    # matrices (mydatMI is not clustered here, but it still restricts the
    # patient set).
    patients <- Reduce(intersect, list(
        rownames(survival),
        rownames(mydatGE),
        rownames(mydatME),
        rownames(mydatMI)
    ))

    mydatGE <- as.matrix(mydatGE[patients, ])
    mydatME <- as.matrix(mydatME[patients, ])
    survival <- survival[patients, ]

    t1_All <- Sys.time()
    # Standardize each column, then transpose so that patients become columns.
    d <- t(standardNormalization(cbind(mydatGE, mydatME)))
    # Subtract each row's median from that row.
    d <- sweep(d, 1, apply(d, 1, median, na.rm = TRUE))

    path <- paste0(resultPath, dataset, "/")
    # recursive = TRUE also creates resultPath itself on a fresh machine; the
    # previous non-recursive call failed when the parent folder was missing.
    if (!file.exists(path)) {
        dir.create(path, recursive = TRUE)
    }
    result_All <- ConsensusClusterPlus(
        d,
        maxK = max(clusRange),
        reps = 1000,
        pItem = 0.8,
        pFeature = 1,
        title = path,
        clusterAlg = "hc",
        distance = "pearson",
        plot = "png",
        seed = 888
    )
    t2_All <- Sys.time()

    resultFile <- paste0(resultPath, "CC_", dataset, ".RData")
    save(result_All, dataset, survival, t1_All, t2_All, file = resultFile)
}


library(survival)

# Number of clusters chosen for each dataset; we get these from CDF plots.
# The order of the names is the order in which the datasets are processed.
chosenK <- c(KIRC = 6, GBM = 7, LAML = 6, LUSC = 5, BRCA = 7, COAD = 5)

# For every dataset: reload the saved clustering, take the partition with the
# chosen number of clusters, test the groups with a Cox model and write the
# survival curves to <resultPath>CC_<dataset>.pdf.
for (dataset in names(chosenK)) {
    resultFile <- paste0(resultPath, "CC_", dataset, ".RData")
    # Restores result_All, dataset, survival, t1_All and t2_All.
    load(resultFile)

    # `[[` stops with an error for a dataset that has no entry, instead of
    # silently reusing the kAll left over from the previous iteration.
    kAll <- chosenK[[dataset]]

    pdfFile <- paste0(resultPath, "CC_", dataset, ".pdf")
    pdf(pdfFile)

    # The finally clause closes the PDF device even when fitting or plotting
    # fails, so an error does not leave a dangling graphics device behind.
    tryCatch({
        # Cox All
        groupAll <- result_All[[kAll]]$consensusClass
        groups <- factor(groupAll)
        names(groups) <- rownames(survival)
        groupLevels <- levels(groups)
        # One colour per factor level, shared by the curves and the legend.
        # The curves follow level order, so indexing by level keeps the two
        # in agreement (unique(groups) follows order of first appearance).
        groupColours <- seq_along(groupLevels)

        coxFit <- coxph(Surv(time = Survival, event = Death) ~ groups, data = survival, ties = "exact")
        mfit <- survfit(Surv(Survival, Death == 1) ~ factor(groups), data = survival)
        plot(mfit, col = groupColours, main = paste("Survival curves for combined data of ", dataset, " (CC)", sep = ""), xlab = "Days", ylab = "Survival", lwd = 2)
        # sctest[3] is the third element of the score test in the Cox summary.
        legend("top", legend = paste("Cox p-value:", round(summary(coxFit)$sctest[3], digits = 5), sep = ""))
        legend("topright", fill = groupColours, legend = paste("Group ", groupLevels, ": ", table(groups)[groupLevels], sep = ""))
    }, finally = dev.off())
}






###### NOW INTEGRATE METHYLATION AND MIRNA
# NOTE: the workspace is deliberately NOT cleared with rm(list = ls()) any
# more. Wiping everything in the middle of a script also destroys objects the
# caller may still need; every object this section uses is (re)assigned below
# before it is read.

# Set these paths before running and check that the data can be loaded.
# Both must end with "/" because file and folder names are appended to them
# by plain string concatenation.
dataPath <- "~/Desktop/Projects/SubTyping/DataTCGA/"
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/CCResult/No_mRNA/"

# dataPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/DataTCGA/"
# resultPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/CCResult/2type/"

# IMPORTANT: restart R after installing ConsensusClusterPlus. Otherwise the
# script crashes with a strange message:
# Error: lazy-load database 'P' is corrupt
library(preprocessCore)
library(ConsensusClusterPlus)

# Standardize every column of x to zero mean and unit standard deviation
# (the normalization used for SNF when data types are concatenated).
#   x: matrix-like object; it is coerced with as.matrix().
# Returns a matrix with the same dimensions and dimnames as as.matrix(x).
# Columns whose standard deviation is exactly 0 are only centered, because
# their divisor is replaced by 1 to avoid dividing by zero.
standardNormalization <- function(x) {
  x <- as.matrix(x)
  colCenter <- apply(x, 2, mean)
  colSpread <- apply(x, 2, sd)
  colSpread[colSpread == 0] <- 1
  centered <- sweep(x, 2, colCenter, FUN = "-")
  sweep(centered, 2, colSpread, FUN = "/")
}

# Candidate cluster counts; only the maximum is used below (as maxK).
clusRange <- 2:10

# Consensus clustering of the concatenated mydatME + mydatMI data, one run per
# dataset. For every dataset the clustering result, the matching survival
# table and the start/end time stamps are saved to
# <resultPath>CC_<dataset>.RData.
for (dataset in c("LUSC", "GBM", "BRCA", "LAML", "KIRC", "COAD")) {
  # Loads the per-dataset objects used below: survival, mydatGE, mydatME
  # and mydatMI.
  load(paste0(dataPath, dataset, "/", dataset, "_ProcessedData.RData"))

  # Keep only patients present in the survival table and in all three data
  # matrices (mydatGE is not clustered here, but it still restricts the
  # patient set).
  patients <- Reduce(intersect, list(
    rownames(survival),
    rownames(mydatGE),
    rownames(mydatME),
    rownames(mydatMI)
  ))

  mydatME <- as.matrix(mydatME[patients, ])
  mydatMI <- as.matrix(mydatMI[patients, ])
  survival <- survival[patients, ]

  t1_All <- Sys.time()
  # Standardize each column, then transpose so that patients become columns.
  d <- t(standardNormalization(cbind(mydatME, mydatMI)))
  # Subtract each row's median from that row.
  d <- sweep(d, 1, apply(d, 1, median, na.rm = TRUE))

  path <- paste0(resultPath, dataset, "/")
  # recursive = TRUE also creates resultPath itself on a fresh machine; the
  # previous non-recursive call failed when the parent folder was missing.
  if (!file.exists(path)) {
    dir.create(path, recursive = TRUE)
  }
  result_All <- ConsensusClusterPlus(
    d,
    maxK = max(clusRange),
    reps = 1000,
    pItem = 0.8,
    pFeature = 1,
    title = path,
    clusterAlg = "hc",
    distance = "pearson",
    plot = "png",
    seed = 888
  )
  t2_All <- Sys.time()

  resultFile <- paste0(resultPath, "CC_", dataset, ".RData")
  save(result_All, dataset, survival, t1_All, t2_All, file = resultFile)
}


library(survival)

# Number of clusters chosen for each dataset; we get these from CDF plots.
# The order of the names is the order in which the datasets are processed.
chosenK <- c(KIRC = 9, GBM = 6, LAML = 7, LUSC = 8, BRCA = 7, COAD = 8)

# For every dataset: reload the saved clustering, take the partition with the
# chosen number of clusters, test the groups with a Cox model and write the
# survival curves to <resultPath>CC_<dataset>.pdf.
for (dataset in names(chosenK)) {
  resultFile <- paste0(resultPath, "CC_", dataset, ".RData")
  # Restores result_All, dataset, survival, t1_All and t2_All.
  load(resultFile)

  # `[[` stops with an error for a dataset that has no entry, instead of
  # silently reusing the kAll left over from the previous iteration.
  kAll <- chosenK[[dataset]]

  pdfFile <- paste0(resultPath, "CC_", dataset, ".pdf")
  pdf(pdfFile)

  # The finally clause closes the PDF device even when fitting or plotting
  # fails, so an error does not leave a dangling graphics device behind.
  tryCatch({
    # Cox All
    groupAll <- result_All[[kAll]]$consensusClass
    groups <- factor(groupAll)
    names(groups) <- rownames(survival)
    groupLevels <- levels(groups)
    # One colour per factor level, shared by the curves and the legend.
    # The curves follow level order, so indexing by level keeps the two
    # in agreement (unique(groups) follows order of first appearance).
    groupColours <- seq_along(groupLevels)

    coxFit <- coxph(Surv(time = Survival, event = Death) ~ groups, data = survival, ties = "exact")
    mfit <- survfit(Surv(Survival, Death == 1) ~ factor(groups), data = survival)
    plot(mfit, col = groupColours, main = paste("Survival curves for combined data of ", dataset, " (CC)", sep = ""), xlab = "Days", ylab = "Survival", lwd = 2)
    # sctest[3] is the third element of the score test in the Cox summary.
    legend("top", legend = paste("Cox p-value:", round(summary(coxFit)$sctest[3], digits = 5), sep = ""))
    legend("topright", fill = groupColours, legend = paste("Group ", groupLevels, ": ", table(groups)[groupLevels], sep = ""))
  }, finally = dev.off())
}










###### NOW INTEGRATE MRNA AND MIRNA
# NOTE: the workspace is deliberately NOT cleared with rm(list = ls()) any
# more. Wiping everything in the middle of a script also destroys objects the
# caller may still need; every object this section uses is (re)assigned below
# before it is read.

# Set these paths before running and check that the data can be loaded.
# Both must end with "/" because file and folder names are appended to them
# by plain string concatenation.
dataPath <- "~/Desktop/Projects/SubTyping/DataTCGA/"
resultPath <- "~/Desktop/Projects/SubTyping/PackageAndTesting/CCResult/No_methyl/"

# dataPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/DataTCGA/"
# resultPath="/wsu/home/ex/ex60/ex6091/Subtyping/PackageAndTesting/CCResult/2type/"

# IMPORTANT: restart R after installing ConsensusClusterPlus. Otherwise the
# script crashes with a strange message:
# Error: lazy-load database 'P' is corrupt
library(preprocessCore)
library(ConsensusClusterPlus)

# Standardize every column of x to zero mean and unit standard deviation
# (the normalization used for SNF when data types are concatenated).
#   x: matrix-like object; it is coerced with as.matrix().
# Returns a matrix with the same dimensions and dimnames as as.matrix(x).
# Columns whose standard deviation is exactly 0 are only centered, because
# their divisor is replaced by 1 to avoid dividing by zero.
standardNormalization <- function(x) {
  x <- as.matrix(x)
  colCenter <- apply(x, 2, mean)
  colSpread <- apply(x, 2, sd)
  colSpread[colSpread == 0] <- 1
  centered <- sweep(x, 2, colCenter, FUN = "-")
  sweep(centered, 2, colSpread, FUN = "/")
}

# Candidate cluster counts; only the maximum is used below (as maxK).
clusRange <- 2:10

# Consensus clustering of the concatenated mydatGE + mydatMI data, one run per
# dataset. For every dataset the clustering result, the matching survival
# table and the start/end time stamps are saved to
# <resultPath>CC_<dataset>.RData.
for (dataset in c("LUSC", "GBM", "BRCA", "LAML", "KIRC", "COAD")) {
  # Loads the per-dataset objects used below: survival, mydatGE, mydatME
  # and mydatMI.
  load(paste0(dataPath, dataset, "/", dataset, "_ProcessedData.RData"))

  # Keep only patients present in the survival table and in all three data
  # matrices (mydatME is not clustered here, but it still restricts the
  # patient set).
  patients <- Reduce(intersect, list(
    rownames(survival),
    rownames(mydatGE),
    rownames(mydatME),
    rownames(mydatMI)
  ))

  mydatGE <- as.matrix(mydatGE[patients, ])
  mydatMI <- as.matrix(mydatMI[patients, ])
  survival <- survival[patients, ]

  t1_All <- Sys.time()
  # Standardize each column, then transpose so that patients become columns.
  d <- t(standardNormalization(cbind(mydatGE, mydatMI)))
  # Subtract each row's median from that row.
  d <- sweep(d, 1, apply(d, 1, median, na.rm = TRUE))

  path <- paste0(resultPath, dataset, "/")
  # recursive = TRUE also creates resultPath itself on a fresh machine; the
  # previous non-recursive call failed when the parent folder was missing.
  if (!file.exists(path)) {
    dir.create(path, recursive = TRUE)
  }
  result_All <- ConsensusClusterPlus(
    d,
    maxK = max(clusRange),
    reps = 1000,
    pItem = 0.8,
    pFeature = 1,
    title = path,
    clusterAlg = "hc",
    distance = "pearson",
    plot = "png",
    seed = 888
  )
  t2_All <- Sys.time()

  resultFile <- paste0(resultPath, "CC_", dataset, ".RData")
  save(result_All, dataset, survival, t1_All, t2_All, file = resultFile)
}


library(survival)

# Number of clusters chosen for each dataset; we get these from CDF plots.
# The order of the names is the order in which the datasets are processed.
chosenK <- c(KIRC = 7, GBM = 7, LAML = 5, LUSC = 6, BRCA = 8, COAD = 5)

# For every dataset: reload the saved clustering, take the partition with the
# chosen number of clusters, test the groups with a Cox model and write the
# survival curves to <resultPath>CC_<dataset>.pdf.
for (dataset in names(chosenK)) {
  resultFile <- paste0(resultPath, "CC_", dataset, ".RData")
  # Restores result_All, dataset, survival, t1_All and t2_All.
  load(resultFile)

  # `[[` stops with an error for a dataset that has no entry, instead of
  # silently reusing the kAll left over from the previous iteration.
  kAll <- chosenK[[dataset]]

  pdfFile <- paste0(resultPath, "CC_", dataset, ".pdf")
  pdf(pdfFile)

  # The finally clause closes the PDF device even when fitting or plotting
  # fails, so an error does not leave a dangling graphics device behind.
  tryCatch({
    # Cox All
    groupAll <- result_All[[kAll]]$consensusClass
    groups <- factor(groupAll)
    names(groups) <- rownames(survival)
    groupLevels <- levels(groups)
    # One colour per factor level, shared by the curves and the legend.
    # The curves follow level order, so indexing by level keeps the two
    # in agreement (unique(groups) follows order of first appearance).
    groupColours <- seq_along(groupLevels)

    coxFit <- coxph(Surv(time = Survival, event = Death) ~ groups, data = survival, ties = "exact")
    mfit <- survfit(Surv(Survival, Death == 1) ~ factor(groups), data = survival)
    plot(mfit, col = groupColours, main = paste("Survival curves for combined data of ", dataset, " (CC)", sep = ""), xlab = "Days", ylab = "Survival", lwd = 2)
    # sctest[3] is the third element of the score test in the Cox summary.
    legend("top", legend = paste("Cox p-value:", round(summary(coxFit)$sctest[3], digits = 5), sep = ""))
    legend("topright", fill = groupColours, legend = paste("Group ", groupLevels, ": ", table(groups)[groupLevels], sep = ""))
  }, finally = dev.off())
}
