# Dataset subfolder to process; it is spliced into the input and output paths.
fld <- "nuclei"

# Read the expression table for the selected dataset.
gene_by_cell <- read.csv(
  file = paste0(
    "D:/Kyoko_Xiangduo_MERFISH_cell_cluster_project/dataset_pooled/original_data/r_based_analysis_data/",
    fld,
    "/gene_by_cell.csv"
  )
)

library(tasic2016data)
library(scrattch.hicat)
library(dendextend)
library(dplyr)
library(matrixStats)
library(Matrix)


# 1. Parameters. Currently padj.th, lfc.th, low.th and q2.th are at their defaults; q1.th is the default for a low number of genes.
# Important parameters: q.diff.th and de.score.th. Together they determine whether a cluster is kept, i.e. whether it has enough differential genes.
# de.score.th is currently the value recommended for large datasets.
# q.diff.th is manually selected for now; it will be re-checked later.

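# The algorithm is not very stable across different q.diff.th and de.score.th values, hence we temporarily chose the values passed to de_param() below (q.diff.th=0.55, de.score.th=200; an earlier revision of this note cited 0.35 and 150).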

# Differential-expression settings; the same object is passed to
# run_consensus_clust() and display_cl() further down.
de.param <- de_param(
  # cell-count and expression floors
  min.cells   = 4,
  low.th      = 1,
  # fold-change and significance cut-offs
  lfc.th      = 1,
  padj.th     = 0.001,
  # q1 / q2 / q.diff thresholds
  q1.th       = 0.3,
  q2.th       = NULL,
  q.diff.th   = 0.55,
  # DE score threshold
  de.score.th = 200
)

# 2: remove the output directory left behind by a previous calculation
unlink(
  paste0(
    "D:/Kyoko_Xiangduo_MERFISH_cell_cluster_project/dataset_pooled/results/",
    fld,
    "/data/hicat/subsample_PCA"
  ),
  recursive = TRUE
)

# 3: convert the expression table to a numeric matrix and log-transform it
coln <- colnames(gene_by_cell)
rown <- rownames(gene_by_cell)

# as.matrix() takes its shape from the table itself, so the script no longer
# depends on the input having exactly 141 columns.
gene_by_cell1 <- as.matrix(gene_by_cell)
if (!is.numeric(gene_by_cell1)) {
  # A text/factor column would otherwise fail (or be silently mis-coded) in
  # the arithmetic below; report it at the source instead.
  stop("gene_by_cell must contain only numeric columns", call. = FALSE)
}
# Set both dimnames explicitly: as.matrix() drops automatic row names, but
# the transposed matrix below needs them as its column names.
dimnames(gene_by_cell1) <- list(rown, coln)

# Transpose so the table's columns become rows, then apply log2(x + 1).
norm.dat <- log2(t(gene_by_cell1) + 1)

# Dimension filtering covariate: for every column of norm.dat, count the
# entries greater than zero and store log2 of that count in a one-column
# matrix, which is later supplied as the rm.eigen argument.
gene.counts <- colSums(norm.dat > 0)
rm.eigen <- matrix(
  log2(gene.counts),
  ncol = 1,
  dimnames = list(names(gene.counts), "log2GeneCounts")
)


# 4. Consensus clustering (bootstrapping for robustness): 100 iterations with
# PCA as the dimension-reduction method; output goes to the subsample_PCA
# directory of the selected dataset.
consensus_dir <- paste0(
  "D:/Kyoko_Xiangduo_MERFISH_cell_cluster_project/dataset_pooled/results/",
  fld,
  "/data/hicat/subsample_PCA"
)
result <- run_consensus_clust(
  norm.dat,
  niter      = 100,
  de.param   = de.param,
  rm.eigen   = rm.eigen,
  dim.method = "pca",
  output_dir = consensus_dir
)

# 4-0. one step clustering try
#result1 <- onestep_clust(norm.dat, dim.method = "pca", de.param = de.param, rm.eigen = rm.eigen)
#display.result <- display_cl(result$cl, norm.dat, plot = TRUE, de.param = de.param)

# 4-1: display results
# Plot the consensus clusters into a PDF. dev.off() sits in `finally` so the
# PDF device is closed even when display_cl() fails; otherwise the device
# would stay open and capture later plots.
pdf(
  file = paste0(
    "D:/Kyoko_Xiangduo_MERFISH_cell_cluster_project/dataset_pooled/results/",
    fld,
    "/data/hicat/differentialGeneClustering.pdf"
  )
)
display.result <- tryCatch(
  display_cl(result$cl.result$cl, norm.dat, plot = TRUE, de.param = de.param),
  finally = dev.off()
)

# Pull the cluster assignment and the marker genes out of the consensus
# result, then save the cluster assignment as CSV.
consensus <- result$cl.result
clusterr <- consensus$cl
diffGenes <- consensus$markers

write.csv(
  clusterr,
  paste0(
    "D:/Kyoko_Xiangduo_MERFISH_cell_cluster_project/dataset_pooled/results/",
    fld,
    "/data/clusterr.csv"
  )
)