library(DESeq2)
library(scran)
library(BiocParallel)
library(EnhancedVolcano)
library(magrittr)


#' Build a DESeq2 dataset from two CSV files and fit a likelihood-ratio test.
#'
#' The count CSV is read with `read.csv()`. Its first column must be `index`;
#' the remaining columns hold the counts. The table is transposed, so the CSV
#' column names become the row names of the count matrix and the `index`
#' values become its column names (presumably samples x genes on disk --
#' confirm against the exporter).
#' The colData CSV must also start with an `index` column and must contain
#' `expType` and `sample`, plus every variable used in the design formulas.
#'
#' Side effects: prints the sample-name check and the dataset, and writes
#' `<SAV_PREFIX>_dds.rds` (unfitted) and `<SAV_PREFIX>_dds1.rds` (fitted).
#'
#' @param countDataName Path to the count CSV.
#' @param colDataName Path to the sample-annotation CSV.
#' @param ncol_curr Total number of columns in the count CSV, including the
#'   leading `index` column. An error is raised if it does not match the file.
#' @param SAV_PREFIX Path prefix for the two `.rds` files that are written.
#' @param designPattern Full design formula passed to DESeqDataSetFromMatrix.
#' @param reducePattern Reduced formula passed to `DESeq(reduced = ...)`.
#' @return The object returned by `DESeq()` (LRT fit).
deseq_func <- function(countDataName,colDataName,ncol_curr,SAV_PREFIX,designPattern,reducePattern) {

    # read data
    count_df <- read.csv(file = countDataName)
    col_df <- read.csv(file = colDataName)

    # Fail early if the declared width is wrong: reshaping with a wrong column
    # count would otherwise scramble the matrix or fail with an obscure error.
    if (!isTRUE(ncol(count_df) == ncol_curr) || ncol_curr < 2) {
        stop("ncol_curr (", paste(ncol_curr, collapse = ", "),
             ") must equal the number of columns in '", countDataName,
             "' (", ncol(count_df), ") and be at least 2",
             call. = FALSE)
    }

    # arrange cts
    # Convert only the count columns; the `index` column may hold text and
    # must not take part in the type coercion of the matrix.
    sample_ids <- count_df$index
    feature_ids <- colnames(count_df)[2:ncol_curr]
    count_mat <- t(as.matrix(count_df[, 2:ncol_curr, drop = FALSE]))
    if (!is.numeric(count_mat)) {
        stop("count columns of '", countDataName, "' are not all numeric",
             call. = FALSE)
    }
    rownames(count_mat) <- feature_ids # CSV column names become rows after transposing
    colnames(count_mat) <- sample_ids

    # arrange colData
    col_ids <- col_df$index
    col_df <- col_df[, -1, drop = FALSE] # drop = FALSE keeps a data frame even with one column left
    rownames(col_df) <- col_ids

    # factor() orders levels alphabetically, so the first level in sort order
    # becomes the reference level of each factor.
    col_df$expType <- factor(col_df$expType)
    col_df$sample <- factor(col_df$sample)

    # The result is still printed as before; a mismatch additionally raises a
    # warning because misaligned samples silently invalidate the analysis.
    samples_match <- all(rownames(col_df) == colnames(count_mat))
    print(samples_match)
    if (!isTRUE(samples_match)) {
        warning("row names of colData do not match column names of countData",
                call. = FALSE)
    }

    # Estimate size factors with scran (this can take a while).
    # Optional thresholding on the size factors (e.g. keeping only columns
    # with sizeFactors > 0.25) is currently disabled.
    size_factors <- scran::calculateSumFactors(count_mat)
    col_df$sizeFactors <- size_factors

    # wrap data into dds
    dds <- DESeqDataSetFromMatrix(countData = count_mat, colData = col_df, design = designPattern)
    # Use the scran size factors instead of letting DESeq2 estimate its own.
    sizeFactors(dds) <- col_df$sizeFactors

    print(dds)
    saveRDS(dds, file = paste0(SAV_PREFIX, "_dds.rds"))

    # Fit the likelihood-ratio test of the full design against reducePattern.
    # A parallel variant (fitType='glmGamPoi', parallel=TRUE,
    # BPPARAM=MulticoreParam(40)) was tried previously and is not used here.
    # NOTE(review): DESeq2 documents useT as a Wald-test option; confirm it
    # has any effect together with test='LRT'.
    dds1 <- DESeq(dds,test='LRT',useT=TRUE,minmu=1e-6,minReplicatesForReplace=Inf,parallel=FALSE, reduced=reducePattern)
    saveRDS(dds1, file = paste0(SAV_PREFIX, "_dds1.rds"))
    dds1

}

#ncol_curr=139
# Genes passed as `selectLab` to the "DUX" volcano plots: DUX4 target genes,
# myotube marker genes, and commonly regulated genes.
selectedGenes <- c(
    "MYOG", "MYH8", "NEB", "TTN", "MYH3", "CDK1", "CCNA1",
    "DUXA", "DUXB", "H3.Y", "KDM4E", "KHDC1L", "KLF17", "LEUTX",
    "PRAMEF12", "PRAMEF20", "RBP7", "RFPL1", "RFPL2", "RFPL4B",
    "SLC34A2", "SLC38A1", "TAF11L11", "VMO1",
    "ZNF280A", "ZNF296", "ZNF596", "ZNF705G", "ZSCAN4",
    "DBET", "RBBP6"
)
# Alternative, larger label set (disabled):
#selectedGenes=c('ACTA1','ACTN2','BMP1','CASQ2','CCNA1','CCND2','CDK1','COL18A1','COL4A1','COL4A2','COL5A1','COL5A3','COL6A1','COL6A2','COL8A1','DBET','DUXA','DUXB','FAS','FN1','GREM1','H3.Y','IGFBP3','KDM4E','KHDC1L','KLF17','KLHL41','LEUTX','LMOD2','LOX','LOXL1','LOXL2','MEF2C','MMP14','MYH3','MYH7','MYH8','MYL2','MYMX','MYOG','MYOM3','NEB','PRAMEF12','PRAMEF20','RBBP6','RBP7','RFPL1','RFPL2','RFPL4B','SERPINE1','SERPINE2','SLC34A2','SLC38A1','SMPX','SOX4','TAF11L11','TNNI2','TRIM72','TTN','VMO1','ZNF280A','ZNF296','ZNF596','ZNF705G','ZSCAN4')

# ---- Differential expression runs and volcano plots ----
# Every comparison follows the same recipe:
#   1. fit the model with deseq_func() on <data_dir>/<g>_countData.csv and
#      <data_dir>/<g>_colData.csv,
#   2. extract the expType zcase-vs-ctrl contrast and write it to CSV,
#   3. draw two volcano plots: one with EnhancedVolcano's default labelling
#      ("all") and one restricted to `selectedGenes` ("DUX").
# Only the dataset names and the axis limits differ between comparisons, so
# they are listed in a table and processed by a single loop.

data_dir <- "D:/Kyoko_Xiangduo_MERFISH_cell_cluster_project/dataset_pooled/original_data/r_based_analysis_data/cyto/"
results_dir <- "D:/Kyoko_Xiangduo_MERFISH_cell_cluster_project/dataset_pooled/results/cyto/r_differential_expression/"

design_formula <- ~ batch + expType
reduced_formula <- ~ batch

# Render one EnhancedVolcano plot of `res` into the PDF file `pdf_path`.
#   x_lim, y_lim : axis limits forwarded as xlim / ylim
#   lab_size     : label size forwarded as labSize
#   select_lab   : optional vector forwarded as selectLab (NULL is passed
#                  through unchanged)
# The target directory is created if it does not exist yet.
save_volcano_pdf <- function(res, pdf_path, x_lim, y_lim, lab_size, select_lab = NULL) {
    dir.create(dirname(pdf_path), recursive = TRUE, showWarnings = FALSE)
    pdf(pdf_path)
    # Close the device even when plotting fails, so an error does not leave
    # an open graphics device behind for the following plots.
    on.exit(dev.off(), add = TRUE)
    print(EnhancedVolcano(res,
                          lab = rownames(res),
                          x = 'log2FoldChange',
                          y = 'padj',
                          pCutoff = 10e-3, # 10e-3 == 0.01
                          FCcutoff = 0.5,
                          labSize = lab_size,
                          selectLab = select_lab,
                          xlim = x_lim,
                          ylim = y_lim,
                          drawConnectors = TRUE))
    invisible(pdf_path)
}

comparison_sets <- list(
    # Clust1/clust0, allGenotypes
    list(genotypes = c('allGenoType_pseudobulk'),
         xlim = c(-7, 7), ylim = c(0, 100)),
    # Clust1/clust0, perGenotypes
    list(genotypes = c('FSHD1_pseudobulk', 'DEL5_pseudobulk', 'Ctrl_pseudobulk'),
         xlim = c(-7, 7), ylim = c(0, 250)),
    # clust1, disease - clust0, ctrl
    list(genotypes = c('FSHD1_clust1_Ctrl_clust0_pseudobulk', 'DEL5_clust1_Ctrl_clust0_pseudobulk'),
         xlim = c(-5, 5), ylim = c(0, 25)),
    # clust0, disease-ctrl
    list(genotypes = c('FSHD1_Ctrl_ctrl_pseudobulk', 'DEL5_Ctrl_ctrl_pseudobulk'),
         xlim = c(-5, 5), ylim = c(0, 25)),
    # clust1, cyto-nuclei
    list(genotypes = c('FSHD1_pseudobulk_cyto_nuclei_clust1', 'DEL5_pseudobulk_cyto_nuclei_clust1'),
         xlim = c(-5, 5), ylim = c(0, 30)),
    # clust0, cyto-nuclei
    list(genotypes = c('FSHD1_pseudobulk_cyto_nuclei_clust0', 'DEL5_pseudobulk_cyto_nuclei_clust0', 'Ctrl_pseudobulk_cyto_nuclei_clust0'),
         xlim = c(-5, 5), ylim = c(0, 30)),
    # all, cyto-nuclei
    list(genotypes = c('FSHD1_pseudobulk_cyto_nuclei_all', 'DEL5_pseudobulk_cyto_nuclei_all', 'Ctrl_pseudobulk_cyto_nuclei_all'),
         xlim = c(-5, 5), ylim = c(0, 30))
)

# Make sure the CSV output directory exists before the (slow) model fits.
dir.create(results_dir, recursive = TRUE, showWarnings = FALSE)

for (cfg in comparison_sets) {
    genotypes <- cfg[["genotypes"]]
    for (g in genotypes) {
        SAV_PREFIX <- paste0(data_dir, g)
        countDataName <- paste0(data_dir, g, "_countData.csv")
        colDataName <- paste0(data_dir, g, "_colData.csv")

        # 141 = number of columns in each count CSV, including `index`.
        dds1 <- deseq_func(countDataName, colDataName, 141, SAV_PREFIX, design_formula, reduced_formula)

        # NOTE(review): the model is fitted with the LRT, but the contrast is
        # extracted with test='Wald' -- confirm this combination is intended.
        res <- results(dds1, contrast = c("expType", "zcase", "ctrl"), test = 'Wald')
        write.csv(res, paste0(results_dir, "diff_res_", g, ".csv"))

        # Volcano plot with EnhancedVolcano's default label selection.
        save_volcano_pdf(res, paste0(results_dir, "figures/all/", g, ".pdf"),
                         x_lim = cfg[["xlim"]], y_lim = cfg[["ylim"]],
                         lab_size = 3.0)
        # Volcano plot labelling only the genes in selectedGenes.
        save_volcano_pdf(res, paste0(results_dir, "figures/DUX/", g, ".pdf"),
                         x_lim = cfg[["xlim"]], y_lim = cfg[["ylim"]],
                         lab_size = 1.0, select_lab = selectedGenes)
    }
}