library(DESeq2)
library(scran)
library(BiocParallel)
library(EnhancedVolcano)
library(magrittr)


#' Build a DESeqDataSet from two CSV files and fit it with a likelihood-ratio test.
#'
#' Reads a count table and a column-annotation table, transposes the counts so
#' that each CSV column becomes one row of the DESeq2 count matrix, estimates
#' size factors with scran, saves the unfitted data set to
#' `<SAV_PREFIX>_dds.rds`, and finally runs `DESeq(test = "LRT")`.
#'
#' @param countDataName Path to the count CSV. Its first column is dropped and
#'   the values of its `index` column become the column names of the count
#'   matrix (presumably these are the same column -- confirm against the
#'   code that exports the CSV). All remaining columns must be numeric.
#' @param colDataName Path to the annotation CSV. Its first column is dropped,
#'   its `index` column supplies the row names, and it must contain `expType`
#'   and `sample` columns (both are converted to factors).
#' @param ncol_curr Total number of columns in the count CSV, including the
#'   leading index column. It must match the file exactly.
#' @param SAV_PREFIX Path prefix for the saved `_dds.rds` file.
#' @param designPattern Full design formula passed to `DESeqDataSetFromMatrix`.
#' @param reducePattern Reduced formula passed to `DESeq` for the LRT.
#' @return The fitted DESeqDataSet returned by `DESeq`.
deseq_func <- function(countDataName,colDataName,ncol_curr,SAV_PREFIX,designPattern,reducePattern) {

    # read data
    countData <- read.csv(file = countDataName)
    colData <- read.csv(file = colDataName)

    # Fail early with a clear message when the declared width does not match
    # the file, instead of letting a reshape scramble or reject the counts.
    if (length(ncol_curr) != 1 || is.na(ncol_curr) || ncol_curr < 2 ||
        ncol_curr != ncol(countData)) {
        stop("ncol_curr (", paste(ncol_curr, collapse = ", "),
             ") must equal the number of columns in '", countDataName,
             "' (", ncol(countData), ") and be at least 2", call. = FALSE)
    }

    # arrange cts
    # Only the count columns are converted to a matrix. Flattening the whole
    # data frame with unlist() would coerce every count to character whenever
    # the index column is read as character (the read.csv default in R >= 4.0).
    sample_ids <- countData$index
    count_cols <- countData[, 2:ncol_curr, drop = FALSE]
    countData <- as.matrix(count_cols)
    if (!is.numeric(countData)) {
        stop("non-numeric count columns found in '", countDataName, "'",
             call. = FALSE)
    }
    countData <- t(countData)

    rownames(countData) <- colnames(count_cols) # CSV column names become row names after the transpose
    colnames(countData) <- sample_ids
    # countData[countData == 0] <- 1 # add a pseudo count to 0 expression: https://www.biostars.org/p/440379/

    # arrange colData
    col_ids <- colData$index
    # drop = FALSE keeps a data frame even if only one annotation column remains
    colData <- colData[, -1, drop = FALSE]
    rownames(colData) <- col_ids

    colData$expType <- factor(colData$expType)
    colData$sample <- factor(colData$sample)

    # DESeq2 pairs count columns with colData rows by position, so a mismatch
    # here means annotations may be attached to the wrong samples. The result
    # is still printed as before; the warning makes a FALSE hard to overlook.
    samples_match <- identical(rownames(colData), colnames(countData))
    print(samples_match)
    if (!samples_match) {
        warning("row names of colData do not match column names of countData",
                call. = FALSE)
    }

    #Estimate size factors with scran (this can take a while, look into parallelization?)
    start_time <- Sys.time()
    sizeFactors <- scran::calculateSumFactors(countData)
    end_time <- Sys.time()
    print(end_time - start_time)
    print('sizeFactor calculation done')

    # Keep the size factors alongside the annotations; no samples are filtered
    # on them (the former > .25 threshold was computed but never applied).
    colData$sizeFactors <- sizeFactors

    # wrap data into dds
    dds <- DESeqDataSetFromMatrix(countData = countData, colData = colData, design = designPattern)
    #Utilize preset size factors
    sizeFactors(dds) <- colData$sizeFactors

    print(dds)
    saveRDS(dds, file = paste0(SAV_PREFIX, "_dds.rds"))

    # calculate differential analysis result with appropriate parameters
    # NOTE(review): the DESeq2 documentation describes useT as an argument
    # forwarded to the Wald test, so it presumably has no effect on this LRT
    # fit -- confirm before relying on it.
    dds1 <- DESeq(dds,test='LRT',useT=TRUE,minmu=1e-6,minReplicatesForReplace=Inf,parallel=FALSE, reduced=reducePattern)

    dds1

}

#ncol_curr=139
#
#
#selectedGenes=c('MYOG','NEB','MYH8','TTN','MYH3','CDK1','CCNA1','DUXA','DUXB','H3.Y','KDM4E','KHDC1L','KLF17','LEUTX','PRAMEF12','PRAMEF20','RBP7','RFPL1','RFPL2','RFPL4B','SLC34A2','SLC38A1','TAF11L11','VMO1','ZNF280A','ZNF296','ZNF596','ZNF705G','ZSCAN4','DBET', 'RBBP6')
#selectedGenes=c('MYOG','NEB','MYH8','TTN','MYH3','CDK1','CCNA1','DUXA','DUXB','H3.Y','KDM4E','KHDC1L','KLF17','LEUTX','PRAMEF12','PRAMEF20','RBP7','RFPL1','RFPL2','RFPL4B','SLC34A2','SLC38A1','TAF11L11','VMO1','ZNF280A','ZNF296','ZNF596','ZNF705G','ZSCAN4')

# Myotube vs non-myotube (tube/ntube) comparison, run once per genotype.
# gene_num[i] is the number of count columns in the i-th count CSV, excluding
# the leading index column; deseq_func is given gene_num[i] + 1.
genotypes <- c('FSHD1_tube_pseudobulk','DEL5_tube_pseudobulk','Ctrl_tube_pseudobulk')
gene_num <- c(26,30,24)
stopifnot(length(genotypes) == length(gene_num))

data_dir <- "D:/muscle_merfish_revision_data/original_data/r_based_analysis_data/myotube/"
results_dir <- "D:/muscle_merfish_revision_data/results/myotube/r_differential_expression/"

# The two figures per genotype differ only in output folder and label size.
# (A tighter view was previously tried for the DUX figure:
#  labSize = 3.0, xlim = c(-5, 5), ylim = c(0, 100).)
figure_specs <- list(
    list(subdir = "all", labSize = 3.0),
    list(subdir = "DUX", labSize = 1.0)
)

# seq_along() is safe even when genotypes is empty (1:length() would yield 1, 0)
for (i in seq_along(genotypes)) {
    g <- genotypes[i]
    num <- gene_num[i]

    SAV_PREFIX <- paste0(data_dir, g)
    countDataName <- paste0(data_dir, g, "_countData.csv")
    colDataName <- paste0(data_dir, g, "_colData.csv")
    dds1 <- deseq_func(countDataName,colDataName,num+1,SAV_PREFIX,as.formula(" ~ batch+expType"),as.formula(" ~ batch"))

    # The model is fitted with the LRT inside deseq_func; Wald statistics are
    # requested here for the expType contrast "zcase" over "ctrl".
    res <- results(dds1,contrast=c("expType","zcase","ctrl"),test='Wald')
    write.csv(res, paste0(results_dir, "diff_res_", g, ".csv"))

    for (spec in figure_specs) {
        pdf(paste0(results_dir, "figures/", spec$subdir, "/", g, ".pdf"))
        # finally = dev.off() closes the PDF device even if plotting fails,
        # so a failed plot does not leave a dangling graphics device behind.
        # pCutoff = 10e-3 is 0.01.
        tryCatch(
            print(EnhancedVolcano(res,lab = rownames(res),x = 'log2FoldChange', xlab = bquote(~Log[2]~ 'fold change (Myotube/Non-myotube)') ,y = 'padj',pCutoff = 10e-3,FCcutoff = 0.5,labSize = spec$labSize, xlim=c(-8,8), ylim=c(0,350), drawConnectors = TRUE)),
            finally = dev.off()
        )
    }
}


# DISEASE TUBE TO CTRL TUBE
# gene_num[i] is the number of count columns in the i-th count CSV, excluding
# the leading index column; deseq_func is given gene_num[i] + 1.
#genotypes=c('FSHD1_Ctrl_tube_pseudobulk','DMutant_Ctrl_tube_pseudobulk','DEL5_Ctrl_tube_pseudobulk')
genotypes <- c('FSHD1_Ctrl_tube_pseudobulk','DEL5_Ctrl_tube_pseudobulk')
gene_num <- c(15,21)
stopifnot(length(genotypes) == length(gene_num))

data_dir <- "D:/muscle_merfish_revision_data/original_data/r_based_analysis_data/myotube/"
results_dir <- "D:/muscle_merfish_revision_data/results/myotube/r_differential_expression/"

# The two figures per comparison differ only in output folder and label size.
# (A tighter view was previously tried for the DUX figure:
#  labSize = 3.0, xlim = c(-5, 5), ylim = c(0, 100).)
figure_specs <- list(
    list(subdir = "all", labSize = 3.0),
    list(subdir = "DUX", labSize = 1.0)
)

# seq_along() is safe even when genotypes is empty (1:length() would yield 1, 0)
for (i in seq_along(genotypes)) {
    g <- genotypes[i]
    num <- gene_num[i]

    SAV_PREFIX <- paste0(data_dir, g)
    countDataName <- paste0(data_dir, g, "_countData.csv")
    colDataName <- paste0(data_dir, g, "_colData.csv")
    dds1 <- deseq_func(countDataName,colDataName,num+1,SAV_PREFIX,as.formula(" ~ batch+expType"),as.formula(" ~ batch"))

    # The model is fitted with the LRT inside deseq_func; Wald statistics are
    # requested here for the expType contrast "zcase" over "ctrl".
    res <- results(dds1,contrast=c("expType","zcase","ctrl"),test='Wald')
    write.csv(res, paste0(results_dir, "diff_res_", g, ".csv"))

    for (spec in figure_specs) {
        pdf(paste0(results_dir, "figures/", spec$subdir, "/", g, ".pdf"))
        # finally = dev.off() closes the PDF device even if plotting fails,
        # so a failed plot does not leave a dangling graphics device behind.
        # pCutoff = 10e-3 is 0.01.
        # NOTE(review): the axis label says "Myotube/Non-myotube", but this
        # section compares disease tube to control tube -- the label looks
        # copied from the tube/ntube analysis; confirm and correct the wording.
        tryCatch(
            print(EnhancedVolcano(res,lab = rownames(res),x = 'log2FoldChange', xlab = bquote(~Log[2]~ 'fold change (Myotube/Non-myotube)') ,y = 'padj',pCutoff = 10e-3,FCcutoff = 0.5,labSize = spec$labSize, xlim=c(-9,9), ylim=c(0,350), drawConnectors = TRUE)),
            finally = dev.off()
        )
    }
}

# DISEASE non TUBE TO CTRL non TUBE
# gene_num[i] is the number of count columns in the i-th count CSV, excluding
# the leading index column; deseq_func is given gene_num[i] + 1.
#genotypes=c('FSHD1_Ctrl_ntube_pseudobulk','DMutant_Ctrl_ntube_pseudobulk','DEL5_Ctrl_ntube_pseudobulk')
genotypes <- c('FSHD1_Ctrl_ntube_pseudobulk','DEL5_Ctrl_ntube_pseudobulk')
gene_num <- c(15,21)
stopifnot(length(genotypes) == length(gene_num))

data_dir <- "D:/muscle_merfish_revision_data/original_data/r_based_analysis_data/myotube/"
results_dir <- "D:/muscle_merfish_revision_data/results/myotube/r_differential_expression/"

# The two figures per comparison differ only in output folder and label size.
# (A tighter view was previously tried for the DUX figure:
#  labSize = 3.0, xlim = c(-5, 5), ylim = c(0, 100).)
figure_specs <- list(
    list(subdir = "all", labSize = 3.0),
    list(subdir = "DUX", labSize = 1.0)
)

# seq_along() is safe even when genotypes is empty (1:length() would yield 1, 0)
for (i in seq_along(genotypes)) {
    g <- genotypes[i]
    num <- gene_num[i]

    SAV_PREFIX <- paste0(data_dir, g)
    countDataName <- paste0(data_dir, g, "_countData.csv")
    colDataName <- paste0(data_dir, g, "_colData.csv")
    dds1 <- deseq_func(countDataName,colDataName,num+1,SAV_PREFIX,as.formula(" ~ batch+expType"),as.formula(" ~ batch"))

    # The model is fitted with the LRT inside deseq_func; Wald statistics are
    # requested here for the expType contrast "zcase" over "ctrl".
    res <- results(dds1,contrast=c("expType","zcase","ctrl"),test='Wald')
    write.csv(res, paste0(results_dir, "diff_res_", g, ".csv"))

    for (spec in figure_specs) {
        pdf(paste0(results_dir, "figures/", spec$subdir, "/", g, ".pdf"))
        # finally = dev.off() closes the PDF device even if plotting fails,
        # so a failed plot does not leave a dangling graphics device behind.
        # pCutoff = 10e-3 is 0.01.
        # NOTE(review): the axis label says "Myotube/Non-myotube", but this
        # section compares disease non-tube to control non-tube -- the label
        # looks copied from the tube/ntube analysis; confirm and correct it.
        tryCatch(
            print(EnhancedVolcano(res,lab = rownames(res),x = 'log2FoldChange', xlab = bquote(~Log[2]~ 'fold change (Myotube/Non-myotube)') ,y = 'padj',pCutoff = 10e-3,FCcutoff = 0.5,labSize = spec$labSize, xlim=c(-8,8), ylim=c(0,350), drawConnectors = TRUE)),
            finally = dev.off()
        )
    }
}

