library("dplyr")
library(matrixStats)
library(pROC)
#This script was used to generate the ROC figure

## If starting from scratch
# Read the simulated count table; its first column holds the gene identifiers,
# which become the row names of the count matrix SD.
path <- paste0("Comparative Analysis/Datasets/SDEE1", ".csv")
counts <- read.csv(path, header = TRUE)
rownames(counts) <- counts[, 1]
SD <- counts[, -1]

# ROC response vector: rows 1-2000 (the true DE genes) are coded 0 and
# rows 2001-20000 (the true EE genes) are coded 1.
labels <- rep(c(0, 1), times = c(2000, 18000))
trueDEs <- rownames(SD[1:2000, ])
trueEEs <- rownames(SD[2001:20000, ])

#DOTNB (DEGage package)
library(DEGage)
# Two conditions of 75 samples each, in the column order of SD
x <- factor(c(rep(1, 75), rep(2, 75)))
simresults <- DEGage(SD, x, perm.preprocess = FALSE, nperms = 10000, mean.ratio = 2)
DEgage.pvals <- simresults$pval
# Save the DEGage p-values themselves. This used to write `pvals`, which is not
# assigned until the edgeR section, so a fresh run failed here (and an
# interactive re-run would have saved another method's p-values).
# Writing a bare vector produces a column named "x" in the CSV.
write.csv(DEgage.pvals, "Comparative Analysis/ROCdata/DEGage.csv")

# Number of result rows whose names start with "DE" / "EE".
# sum() on the logical vector avoids depending on which attached package's
# count() happens to be on top of the search path.
nDEs <- sum(substr(rownames(simresults), 1, 2) == "DE")
nEEs <- sum(substr(rownames(simresults), 1, 2) == "EE")

DEGAGE <- roc(labels, predictor = DEgage.pvals, na.rm = TRUE)

#EdgeR
library("edgeR")
# Sample grouping: first 75 columns of SD are Group1, the remaining 75 Group2
group <- matrix(c(rep("Group1", 75), rep("Group2", 75)), nrow = 150,
                dimnames = list(colnames(SD), "Group"))
group <- factor(group)

# Named `dge` rather than `list` so the base function list() is not shadowed
dge <- DGEList(SD)

# One column per group (no intercept), named after the factor levels so the
# contrast below can refer to Group1 / Group2 directly
design <- model.matrix(~0 + group)
colnames(design) <- levels(group)

AveLogCPM <- aveLogCPM(dge)
dge <- calcNormFactors(dge)
# R argument names are case-sensitive: the previous `Robust = TRUE` did not
# match estimateDisp()'s `robust` argument, so robust estimation was
# presumably not being requested here.
dge <- estimateDisp(dge, design, robust = TRUE)
fit <- glmQLFit(dge, design, robust = TRUE)

onev.two <- makeContrasts(Group1 - Group2, levels = design)

res <- glmQLFTest(fit, contrast = onev.two)

# One p-value per gene, keyed by gene name
pvals <- data.frame(pvals = res$table$PValue, row.names = rownames(SD))

EDGER <- roc(labels, predictor = pvals$pvals, na.rm = TRUE)

write.csv(pvals, "Comparative Analysis/ROCdata/edger.csv")

#Deseq2

library("DESeq2")

# Scale the simulated values by 100 and truncate every column to integer,
# since DESeq2 expects integer counts
simcounts <- as.data.frame(SD * 100.0)
simcounts[] <- lapply(simcounts, as.integer)
# NOTE(review): the reason for incrementing row 2030 is not documented here --
# presumably it works around a row DESeq2 could not handle; confirm.
simcounts[2030, ] <- simcounts[2030, ] + 1

# Sample annotation as a data.frame with a named factor column, which is the
# documented form of colData and lets the design formula find `Group`
filler <- data.frame(Group = factor(c(rep("Group.1", 75), rep("Group.2", 75))),
                     row.names = colnames(SD))
DESeqobj <- DESeqDataSetFromMatrix(countData = simcounts, colData = filler, design = ~Group)
sim.Deseq <- DESeq(DESeqobj)
sim.DEGresults <- results(sim.Deseq)

DESEQ2 <- roc(response = labels, predictor = sim.DEGresults$padj, na.rm = TRUE)
# Save alongside the other methods' results. The previous path
# ("Comparative Analysis//DEseq.csv") put the file outside ROCdata/, where the
# reload section looks for it.
write.csv(as.data.frame(sim.DEGresults), "Comparative Analysis/ROCdata/DEseq.csv")

#Monocle3
library(monocle3)
# Per-gene and per-cell annotation tables, keyed by the row / column names of SD
gene_metadata <- data.frame(gene_short_name = rownames(SD), row.names = rownames(SD))
cell_metadata <- data.frame(Group = rep(c(1, 2), each = 75), row.names = colnames(SD))
sim.cds <- new_cell_data_set(
  data.matrix(SD),
  cell_metadata = cell_metadata,
  gene_metadata = gene_metadata
)

# Fit one model per gene with Group as the only covariate
gene_fits <- fit_models(sim.cds, model_formula_str = "~Group")
fit_coefs <- coefficient_table(gene_fits)

# Keep only the Group coefficient rows and take their p-values
intermediate <- filter(fit_coefs, term == "Group")
pvals <- intermediate$p_value
# Writing a bare vector produces a column named "x" in the CSV
write.csv(pvals, "Comparative Analysis/ROCdata/Monocle3.csv")

MONOCLE3 <- roc(response = labels, predictor = pvals)


#DEsingle
library("DEsingle")
simgroups <- factor(c(rep(1, 75), rep(2, 75)))
simresults <- DEsingle(SD, simgroups)

simr <- DEtype(simresults, threshold = 0.05)
# Regroup the result rows so all true DE genes come first, followed by all
# true EE genes (within-group order does not matter for the ROC).
de.rows <- simr[rownames(simr) %in% trueDEs, ]
ee.rows <- simr[rownames(simr) %in% trueEEs, ]
depvals <- rbind(de.rows, ee.rows)
write.csv(depvals, "Comparative Analysis/ROCdata/DEsingle.csv")

# Build the response from the rows actually returned rather than the global
# 20000-long `labels`: if DEsingle drops any gene (e.g. all-zero rows), the
# fixed-length vector would no longer match the predictor. When every gene is
# present this is identical to `labels`.
desingle.labels <- c(rep(0, nrow(de.rows)), rep(1, nrow(ee.rows)))
DESINGLE <- roc(response = desingle.labels, predictor = depvals$pvalue.adj.FDR)

#SCDD
library("scDD")
simsce <- SingleCellExperiment(assays = list(counts = SD))
colData(simsce)$condition <- c(rep("Group.1", 75), rep("Group.2", 75))
simsce <- preprocess(simsce, scran_norm = TRUE)

sim.scDD.output <- scDD(simsce)
sim.results <- results(sim.scDD.output)
# scDD returns fewer rows than SD has genes (some genes are filtered out).
# Re-align the results to the gene order of SD by gene name, leaving an all-NA
# row for every gene scDD did not report. Appending NA rows at the end instead
# would shift every gene after a filtered one out of step with `labels`, and
# would only work for one specific number of filtered genes.
gene.idx <- match(rownames(SD), as.character(sim.results$gene))
sim.results <- sim.results[gene.idx, ]
rownames(sim.results) <- rownames(SD)
write.csv(sim.results, "Comparative Analysis/ROCdata/scdd.csv")

# NOTE(review): this uses nonzero.pvalue.adj, while the reload-from-CSV section
# uses combined.pvalue.adj for scDD -- confirm which column is intended.
SCDD <- roc(response = labels, predictor = sim.results$nonzero.pvalue.adj)


##Run if not from scratch
# Response vector (0 = true DE rows 1-2000, 1 = true EE rows 2001-20000).
# Redefined here so this section can be run on its own, including after the
# name `labels` has been reused for something else in the session.
labels <- c(rep(0, 2000), rep(1, 18000))

roc.dir <- "Comparative Analysis/ROCdata/"
roc.files <- list.files(roc.dir)

# Return the path of the single file in roc.dir whose name contains `key`
# (case-insensitive). Looking files up by name instead of by position in
# list.files() keeps the method/file pairing independent of the locale's sort
# order and of any extra files in the directory.
find.rocfile <- function(key) {
  hit <- grep(key, roc.files, ignore.case = TRUE, value = TRUE)
  if (length(hit) != 1) {
    stop("Expected exactly one file matching '", key, "' in ", roc.dir,
         ", found ", length(hit), call. = FALSE)
  }
  paste0(roc.dir, hit)
}

# Paths in the order: DEGage, DESeq2, DEsingle, edgeR, monocle3, scDD
rocdat <- vapply(c("degage", "deseq", "desingle", "edger", "monocle", "scdd"),
                 find.rocfile, character(1), USE.NAMES = FALSE)

degage.tab <- read.csv(rocdat[1])
deseq2.tab <- read.csv(rocdat[2])
desingle.tab <- read.csv(rocdat[3])
edger.tab <- read.csv(rocdat[4])
MONOCLE <- read.csv(rocdat[5])
scdd.tab <- read.csv(rocdat[6])

# Column "x" is what write.csv() names a bare vector
DEGAGE <- roc(response = labels, predictor = degage.tab$x, na.rm = TRUE)
DESEQ2 <- roc(response = labels, predictor = deseq2.tab$padj, na.rm = TRUE)
DESINGLE <- roc(response = labels, predictor = desingle.tab$pvalue.adj.FDR, na.rm = TRUE)
EDGER <- roc(response = labels, predictor = edger.tab$pvals, na.rm = TRUE)
MONOCLE3 <- roc(response = labels, predictor = MONOCLE$x, na.rm = TRUE)
# NOTE(review): the from-scratch scDD section builds its ROC from
# nonzero.pvalue.adj, whereas combined.pvalue.adj is used here -- confirm
# which column is intended.
SCDD <- roc(response = labels, predictor = scdd.tab$combined.pvalue.adj, na.rm = TRUE)
roclist <- list(DEGAGE, DESEQ2, DESINGLE, EDGER, MONOCLE3, SCDD)


#Trying GGROC
library(ggsci)
library(ggplot2)
colors <- pal_npg("nrc", alpha = 0.7)(6)
# Legend text, one entry per curve in the order of roclist.
# NOTE: the AUC figures in these strings are hard-coded, not computed here.
packagelabels <- c("DOTNB (AUC = .968 \u00b1 0.006)",
            "DESeq2 (AUC = .954 \u00b1 0.006)",
            "DEsingle (AUC = .932 \u00b1 0.006)",
            "edgeR (AUC = .965 \u00b1 0.005)",
            "monocle3 (AUC = .733 \u00b1 0.012)",
            "scDD (AUC = .799 \u00b1 0.011)")

Encoding(packagelabels) <- "UTF-8"
ggroc(roclist, size = 1) +
  scale_color_manual(values = colors, labels = packagelabels) +
  labs(color = "Package") +
  theme_minimal()

# The bare call `auc.se()` referred to a function that is not defined and so
# stopped the script. Presumably the standard error of each AUC was wanted;
# compute it from pROC's variance of the AUC, one value per curve in roclist.
auc.se <- vapply(roclist, function(r) sqrt(pROC::var(r)), numeric(1))

library(RColorBrewer)
cols <- brewer.pal(6, "Set1")
#Plotting
# Square plotting region with enlarged outer margins for the title and legend
par(pty = "s", mar = c(4, 3, 2, 0) + 0.1, mgp = c(5, 1, 0), oma = c(5, 3, 10, 0))

# First curve sets up the axes. This must be the ROC object DEGAGE; `DEGage`
# is the package's function, not a ROC curve.
# `line` and `legend` are passed through unchanged from the original call.
plot(DEGAGE, main = "\nROC", legacy.axes = TRUE,
     col = cols[1], legend = TRUE, lwd = 6,
     cex.axis = 1, cex.main = 2, line = .75,
     xlab = "", ylab = "")
# Axis titles are drawn separately so their distance from the axes can be set
title(xlab = "False Positive Rate", line = 3.25, cex.lab = 2)
title(ylab = "True Positive Rate", line = 4.5, cex.lab = 2)

# Remaining curves are overlaid on the same axes
plot(EDGER, legacy.axes = TRUE,
     col = cols[2], add = TRUE, lwd = 6)

plot(DESEQ2, legacy.axes = TRUE, xlab = "False Positive Rate", ylab = "True Positive Rate",
     col = cols[3], add = TRUE, lwd = 6)

plot(MONOCLE3, legacy.axes = TRUE, xlab = "False Positive Rate", ylab = "True Positive Rate",
     col = cols[4], add = TRUE, lwd = 6)

plot(SCDD, legacy.axes = TRUE, xlab = "False Positive Rate", ylab = "True Positive Rate",
     col = cols[5], add = TRUE, lwd = 6)

plot(DESINGLE, legacy.axes = TRUE, xlab = "False Positive Rate", ylab = "True Positive Rate",
     col = cols[6], add = TRUE, lwd = 6)



# Legend text, in the same order as the curves were drawn (cols[1] .. cols[6]).
# Stored as `legend.labels` so the ROC response vector `labels` is not
# overwritten by character strings.
# NOTE: the AUC figures in these strings are hard-coded, not computed here.
legend.labels <- c("DOTNB (AUC = .971 \u00b1 0.0051)",
            "edgeR (AUC = .965 \u00b1 0.0052)",
            "DESeq2 (AUC = .9529 \u00b1 0.0063)",
            "monocle3 (AUC = .7909 \u00b1 0.0123)",
            "scDD (AUC = .9267 \u00b1 0.0075)",
            "DEsingle (AUC = .9321 \u00b1 0.0068)")
Encoding(legend.labels) <- "UTF-8"



legend(x = .6, y = .5,
       legend = legend.labels,
       lty = 1, lwd = 6, y.intersp = .2, seg.len = .25,
       col = cols,
       ncol = 1, cex = 1.5, box.lwd = 0, box.col = "transparent", bg = "transparent",
       x.intersp = .1)

# Collect the AUC of every method into a one-row table and save it.
# The DNB column is taken from DEGAGE: the previously referenced `DOTnb` object
# is never created in this script. `SCANPY` is not created here either, so its
# column is filled with NA unless such a ROC object exists in the session.
# as.numeric() drops the extra attributes pROC attaches to AUC values.
scanpy.auc <- if (exists("SCANPY")) as.numeric(auc(SCANPY)) else NA_real_
aucdf <- data.frame(DNB = as.numeric(auc(DEGAGE)),
                    DESEQ = as.numeric(auc(DESEQ2)),
                    DEsingle = as.numeric(auc(DESINGLE)),
                    monocle = as.numeric(auc(MONOCLE3)),
                    scDD = as.numeric(auc(SCDD)),
                    scanpy = scanpy.auc,
                    edger = as.numeric(auc(EDGER)))
write.csv(aucdf, "Final Figures/aucdata.csv")

