library(scDD)
library (SingleCellExperiment)

# This file performs the comparative analysis for scDD on real positive
# controls, negative controls, and simulated data.

### Positive Controls real data

# Load the positive-control count table; the first CSV column supplies the
# row names and is then removed from the data.
PositiveControl <- read.delim("Comparative Analysis/PositiveControl.csv",
                              header = TRUE, sep = ",")
rownames(PositiveControl) <- PositiveControl[, 1]
PositiveControl <- PositiveControl[, -1]

# Gold-standard gene list, read one entry per line (the first line of the
# file is consumed as the header).
Goldstandard <- read.delim("Comparative Analysis/goldstandard_top1000DEGs.txt",
                           header = TRUE, sep = "\n")

# Trim the table: drop the first 8 rows and first 6 columns, every row whose
# name starts with "r_", and finally columns 93-96 of what remains.
PositiveControl <- PositiveControl[-(1:8), -(1:6)]
PositiveControl <- PositiveControl[substr(rownames(PositiveControl), 1, 2) != "r_", ]
PositiveControl <- PositiveControl[, -(93:96)]

# The remaining columns are labelled as 48 "Stem Cells" followed by
# 44 "Fibroblasts".
pcsce <- SingleCellExperiment(assays = list(counts = PositiveControl))
colData(pcsce)$condition <- c(rep("Stem Cells", 48), rep("Fibroblasts", 44))
pcsce <- preprocess(pcsce, scran_norm = TRUE)

# Time the scDD run itself (preprocessing is excluded).
starttime <- Sys.time()
scDD.output <- scDD(pcsce)
pc.runtime <- Sys.time() - starttime

pc.results <- results(scDD.output)
# which() drops NA adjusted p-values; a bare logical index would turn each NA
# into an all-NA row and inflate the gene count.
# NOTE(review): whether scDD emits NA here depends on its settings - confirm.
pc.DEGs <- pc.results[which(pc.results$nonzero.pvalue.adj <= 0.05), ]

pc.ngenes <- nrow(pc.DEGs)

# Detected genes that also appear in the gold standard. The previous version
# dropped the first match and then subtracted one more from the count, which
# under-reported the overlap by two genes; intersect() also handles the case
# of zero detected genes, where a 1:pc.ngenes loop would misbehave.
goldgenes <- as.character(unlist(Goldstandard, use.names = FALSE))
commongenes <- intersect(rownames(pc.DEGs), goldgenes)
sensitivity <- length(commongenes) / nrow(Goldstandard)

# Negative controls
# Every file in the dataset directory whose name contains "NC" counts as one
# negative-control dataset; the files are read as NC1.csv, NC2.csv, ... .
path <- "Comparative Analysis/Datasets/"
ncfiles <- list.files(path)
ncfiles <- ncfiles[grep("NC", ncfiles)]
ncdf <- data.frame()
# seq_along() makes the loop a no-op when no NC file is found
# (1:length(x) would iterate over c(1, 0)).
for (i in seq_along(ncfiles)) {
  print(i)
  ncpath <- paste0(path, "NC", i, ".csv")
  NegativeControl <- read.csv(ncpath, header = TRUE, sep = ",", skipNul = TRUE, encoding = "UTF-8")
  # Column X holds the row identifiers; make.unique() de-duplicates them so
  # they can be used as row names.
  rownames(NegativeControl) <- make.unique(NegativeControl$X)
  NegativeControl <- NegativeControl[, -1]

  # The columns are split into two arbitrary groups of 38.
  ncsce <- SingleCellExperiment(assays = list(counts = NegativeControl))
  colData(ncsce)$condition <- c(rep("Group.1", 38), rep("Group.2", 38))
  ncsce <- preprocess(ncsce, scran_norm = TRUE)

  starttime <- Sys.time()
  nc.scDD.output <- scDD(ncsce)
  nc.runtime <- Sys.time() - starttime

  nc.results <- results(nc.scDD.output)
  # Count significant genes, ignoring NA adjusted p-values. Using
  # nrow(df[cond, ]) would count each NA as a detected gene, which inflates
  # the false-positive rate.
  # NOTE(review): whether scDD emits NA here depends on its settings - confirm.
  ncngenes <- sum(nc.results$nonzero.pvalue.adj <= 0.05, na.rm = TRUE)

  # Rates are relative to all rows of the input table, not only the genes
  # kept by preprocess().
  fprate <- ncngenes / nrow(NegativeControl)
  specificity <- (nrow(NegativeControl) - ncngenes) / nrow(NegativeControl)
  tempdf <- data.frame(spec = specificity, fprate = fprate, ndegs = ncngenes)
  ncdf <- rbind(ncdf, tempdf)
}
write.csv(ncdf, "Comparative Analysis/ncresults/scdd.csv")

# Simulated data
# The simulated datasets are taken to be entries 32-41 of the directory
# listing.
# NOTE(review): this positional selection breaks if files are added to or
# removed from the directory - confirm it still picks the intended files.
path <- "Comparative Analysis/Datasets/"
files <- list.files(path)
files <- files[32:41]
finalscdddf <- data.frame()
for (i in seq_along(files)) {
  print(files[i])
  SD <- read.delim(paste0(path, files[i]), header = TRUE, sep = ",")
  # Column X holds the row identifiers.
  rownames(SD) <- SD$X
  SD <- SD[, -1]

  # The columns are labelled as two groups of 75.
  simsce <- SingleCellExperiment(assays = list(counts = SD))
  colData(simsce)$condition <- c(rep("Group.1", 75), rep("Group.2", 75))
  simsce <- preprocess(simsce, scran_norm = TRUE)

  starttime <- Sys.time()
  sim.scDD.output <- scDD(simsce)
  # Request seconds explicitly. With automatic units difftime() may return
  # secs, mins or hours depending on the elapsed time, so the former
  # "* 60" conversion was only right for runs between one minute and one hour.
  sim.runtime <- difftime(Sys.time(), starttime, units = "secs")

  sim.results <- results(sim.scDD.output)
  # which() drops NA adjusted p-values so they are not counted as detected.
  sim.degs <- sim.results[which(sim.results$nonzero.pvalue.adj <= 0.05), ]

  expDEs <- rownames(sim.degs)

  # Rows 1-2000 are treated as the truly differential genes and rows
  # 2001-20000 as the truly equivalent ones.
  trueDEs <- rownames(SD[1:2000, ])
  trueEEs <- rownames(SD[2001:20000, ])

  # Confusion-matrix counts against that ground truth.
  tp <- sum(trueDEs %in% expDEs)
  fp <- sum(trueEEs %in% expDEs)
  tn <- sum(!(trueEEs %in% expDEs))
  fn <- sum(!(trueDEs %in% expDEs))

  sim.sensitivity <- tp / (tp + fn)
  sim.specificity <- tn / (tn + fp)
  precision <- tp / (tp + fp)  # NaN when nothing is detected
  accuracy <- (tp + tn) / 20000
  f1 <- (2 * tp) / (2 * tp + fp + fn)

  # Runtime is reported in seconds.
  simdf <- data.frame("Trial" = files[i], DEGs = length(expDEs),
                      "Sensitivity" = sim.sensitivity,
                      "Specificity" = sim.specificity,
                      "Precision" = precision,
                      "Accuracy" = accuracy, "F1" = f1,
                      "Runtime" = as.double(sim.runtime))

  finalscdddf <- rbind(finalscdddf, simdf)
  # Rewritten after every trial so partial results survive an interrupted run.
  write.csv(finalscdddf, "Comparative Analysis/EE_results/scdd.csv")
}


