#This file tests DEGage on the imbalanced datasets.
#It was run twice: once with the subsampled.k parameter = T, and once with
#subsampled.k = F

library(DEGage)
# Make the analysis directory the working directory: the rest of the script
# reads its inputs from "data/" and writes its outputs to "degage_boostrap/"
# using paths relative to this location.
# NOTE(review): this is an absolute, machine-specific path -- adjust it (or
# launch R from the analysis directory instead) before running elsewhere.
setwd("~/Documents/DEGage_stuff/DEGage_Testing/Comparative Analysis/imbalanced_resutls/")

# Benchmark DEGage on every simulated dataset found in "data/".
#
# For each input file the count matrix is loaded, the two group sizes are
# parsed from the file name, and DEGage is run `n_trials` times. Each trial's
# significant genes are compared with the known truth (the first `n_true_de`
# rows are treated as truly differentially expressed, every remaining row as
# equally expressed) to obtain sensitivity, specificity, precision, accuracy
# and F1.
#
# Outputs (relative to the working directory):
#   degage_boostrap/<g1>_<g2>_<trial>.csv  filtered DEGage results of a trial
#   degage_boostrap/allsims.csv            one summary row per finished trial

# Analysis settings.
n_true_de <- 2000        # leading rows of each matrix treated as true DE genes
n_trials <- 10           # DEGage runs per dataset
perm_pval_cutoff <- 0.1  # keep genes with permPvals below this value
fdr_cutoff <- 0.05       # keep genes with FDR at or below this value
subsampled_k <- FALSE    # value passed to DEGage's `subsampled.k` argument

fs <- list.files("data/")
allsims <- data.frame()
for (f in fs) {
  # First CSV column holds the gene identifiers; move it into the row names.
  counts <- read.csv(paste0("data/", f))
  rownames(counts) <- counts[, 1]
  counts <- counts[, -1, drop = FALSE]

  # Parse the group sizes from the file name.
  # Assumes names of the form "<n1>v<n2>.csv" -- TODO confirm. Splitting on
  # "v" also splits inside the ".csv" extension, so the second piece ends in
  # ".cs"; dropping its last three characters leaves just the number.
  parts <- strsplit(f, "v")[[1]]
  parts[2] <- substr(parts[2], 1, nchar(parts[2]) - 3)
  group_sizes <- as.numeric(parts[1:2])
  if (anyNA(group_sizes)) {
    stop("Could not parse group sizes from file name: ", f, call. = FALSE)
  }

  # One label per sample column: `group_sizes[1]` ones followed by
  # `group_sizes[2]` twos.
  group <- factor(rep(c(1, 2), times = group_sizes))

  # The ground truth depends only on the dataset, so compute it once per file.
  gene_ids <- rownames(counts)
  true_de <- gene_ids[seq_len(n_true_de)]
  true_ee <- gene_ids[-seq_len(n_true_de)]

  for (j in seq_len(n_trials)) {
    trial_id <- paste0(group_sizes[1], "_", group_sizes[2], "_", j)
    print(trial_id)

    res <- DEGage(counts, group, perm.preprocess = FALSE,
                  subsampled.k = subsampled_k)

    # Keep genes that have a p-value and pass both significance cut-offs.
    # which() drops comparisons that evaluate to NA; plain logical indexing
    # would instead insert all-NA phantom rows and inflate the DEG count.
    # NOTE(review): confirm that DEGage fills `permPvals` when
    # perm.preprocess = FALSE; if it does not, no gene can pass this filter.
    is_hit <- !is.na(res$pval) &
      res$permPvals < perm_pval_cutoff &
      res$FDR <= fdr_cutoff
    res <- res[which(is_hit), , drop = FALSE]
    exp_de <- rownames(res)

    # Confusion matrix against the simulated truth.
    tp <- sum(true_de %in% exp_de)
    fp <- sum(true_ee %in% exp_de)
    tn <- sum(!(true_ee %in% exp_de))
    fn <- sum(!(true_de %in% exp_de))

    sim_sensitivity <- tp / (tp + fn)
    sim_specificity <- tn / (tn + fp)
    precision <- tp / (tp + fp)
    # Divide by the number of genes actually evaluated rather than a
    # hard-coded 20000, so matrices of any size are scored correctly.
    accuracy <- (tp + tn) / (tp + fp + tn + fn)
    f1 <- (2 * tp) / (2 * tp + fp + fn)

    simdf <- data.frame("Trial" = trial_id,
                        DEGs = length(exp_de),
                        "Sensitivity" = sim_sensitivity,
                        "Specificity" = sim_specificity,
                        "Precision" = precision,
                        "Accuracy" = accuracy,
                        "F1" = f1,
                        g1 = group_sizes[1],
                        g2 = group_sizes[2])
    allsims <- rbind(allsims, simdf)
    print(simdf)

    # Tag every result row with its trial. rep(..., nrow(res)) keeps the
    # assignment valid when no gene passed the filter; assigning a length-one
    # value to a zero-row data frame is an error.
    res$trial_id <- rep(trial_id, nrow(res))
    res$g1 <- rep(group_sizes[1], nrow(res))
    res$g2 <- rep(group_sizes[2], nrow(res))

    write.csv(res, paste0("degage_boostrap/", trial_id, ".csv"))
    # Rewritten on every trial, so the file on disk always holds the summary
    # of all trials completed so far.
    write.csv(allsims, "degage_boostrap/allsims.csv")
  }
}

