#This file tests DESeq2 on imbalanced data

library(DESeq2)

# Switch to the results directory. Every relative path used later in this
# script ("data/" for the input count tables, "Deseq2/" for the output CSVs)
# is resolved against this location.
# NOTE(review): this is a hard-coded, machine-specific path; setwd() raises an
# error if the directory does not exist, so adjust it before running elsewhere.
setwd("~/Documents/DEGage_stuff/DEGage_Testing/Comparative Analysis/imbalanced_resutls/")

# Benchmark DESeq2 on every simulated count table found in data/.
#
# Input:  data/<n1>v<n2>.csv, where <n1> and <n2> are the sample counts of
#         Group.1 and Group.2. The first CSV column holds the gene IDs and the
#         remaining n1 + n2 columns hold the counts (Group.1 samples first).
# Truth:  the first `n_true_de` rows of each table are treated as truly
#         differentially expressed (DE); all remaining rows as equally
#         expressed (EE).
# Output: Deseq2/<n1>_<n2>_<rep>.csv - DESeq2 results for the genes called
#                                      significant (padj <= alpha) in a trial
#         Deseq2/allsims.csv         - one row of performance metrics per trial,
#                                      rewritten after every trial so partial
#                                      results survive an interrupted run
n_true_de <- 2000   # number of leading rows treated as true DE genes
n_reps <- 5         # how many times DESeq2 is re-run on each table
alpha <- 0.05       # adjusted p-value cutoff used to call a gene DE

fs <- list.files("data/", pattern = "\\.csv$")
allsims <- data.frame()
for (f in fs) {
  counts <- read.csv(file.path("data", f))
  rownames(counts) <- counts[, 1]
  counts <- counts[, -1, drop = FALSE]

  # Recover the two group sizes from the file name. The extension is removed
  # BEFORE splitting on "v": otherwise the "v" inside "csv" is also treated as
  # a separator.
  group_sizes <- as.numeric(
    strsplit(sub("\\.csv$", "", f), "v", fixed = TRUE)[[1]]
  )
  if (length(group_sizes) != 2 || anyNA(group_sizes)) {
    stop("Cannot parse group sizes from file name: ", f, call. = FALSE)
  }
  if (nrow(counts) < n_true_de) {
    stop("Fewer than ", n_true_de, " genes in file: ", f, call. = FALSE)
  }

  # Everything below is identical for each replicate, so build it once per file.
  sample_info <- data.frame(
    Group = factor(c(rep("Group.1", group_sizes[1]),
                     rep("Group.2", group_sizes[2]))),
    row.names = colnames(counts)
  )
  # Every count is shifted by +1 before fitting.
  # NOTE(review): presumably a pseudocount to avoid all-zero genes - confirm.
  DESeqobj <- DESeqDataSetFromMatrix(countData = counts + 1,
                                     colData = sample_info,
                                     design = ~Group)

  gene_ids <- rownames(counts)
  trueDEs <- gene_ids[seq_len(n_true_de)]
  trueEEs <- gene_ids[-seq_len(n_true_de)]

  for (j in seq_len(n_reps)) {
    trial_id <- paste0(group_sizes[1], "_", group_sizes[2], "_", j)
    print(trial_id)

    sim.starttime <- Sys.time()
    sim.Deseq <- DESeq(DESeqobj)
    sim.endtime <- Sys.time()
    # Wall-clock time of the DESeq() call; kept for interactive inspection,
    # it is not written to any output file.
    sim.runtime.DEseq <- sim.endtime - sim.starttime
    simresults <- results(sim.Deseq)

    # Keep only the genes called significant. Rows with an NA padj are
    # dropped first so they cannot turn into NA row names.
    res <- as.data.frame(simresults)
    res <- res[!is.na(res$padj) & res$padj <= alpha, , drop = FALSE]
    expDEs <- rownames(res)

    # Confusion-matrix counts against the simulated ground truth.
    tp <- sum(trueDEs %in% expDEs)
    fp <- sum(trueEEs %in% expDEs)
    tn <- sum(!(trueEEs %in% expDEs))
    fn <- sum(!(trueDEs %in% expDEs))

    sim.sensitivity <- tp / (tp + fn)
    sim.specificity <- tn / (tn + fp)
    precision <- tp / (tp + fp)
    # tp + tn + fp + fn equals the number of genes in this table.
    accuracy <- (tp + tn) / nrow(counts)
    f1 <- (2 * tp) / (2 * tp + fp + fn)

    simdf <- data.frame("Trial" = trial_id,
                        DEGs = length(expDEs),
                        "Sensitivity" = sim.sensitivity,
                        "Specificity" = sim.specificity,
                        "Precision" = precision,
                        "Accuracy" = accuracy,
                        "F1" = f1,
                        g1 = group_sizes[1],
                        g2 = group_sizes[2])
    allsims <- rbind(allsims, simdf)
    print(simdf)

    # rep(..., nrow(res)) keeps these assignments valid when no gene is
    # significant and `res` has zero rows.
    res$trial_id <- rep(trial_id, nrow(res))
    res$g1 <- rep(group_sizes[1], nrow(res))
    res$g2 <- rep(group_sizes[2], nrow(res))

    fname <- file.path("Deseq2", paste0(trial_id, ".csv"))
    write.csv(res, fname)
    write.csv(allsims, "Deseq2/allsims.csv")
  }
}


