# This file tests the Wilcoxon test on imbalanced data.
#
# For every count table found in data/ the script:
#   1. reads the two group sizes from the file name (expected to look like
#      "<n1>v<n2>.csv"),
#   2. runs a per-gene two-sample Wilcoxon test between the first n1 sample
#      columns and the following n2 sample columns,
#   3. scores the calls against a fixed truth set (the first `n_true_de` rows
#      are treated as truly differentially expressed, the rest as not), and
#   4. writes the per-gene results and a running summary table to wilcoxon/.
# Each file is processed `n_trials` times, one output file per trial.
setwd("~/Documents/DEGage_stuff/DEGage_Testing/Comparative Analysis/imbalanced_resutls/")

# Number of repeated trials per input file.
n_trials <- 5
# Rows 1..n_true_de of every count table are treated as the true DE genes.
n_true_de <- 2000
# Significance threshold applied to the p-values when calling DE genes.
alpha <- 0.05

# testing
fs <- list.files("data/")
allsims <- data.frame()

for (f in fs) {
  counts <- read.csv(paste0("data/", f))
  # First column holds the gene identifiers; the remaining columns are samples.
  rownames(counts) <- counts[, 1]
  counts <- counts[, -1, drop = FALSE]

  # Parse "<n1>v<n2>.<ext>" into c(n1, n2). The extension is stripped first so
  # that a "v" inside it (as in ".csv") cannot interfere with the split.
  size_label <- sub("\\.[^.]*$", "", f)
  group_sizes <- as.numeric(strsplit(size_label, "v", fixed = TRUE)[[1]])
  if (length(group_sizes) != 2 || anyNA(group_sizes)) {
    stop("Cannot parse group sizes from file name: ", f, call. = FALSE)
  }

  # Group label of every sample column: n1 ones followed by n2 twos.
  group <- rep(c(1, 2), times = group_sizes)
  if (length(group) != ncol(counts)) {
    stop("File ", f, " has ", ncol(counts), " sample columns but its name ",
         "implies ", length(group), call. = FALSE)
  }

  genes <- rownames(counts)
  # Truth set: the leading n_true_de genes are DE, everything after is not.
  # Logical masks keep this well-defined even for tables with fewer rows.
  is_true_de <- seq_along(genes) <= n_true_de
  trueDEs <- genes[is_true_de]
  trueEEs <- genes[!is_true_de]

  for (trial in seq_len(n_trials)) {
    trial_id <- paste0(group_sizes[1], "_", group_sizes[2], "_", trial)
    print(trial_id)

    sim.starttime <- Sys.time()
    # Split the samples once per trial instead of once per gene; drop = FALSE
    # keeps a matrix even when a group consists of a single sample.
    count_mat <- as.matrix(counts)
    g1_mat <- count_mat[, group == 1, drop = FALSE]
    g2_mat <- count_mat[, group == 2, drop = FALSE]
    pvals <- vapply(
      seq_len(nrow(count_mat)),
      function(i) {
        wilcox.test(as.numeric(g1_mat[i, ]), as.numeric(g2_mat[i, ]))$p.value
      },
      numeric(1)
    )
    simresults <- data.frame(gene = genes,
                             pval = pvals,
                             FDR = p.adjust(pvals, method = "fdr"))
    sim.endtime <- Sys.time()
    # Wall-clock time of the testing step; computed but not written to any
    # output file.
    sim.runtime <- difftime(sim.endtime, sim.starttime, units = "sec")

    # Genes whose test produced no p-value are dropped from the results and
    # therefore count as "not called" below.
    simresults <- simresults[!is.na(simresults$pval), ]
    # NOTE(review): calls are made on the raw p-value; the FDR column is
    # written out but not used for calling -- confirm this is intended.
    expDEs <- simresults$gene[simresults$pval <= alpha]

    tp <- sum(trueDEs %in% expDEs)
    fp <- sum(trueEEs %in% expDEs)
    tn <- sum(!(trueEEs %in% expDEs))
    fn <- sum(!(trueDEs %in% expDEs))

    sim.sensitivity <- tp / (tp + fn)
    sim.specificity <- tn / (tn + fp)
    precision <- tp / (tp + fp)
    # Divide by the actual number of genes rather than a fixed 20000.
    accuracy <- (tp + tn) / (tp + tn + fp + fn)
    f1 <- (2 * tp) / (2 * tp + fp + fn)

    simdf <- data.frame("Trial" = trial_id,
                        DEGs = length(expDEs),
                        "Sensitivity" = sim.sensitivity,
                        "Specificity" = sim.specificity,
                        "Precision" = precision,
                        "Accuracy" = accuracy,
                        "F1" = f1,
                        g1 = group_sizes[1],
                        g2 = group_sizes[2])
    allsims <- rbind(allsims, simdf)
    print(simdf)

    simresults$trial_id <- trial_id
    simresults$g1 <- group_sizes[1]
    simresults$g2 <- group_sizes[2]

    fname <- paste0("wilcoxon/", trial_id, ".csv")
    write.csv(simresults, fname)
    # Rewritten after every trial so partial results survive an interrupted
    # run.
    write.csv(allsims, "wilcoxon/allsims.csv")
  }
}
