# NOTE(review): a hard-coded setwd() ties the script to one machine. It is kept
# because every relative path below is resolved against this directory.
setwd("~/Documents/DEGage_stuff/DEGage_testing_2/review_round_2")

# DATA SIMULATION ----
# DEGage_Simulation() must already be defined in the session: load the
# modified version of degage_simulation before running this section.
library(parallel)
# Earlier dispersion grid, kept for reference:
#disps <- c( 0.1, 1, 10, 100, 1000)
# NOTE(review): cell.sizes is not referenced anywhere in the visible script.
cell.sizes <- c(10, 25, 50, 100)

# Simulation grid: one file is written for every combination of dispersion,
# effect size, group-size pair and replicate.
disps <- c(0.1, 0.5, 1, 5, 10)
effect.sizes <- c(1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5)
# ngroup1[k] and ngroup2[k] are always used together as one pair of group
# sizes, so the two vectors must have the same length.
ngroup1 <- c(100, 100, 100, 100, 50, 50, 50, 25, 25, 10)
ngroup2 <- c(100, 50, 25, 10, 50, 25, 10, 25, 10, 10)
stopifnot(length(ngroup1) == length(ngroup2))
replicate <- 1:10
path <- "sim_data/"
# Make sure the output directory exists so write.csv() cannot fail on it.
dir.create(path, showWarnings = FALSE, recursive = TRUE)

# i = dispersion, j = effect size, k = index of the group-size pair,
# w = replicate
for (i in disps) {
  for (j in effect.sizes) {
    for (k in seq_along(ngroup1)) {
      for (w in replicate) {
        # sep = "_" is also inserted after `path` and before ".csv", so the
        # file is "sim_data/_<i>_<j>_<n1>_<n2>_<w>_.csv". The analysis loops
        # split the file name on "_" and read fields 2-5 by position, so this
        # layout must not be changed.
        fname <- paste(path, i, j, ngroup1[k], ngroup2[k], w, ".csv",
                       sep = "_")
        print(fname)
        x <- DEGage_Simulation(
          ngenes = 9000, ndegs = 1000,
          cellgroups = factor(rep(c(1, 2), times = c(ngroup1[k], ngroup2[k]))),
          lfc = j,
          dispersions = i,
          min.prop.zeros = 0.1,
          max.prop.zeros = 0.5
        )
        write.csv(x, fname)
      }
    }
  }
}

files <- list.files("sim_data_3/")
#degage
# Runs DEGage on every simulated data set in sim_data_3/ and writes one result
# file per input file, under the same file name.
library(DEGage)
dir.create("degage_imbalance_res_3_random/", showWarnings = FALSE,
           recursive = TRUE)
for (f in files) {
  # Fields 4 and 5 of the "_"-separated file name are read as the two group
  # sizes (presumably the naming written by the simulation step -- confirm).
  labels <- strsplit(f, "_")[[1]]
  a <- as.numeric(labels[4])
  b <- as.numeric(labels[5])

  print(f)
  counts <- read.csv(paste0("sim_data_3/", f))
  # The first column (named X by read.csv) holds the row names written by
  # write.csv; move it into rownames and drop it from the data.
  rownames(counts) <- counts$X
  counts <- counts[, -1]
  # The group factor below only lines up if there is one column per sample.
  stopifnot(ncol(counts) == a + b)

  res <- DEGage(counts,
                group = factor(rep(c(1, 2), times = c(a, b))),
                perm.preprocess = FALSE,
                subsampled.k = FALSE)
  write.csv(res, paste0("degage_imbalance_res_3_random/", f))
}


#edger
# Runs the edgeR quasi-likelihood pipeline on every simulated data set in
# `files` and writes the genes passing FDR < 0.05 to edger_imbalance_res_3/.
library(edgeR)
dir.create("edger_imbalance_res_3/", showWarnings = FALSE, recursive = TRUE)

for (f in files) {
  # Fields 4 and 5 of the "_"-separated file name are read as the two group
  # sizes (presumably the naming written by the simulation step -- confirm).
  labels <- strsplit(f, "_")[[1]]
  print(f)
  a <- as.numeric(labels[4])
  b <- as.numeric(labels[5])

  counts <- read.csv(paste0("sim_data_3/", f))
  # The first column (named X by read.csv) holds the row names written by
  # write.csv; move it into rownames and drop it from the data.
  rownames(counts) <- counts$X
  counts <- counts[, -1]
  # The group factor below only lines up if there is one column per sample.
  stopifnot(ncol(counts) == a + b)
  group <- factor(rep(c("Group1", "Group2"), times = c(a, b)))

  # Named `dge` rather than `list` so that base::list is not shadowed.
  dge <- DGEList(counts)

  # No-intercept design: one column per group, renamed to the group labels so
  # the contrast below can refer to them directly.
  design <- model.matrix(~ 0 + group)
  colnames(design) <- levels(group)

  sim.starttime <- Sys.time()

  dge <- calcNormFactors(dge)
  print('dispersion')
  # The argument is spelled `robust` (lower case). The previous `Robust = TRUE`
  # did not match it, so robust estimation was not actually requested here;
  # results may differ slightly from runs made with the old spelling.
  dge <- estimateDisp(dge, design, robust = TRUE)
  print("ffit")
  fit <- glmQLFit(dge, design, robust = TRUE)

  onev.two <- makeContrasts(Group1 - Group2, levels = design)
  print("ftest")
  res <- glmQLFTest(fit, contrast = onev.two)

  sim.endtime <- Sys.time()
  # difftime(time1, time2) is time1 - time2, so the end time goes first to get
  # a positive runtime.
  sim.runtime <- difftime(sim.endtime, sim.starttime)

  topgenes <- as.data.frame(
    topTags(res, n = 20000, adjust.method = "fdr", p.value = 0.05)
  )
  write.csv(topgenes, paste0("edger_imbalance_res_3/", f))
}

#wilcoxon
# Runs a per-gene Wilcoxon rank-sum test on every simulated data set in `files`
# and writes raw and FDR-adjusted p-values to wilcoxon_imbalance_res_3/.
dir.create("wilcoxon_imbalance_res_3/", showWarnings = FALSE, recursive = TRUE)
for (f in files) {
  # Fields 4 and 5 of the "_"-separated file name are read as the two group
  # sizes (presumably the naming written by the simulation step -- confirm).
  labels <- strsplit(f, "_")[[1]]
  a <- as.numeric(labels[4])
  b <- as.numeric(labels[5])

  print(f)
  counts <- read.csv(paste0("sim_data_3/", f))
  # The first column (named X by read.csv) holds the row names written by
  # write.csv; move it into rownames and drop it from the data.
  rownames(counts) <- counts$X
  counts <- counts[, -1]
  # The group factor below only lines up if there is one column per sample.
  stopifnot(ncol(counts) == a + b)
  x <- factor(rep(c(1, 2), times = c(a, b)))

  # Split the samples by group once, instead of re-subsetting the whole data
  # frame for every gene. drop = FALSE keeps a single-sample group
  # two-dimensional.
  g1_counts <- as.matrix(counts[, x == 1, drop = FALSE])
  g2_counts <- as.matrix(counts[, x == 2, drop = FALSE])

  # One test per gene (row); seq_len() makes this a no-op for an empty table.
  pvals <- vapply(
    seq_len(nrow(counts)),
    function(j) {
      wilcox.test(as.numeric(g1_counts[j, ]),
                  as.numeric(g2_counts[j, ]))$p.value
    },
    numeric(1)
  )

  simresults <- data.frame(gene = rownames(counts),
                           pval = pvals,
                           FDR = p.adjust(pvals, method = "fdr"))
  write.csv(simresults, paste0("wilcoxon_imbalance_res_3/", f))
}


#DEseq2
# Runs DESeq2 on every simulated data set listed in `files` and writes the
# results table for each to deseq2_imbalance_res_3/ under the same file name.
library(DESeq2)
for (sim_file in files) {
  name_fields <- strsplit(sim_file, "_")[[1]]
  print(sim_file)
  # Positional fields of the "_"-separated file name: 2 = dispersion,
  # 3 = effect size, 4 and 5 = the two group sizes.
  n_first <- as.numeric(name_fields[4])
  n_second <- as.numeric(name_fields[5])
  effect_label <- as.numeric(name_fields[3])
  disp_label <- as.numeric(name_fields[2])

  sim_counts <- read.csv(paste0("sim_data_3/", sim_file))
  # The first column (named X by read.csv) becomes the row names and is then
  # removed from the data.
  rownames(sim_counts) <- sim_counts$X
  sim_counts <- sim_counts[, -1]

  # One-column sample table with a group label per sample, row-named by the
  # count columns.
  group_labels <- c(rep("Group.1", n_first), rep("Group.2", n_second))
  sample_table <- matrix(group_labels,
                         nrow = n_first + n_second,
                         dimnames = list(colnames(sim_counts), "Group"))

  # Every count is shifted up by one before the data set is built.
  # NOTE(review): presumably a pseudo-count to avoid zeros -- confirm intent.
  dds <- DESeqDataSetFromMatrix(countData = sim_counts + 1,
                                colData = sample_table,
                                design = ~Group)

  # Only the DESeq() call itself is timed.
  fit_start <- Sys.time()
  dds_fit <- DESeq(dds)
  fit_end <- Sys.time()
  sim.runtime.DEseq <- fit_end - fit_start

  deseq_table <- results(dds_fit)
  write.csv(deseq_table, paste0("deseq2_imbalance_res_3/", sim_file))
}

#glm.nb
# Fits a negative-binomial GLM (MASS::glm.nb) per gene for every simulated
# data set in sim_data_3/ and writes raw and FDR-adjusted p-values to
# glm_imbalance_res_3/.
library(MASS)

files <- list.files("sim_data_3/")
dir.create("glm_imbalance_res_3/", showWarnings = FALSE, recursive = TRUE)

for (f in files) {
  # Fields 4 and 5 of the "_"-separated file name are read as the two group
  # sizes (presumably the naming written by the simulation step -- confirm).
  labels <- strsplit(f, "_")[[1]]
  print(f)
  a <- as.numeric(labels[4])
  b <- as.numeric(labels[5])

  counts <- read.csv(paste0("sim_data_3/", f))
  # The first column (named X by read.csv) holds the row names written by
  # write.csv; move it into rownames and drop it from the data.
  rownames(counts) <- counts$X
  counts <- counts[, -1]
  # The group vector below only lines up if there is one column per sample.
  stopifnot(ncol(counts) == a + b)

  # The group assignment is the same for every gene, so build it once.
  group <- rep(c(1, 2), times = c(a, b))

  # Preallocate with NA: genes whose fit fails simply keep NA.
  pvals <- rep(NA_real_, nrow(counts))
  for (i in seq_len(nrow(counts))) {
    gene_data <- data.frame(counts = as.numeric(counts[i, ]),
                            group = group)
    # glm.nb() can fail to fit for some genes; treat that as "no p-value"
    # instead of aborting the whole run.
    fit <- tryCatch(glm.nb(counts ~ group, data = gene_data),
                    error = function(e) NULL)
    if (!is.null(fit)) {
      # Row 2 of the analysis-of-deviance table is the `group` term.
      pvals[i] <- anova(fit)$`Pr(>Chi)`[2]
    }
  }

  simresults <- data.frame(gene = rownames(counts),
                           pval = pvals,
                           FDR = p.adjust(pvals, method = "fdr"))
  write.csv(simresults, paste0("glm_imbalance_res_3/", f))
}

