#This file generates the imbalance validation figure

# Every relative path passed to read.csv() further down is resolved against
# this directory, so the script only runs on a machine with this layout.
setwd("~/Documents/DEGage_stuff/DEGage_Testing/Comparative Analysis/imbalanced_resutls")

library(ggplot2)
library(ggpubr)
library(tidyverse)
library(rstatix)

# Trial labels ("<g1> v <g2>") in the order they are used as factor levels.
# NOTE: this variable shares its name with base::order(); calls to order()
# still work because R skips non-function bindings when resolving a call.
order <- c(
  "100 v 100", "100 v 75", "75 v 75",
  "100 v 50", "50 v 50", "100 v 25",
  "25 v 25"
)

# Read one method's simulation results and annotate them.
#   csv_path:  path of the allsims.csv file to read
#   test_name: value stored in the new `test` column
# Returns the data frame with two added columns: `test`, and `trial_label`,
# a factor built from the g1 and g2 columns with levels taken from `order`.
read_sims <- function(csv_path, test_name) {
  sims <- read.csv(csv_path)
  sims$test <- test_name
  sims$trial_label <- factor(paste(sims$g1, "v", sims$g2), levels = order)
  sims
}

degage.old <- read_sims("degage/allsims.csv", "Subsampling")
degage.new <- read_sims("degage_boostrap/allsims.csv", "Random Assignment")
wilcox <- read_sims("wilcoxon/allsims.csv", "Wilcoxon")
edger <- read_sims("edger/allsims.csv", "EdgeR")
deseq2 <- read_sims("Deseq2/allsims.csv", "DESeq2")
monocle <- read_sims("monocle/allsims.csv", "Monocle3")

# Pairwise t-tests of F1 between trial labels, keeping only the comparisons
# whose two labels end in the same three characters (for example "100 v 75"
# against "75 v 75").
#   sims: data frame with an F1 column and a trial_label column
# Returns the filtered rstatix::t_test() result.
same_suffix_t_tests <- function(sims) {
  last_three <- function(label) {
    substr(label, nchar(label) - 2, nchar(label))
  }
  sims %>%
    t_test(F1 ~ trial_label) %>%
    filter(last_three(group1) == last_three(group2))
}

# Box plot (with the raw points overlaid) of F1 per trial label for one method.
#   sims:         data frame with trial_label and F1 columns
#   title:        plot title, centred above the panel
#   show_x_title: TRUE to label the x axis with the sample-size description,
#                 FALSE to blank the x-axis title
# Returns a ggplot object with the y axis fixed to [0, 1].
f1_boxplot <- function(sims, title, show_x_title = TRUE) {
  x_axis_title <- if (show_x_title) {
    xlab("Sample sizes of condition 1 vs. condition 2")
  } else {
    theme(axis.title.x = element_blank())
  }

  ggplot(sims, aes(x = trial_label, y = F1)) +
    geom_boxplot() +
    geom_point() +
    theme_classic() +
    theme(
      axis.ticks.x = element_blank(),
      axis.text.x = element_text(angle = 45, hjust = 1),
      plot.title = element_text(hjust = 0.5)
    ) +
    x_axis_title +
    ylim(0, 1) +
    ggtitle(title)
}

# `p` is currently not drawn on any plot. To annotate a plot with it, add
#   stat_pvalue_manual(p, label = "p", tip.length = 0.005, y.position = c(1))
# to the corresponding plot object.
p <- same_suffix_t_tests(degage.old)
degage.old.plot <- f1_boxplot(degage.old, "DEGage with subsampling")

p <- same_suffix_t_tests(degage.new)
degage.new.plot <- f1_boxplot(degage.new, "DEGage with random assignment")

wilcox.plot <- f1_boxplot(wilcox, "Wilcoxon", show_x_title = FALSE)
edger.plot <- f1_boxplot(edger, "EdgeR", show_x_title = FALSE)
deseq2.plot <- f1_boxplot(deseq2, "DESeq2", show_x_title = FALSE)
monocle.plot <- f1_boxplot(monocle, "Monocle3")

# Overview figure: one panel per method.
ggarrange(
  degage.old.plot, degage.new.plot, wilcox.plot,
  edger.plot, deseq2.plot, monocle.plot
)


# Significance testing between DEGage trials
# Round a p-value (or a vector of p-values) for display.
#   p_val: numeric value(s) to round
# Returns p_val rounded to three decimal places.
format_p_value <- function(p_val) {
  shown_digits <- 3  # adjust to show more or fewer decimals
  round(p_val, digits = shown_digits)
}
# Stack both DEGage variants; the `test` column tells them apart and drives
# the box colour in the plots below.
degage.all <- rbind(degage.new, degage.old)

# Layers shared by the F1, sensitivity and specificity panels. Adding a list
# to a ggplot adds its elements in order, so each panel only has to supply
# its own aesthetic mapping.
comparison_layers <- list(
  geom_boxplot(fill = "white"),
  theme_classic(),
  # t-test p-value label; the y limit below is widened to 1.05 so that a
  # label placed at y = 1.05 stays inside the panel.
  stat_compare_means(
    method = "t.test", label = "p.format", size = 3,
    label.x = 1.5, label.y = 1.05
  ),
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ),
  ylim(0, 1.05),
  scale_color_manual(
    name = "DEGage Version",
    values = c("darkseagreen", "thistle4")
  )
)

f1 <- ggplot(degage.all, aes(x = trial_label, y = F1, color = test)) +
  comparison_layers

sens <- ggplot(degage.all, aes(x = trial_label, y = Sensitivity, color = test)) +
  comparison_layers

spec <- ggplot(degage.all, aes(x = trial_label, y = Specificity, color = test)) +
  comparison_layers

# Top row: the three DEGage metrics with one shared legend.
# Bottom row: F1 box plots of the competing methods.
p1 <- ggarrange(f1, sens, spec, ncol = 3, common.legend = TRUE)
p2 <- ggarrange(wilcox.plot, edger.plot, deseq2.plot, ncol = 3)
ggarrange(p1, p2, nrow = 2)


# Make supplementary table: one column per trial label, one row per method
# holding the mean F1, followed by the F1 standard deviation of the two DEGage
# variants.
#
# The previous version assigned five row names to this seven-row table, which
# makes `rownames<-` fail before the file is written; all seven rows are now
# named as they are built.

# Methods reported in the table, keyed by their row name.
# NOTE(review): Monocle3 results are loaded earlier in the script but are not
# part of this table - confirm that this is intended.
f1_sources <- list(
  "Random Assignment" = degage.new,
  "Subsampling" = degage.old,
  "Wilcoxon" = wilcox,
  "Edger" = edger,
  "DESeq2" = deseq2
)

# Apply `stat` (e.g. mean or sd) to the F1 values of every trial label.
#   sims: data frame with F1 and trial_label columns
#   stat: function reducing a numeric vector to a single number
# Returns a numeric vector named by the entries of `order`.
f1_by_trial <- function(sims, stat) {
  vapply(
    order,
    function(label) stat(sims$F1[sims$trial_label == label]),
    numeric(1)
  )
}

mean_rows <- do.call(rbind, lapply(f1_sources, f1_by_trial, stat = mean))

sd_rows <- do.call(
  rbind,
  lapply(f1_sources[c("Random Assignment", "Subsampling")], f1_by_trial, stat = sd)
)
rownames(sd_rows) <- paste(rownames(sd_rows), "SD")

# check.names = FALSE keeps the "100 v 75"-style labels as column names.
supp <- data.frame(rbind(mean_rows, sd_rows), check.names = FALSE)

write.csv(supp, "~/Documents/DEGage_stuff/DEGage_Testing/reviewer_figures/imbalanceF1s.csv")


