#############################################################
### Plots of epinano results for eligos2 identified sites ###
#############################################################

######################
### Load Packages ####
######################

pkgs <- c("backports","tidyverse","here","skimr","dplyr", "ggplot2", "ggsci","ggforce",
          "janitor","readxl","xlsx", "MetBrewer","ggrepel", "usethis", "ggpubr", "rstatix")

lapply(pkgs, library, character.only = TRUE)

# Task 1.1: Read the processed files
#
# Two tab-separated tables are read from paths resolved with here().
# Presumably one table per EpiNano model ("default" and "ivt") -- the file
# names suggest so; confirm against the `model` column.

default <- read_tsv(
  here("data", "epinano_human_eligos2_sites", "processed", "default_processed.txt")
)

ivt <- read_tsv(
  here("data", "epinano_human_eligos2_sites", "processed", "ivt_processed.txt")
)

# Stack both tables row-wise. rbind() is kept on purpose: it stops with an
# error when the column names of the two tables do not match.
master_df <- rbind(default, ivt)

# Encode the DRACH flag as a factor with level "1" first, so that level comes
# first wherever the factor order is used (e.g. facet_wrap(~is_DRACH) below).
master_df$is_DRACH <- factor(master_df$is_DRACH, levels = c("1", "0"))


# Task 1.2: Save custom theme
#
# Theme overrides that every plot in this script adds on top of theme_pubr():
# size-15 text, legend at the bottom without a title, light horizontal grid
# lines and a black panel border.
# NOTE: the object is called `t`, like base::t(). Calls such as t(x) still
# resolve to the base function because R skips non-function objects when
# looking up a function name.

t <- local({
  # Element objects that are reused for several theme slots.
  text_15 <- element_text(size = 15)
  grid_y  <- element_line(color = "grey90")

  theme(
    legend.title       = element_blank(),
    legend.text        = text_15,
    legend.position    = "bottom",
    axis.text          = text_15,
    axis.title         = text_15,
    panel.grid.major.y = grid_y,
    panel.grid.minor.y = grid_y,
    panel.border       = element_rect(colour = "black", fill = NA),
    strip.text.x       = text_15
  )
})

########################################################
### Task 2: Plot DRACH vs. non-DRACH on human sites ####
########################################################

### Task 2.1: Calculate pval
#
# Within each is_DRACH group, compare delta_sumerr between the models with a
# Wilcoxon test ("default" is the reference group), adjust the p-values with
# the Hochberg method and add significance symbols for the adjusted values.

stat.test.DRACH <- wilcox_test(
  group_by(master_df, is_DRACH),
  delta_sumerr ~ model,
  ref.group = "default"
)
stat.test.DRACH <- adjust_pvalue(stat.test.DRACH, method = "hochberg")
stat.test.DRACH <- add_significance(stat.test.DRACH, "p.adj")
stat.test.DRACH

# add x_y_position
#
# Bracket coordinates for a plot with `model` on the x axis; the brackets are
# then lifted by 0.05 on the y axis.
# NOTE(review): none of the plotting code visible in this script reads
# stat.test.DRACH -- confirm whether the table is only meant for reporting.

stat.test.DRACH <- add_xy_position(stat.test.DRACH, x = "model", dodge = 0.8)
stat.test.DRACH[["y.position"]] <- stat.test.DRACH[["y.position"]] + 0.05


### Task 2.3: Generate boxplots
#
# Notched boxplots with a semi-transparent violin drawn on top, showing
# delta_sumerr per model, faceted into DRACH ("1") and non-DRACH ("0") sites.
# The plot is saved as a 3 x 4 inch PDF.

library(EnvStats)  # provides stat_n_text()
library(ggrepel)

master_df %>%
  ggplot(aes(x = model, y = delta_sumerr, fill = model)) +
  # `fill = model` is inherited from the global mapping by both layers.
  geom_boxplot(
    width = 0.25, color = "grey20", position = position_dodge(width = 0.85),
    notch = TRUE, alpha = 0.9, lwd = 0.4
  ) +
  geom_violin(
    position = position_dodge(width = 0.85),
    alpha = 0.5, show.legend = FALSE, color = NA, scale = "width", width = 0.7
  ) +
  facet_wrap(~is_DRACH, labeller = as_labeller(c(`1` = "DRACH", `0` = "non-DRACH"))) +
  # Sample size per box.
  stat_n_text(size = 5) +
  # NOTE(review): this layer computes the default-vs-ivt comparison itself and
  # does not use stat.test.DRACH; confirm whether the Hochberg-adjusted values
  # were meant to be shown instead (e.g. via ggpubr::stat_pvalue_manual()).
  stat_compare_means(
    comparisons = list(c("default", "ivt")),
    tip.length = .02, size = 5, label = "p.signif"
  ) +
  scale_fill_manual(values = c('#E64B35FF', "#00A087FF")) +
  labs(x = "", y = expression(Delta*"SumErr")) +
  theme_pubr() + t +
  theme(axis.text.x = element_text(size = 15, face = "bold"),
        legend.text = element_text(size = 15))

# Save the plot built above (last_plot() returns the most recent ggplot).
ggsave(filename = "DRACH_vs_nonDRACH.pdf",
       plot = last_plot(),
       path = here("results_Gregor", "plots", "human_in_vivo", "epinano_on_eligos2_sites"),
       width = 3,
       height = 4,
       units = "in")


########################################################
### Task 3: Plot individual motifs of in-vivo sites ####
########################################################

# Keep only the sites whose 5-mer is one of four selected motifs and turn the
# 5-mer into a factor whose level order fixes the facet order of the plot.
# All four motifs match the DRACH consensus (D = A/G/T, R = A/G, H = A/C/T).
# The row selection does not depend on `model`, so the data is not grouped
# here; this also avoids handing a grouped tibble to the following steps.
master_df_DRACH <- master_df %>%
  filter(`5mer` %in% c("GGACT", "GGACC", "GGACA", "AGACT")) %>%
  mutate(`5mer` = factor(`5mer`, levels = c("GGACT", "GGACC", "GGACA", "AGACT")))

# Task 3.1 Calculate pval
#
# Per 5-mer, compare delta_sumerr between the models with a Wilcoxon test,
# adjust the p-values with the Hochberg method and add significance symbols.
# NOTE(review): despite the object name, `paired = TRUE` is not passed to
# wilcox_test() -- confirm that an unpaired test is intended.

stat.test.paired <- wilcox_test(
  group_by(master_df_DRACH, `5mer`),
  delta_sumerr ~ model
)
stat.test.paired <- adjust_pvalue(stat.test.paired, method = "hochberg")
stat.test.paired <- add_significance(stat.test.paired, "p.adj")
stat.test.paired


# Notched boxplots with a semi-transparent violin drawn on top, showing
# delta_sumerr per model, with one facet per selected 5-mer in a single row.
# The plot is saved as a 5.5 x 3.5 inch PDF.
master_df_DRACH %>%
  ggplot(aes(x = model, y = delta_sumerr, fill = model)) +
  # `fill = model` is inherited from the global mapping by both layers.
  geom_boxplot(
    width = 0.25, color = "grey20", position = position_dodge(width = 0.85),
    notch = TRUE, alpha = 0.9, lwd = 0.4
  ) +
  geom_violin(
    position = position_dodge(width = 0.85),
    alpha = 0.5, show.legend = FALSE, color = NA, scale = "width", width = 0.7
  ) +
  facet_wrap(~`5mer`, nrow = 1) +
  # Sample size per box.
  stat_n_text(size = 5) +
  # NOTE(review): this layer computes the default-vs-ivt comparison itself and
  # does not use stat.test.paired; confirm whether the Hochberg-adjusted
  # values were meant to be shown instead.
  stat_compare_means(
    comparisons = list(c("default", "ivt")),
    tip.length = .02, size = 5, label = "p.signif"
  ) +
  scale_fill_manual(values = c('#E64B35FF', "#00A087FF")) +
  labs(x = "", y = expression(Delta*"SumErr")) +
  theme_pubr() + t +
  theme(axis.text.x = element_text(size = 15, face = "bold"),
        legend.text = element_text(size = 15))

# Save the plot built above (last_plot() returns the most recent ggplot).
ggsave(filename = "top4_DRACH_motifs.pdf",
       plot = last_plot(),
       path = here("results_Gregor", "plots", "human_in_vivo", "epinano_on_eligos2_sites"),
       width = 5.5,
       height = 3.5,
       units = "in")

####################################################
### Task 4: Plot EpiNano-scores vs. GLORI-score ####
####################################################

# Scatter plot of delta_sumerr against the mean GLORI score, coloured by
# model, with a smoothed trend per model (smoothing method left at the
# ggplot2 default). The plot is saved as a 4 x 4 inch PDF.
master_df %>%
  # keep only GLORI-matches: drop rows lacking a value in the plotted columns.
  # The NA check is restricted to these columns so that missing values in
  # unrelated columns do not silently remove matched sites.
  # NOTE(review): assumes sites without a GLORI match have
  # mean_glori_score = NA -- confirm against the processing step.
  drop_na(mean_glori_score, delta_sumerr) %>%
  ggplot(aes(x = mean_glori_score, y = delta_sumerr, color = model)) +
  geom_point(alpha = 0.1, size = 1.5) +
  # `color = model` is inherited; only the ribbon fill has to be mapped here.
  geom_smooth(aes(fill = model)) +
  scale_color_manual(values = c('#E64B35FF', "#00A087FF")) +
  scale_fill_manual(values = c('#E64B35FF', "#00A087FF")) +
  labs(x = "Absolute m6A-level [GLORI-score]", y = expression(Delta*"SumErr")) +
  theme_pubr() + t +
  # Additionally show vertical grid lines for this continuous x axis.
  theme(axis.text.x = element_text(size = 15),
        legend.text = element_text(size = 15),
        legend.position = "bottom",
        panel.grid.major.x = element_line(color = "grey90"),
        panel.grid.minor.x = element_line(color = "grey90"))

# Save the plot built above (last_plot() returns the most recent ggplot).
ggsave(filename = "dsumerr_vs_glori_scors.pdf",
       plot = last_plot(),
       path = here("results_Gregor", "plots", "human_in_vivo", "epinano_on_eligos2_sites"),
       width = 4,
       height = 4,
       units = "in")

