#compare_repTiming.R
# Remove every non-hidden object from the current environment so the script
# starts from an empty workspace.
# NOTE(review): when the script is sourced into an interactive session this
# also deletes the caller's own objects; it does not detach packages or reset
# options, so it is not a full "fresh session" - consider running the script
# in a new R process instead.
rm(list = ls())

library(tidyverse)
library(ggplot2)
library(dplyr)
library(tidyr)
library(data.table)
library(tibble)
library(ggpubr)

# Two-colour fill palette used by every plot built in this script.
colors <- c("#E69F00", "#666699")

# All relative input and output paths used by this script are resolved against
# this directory (placeholder path - set it before running).
setwd("/PATH/TO/WORKING/DIR")

##########################################################################################
#import the table of DNMs
##########################################################################################
# Lifted-over coordinates of the final DNM calls (headerless BED file); the
# three columns are named here as chromosome / start / end.
dnms_liftover <- read.delim(file = "mm39_mm10_liftover_finalDNMs.bed", header = FALSE)
colnames(dnms_liftover) <- c("mm10_chr", "mm10_start", "mm10_end")

# Table of DNM calls (first line is the header).
dnms <- read.delim(file = "MpileupDeepvariant_RepeatRemoved_35bpRemoved_BlaclListGenes_Homopolymer_ART_NAT_FINAL.tab")

# The two tables are combined purely by row position, so they must describe the
# same calls in the same order. cbind() on data frames can silently recycle
# rows when one row count is a multiple of the other, so fail loudly instead.
# NOTE(review): this only checks the row counts, not the row order - confirm
# that the liftover output keeps one line per input call.
stopifnot(nrow(dnms) == nrow(dnms_liftover))
dnms <- cbind(dnms, dnms_liftover)

# Print the dimensions of the combined table as a quick visual check.
dim(dnms)

# Collapse to one row per (mm10_chr, mm10_start, cohort) with one column per
# sample; each cell holds the sample name where that sample carries the call
# and NA otherwise. The result stays grouped until the replication timing
# columns are attached further down, where ungroup() is called.
dnms <- dnms %>%
  select(mm10_chr, mm10_start, sample, cohort) %>%
  group_by(mm10_chr, mm10_start, cohort) %>%
  pivot_wider(names_from = sample, values_from = sample)

##########################################################################################
#import the table of repli-seq data from: https://doi.org/10.1016/j.cell.2021.06.025
##########################################################################################

# Replication timing table; lines starting with "#" are skipped as comments.
# Only the interval coordinates (cs, from, to) and the two replication timing
# tracks used below are kept.
repdata <- read.delim(file = "rep_timing/1-s2.0-S0092867421007935-mmc5", comment.char = "#")
repdata <- repdata %>%
  select(c("cs", "from", "to", "expRT_E14_Dey_R1_NA_NA_NA_mm10", "expRT_ESC_ALL_S1_2to4C_NA_NA_mm10"))

# add rep data to dnm data
# For every DNM, look up the interval on the same chromosome that contains its
# start position (from <= start <= to). The timing values are copied only when
# exactly one interval matches; DNMs with no match or several matches stay NA.
n_dnms <- nrow(dnms)
esc_d14 <- rep(NA_real_, n_dnms)
esc_all <- rep(NA_real_, n_dnms)
# seq_len() yields an empty sequence for an empty table, whereas 1:n would
# iterate over c(1, 0).
for (row in seq_len(n_dnms)) {
  overlap <- repdata %>%
    filter(
      cs == dnms$mm10_chr[row],
      dnms$mm10_start[row] >= from,
      dnms$mm10_start[row] <= to
    )

  if (nrow(overlap) == 1) {
    esc_d14[row] <- overlap$expRT_E14_Dey_R1_NA_NA_NA_mm10
    esc_all[row] <- overlap$expRT_ESC_ALL_S1_2to4C_NA_NA_mm10
  }
}

# Attach the two timing vectors and drop the per-sample columns. Columns are
# selected by name rather than by position so the result does not depend on
# how many sample columns the pivot produced.
dnms <- dnms %>%
  ungroup() %>%
  mutate(esc_d14 = esc_d14, esc_all = esc_all)
dnms <- dnms %>% select(mm10_chr, mm10_start, cohort, esc_d14, esc_all)

########################################################################################## 
#sanity check: ask if mutation rates are increased in late replicating regions
########################################################################################## 
# Drop genomic intervals whose Dey replication timing value is exactly 0 (rows
# where it is NA are dropped by filter() as well). The filtered table is the
# genome-wide background for every plot and test that follows.
# NOTE(review): the filter looks only at the Dey track, yet the same filtered
# table is also used as background for the ESC_ALL track - confirm intended.
repdata <- filter(repdata, expRT_E14_Dey_R1_NA_NA_NA_mm10 != 0)

# Theme and axis labels shared by both density panels.
density_look <- list(
  theme_classic(base_size = 14),
  theme(legend.position = "none"),
  labs(x = "Replication Timing", y = "Density")
)

# Genome-wide density (first colour) overlaid with the density of the DNM
# sites (second colour) for the Dey track.
p <- ggplot(repdata, aes(x = expRT_E14_Dey_R1_NA_NA_NA_mm10)) +
  geom_density(fill = colors[1], alpha = 0.5) +
  geom_density(data = dnms, mapping = aes(x = esc_d14), fill = colors[2], alpha = 0.5) +
  density_look +
  ggtitle("Dey et al. 2015")

# Same overlay for the ESC_ALL track.
q <- ggplot(repdata, aes(x = expRT_ESC_ALL_S1_2to4C_NA_NA_mm10)) +
  geom_density(fill = colors[1], alpha = 0.5) +
  geom_density(data = dnms, mapping = aes(x = esc_all), fill = colors[2], alpha = 0.5) +
  density_look +
  ggtitle("Pratto et al. 2021")



#as violin plots
# Long-format table for the Dey track: one `value` column with the timing
# values and a `category` column marking DNM sites ("dnSNV") versus the
# genome-wide intervals ("genome").
tmp1 <- dnms %>% transmute(value = esc_d14, category = "dnSNV")
tmp2 <- repdata %>%
  transmute(value = expRT_E14_Dey_R1_NA_NA_NA_mm10, category = "genome")
tmp <- bind_rows(tmp1, tmp2)

# Violin plot with a narrow box plot on top (outlier points hidden) and the
# group-comparison label added by ggpubr's stat_compare_means().
# Raw points can be overlaid with geom_jitter(color = "gray25", size = 0.4).
r <- ggplot(tmp, aes(x = category, y = value, group = category, fill = category)) +
  geom_violin() +
  geom_boxplot(outlier.shape = NA, width = 0.2) +
  stat_compare_means(label.x = 0.75) +
  scale_fill_manual(values = colors) +
  labs(x = "", y = "Replication Timing", title = "Dey et al. 2015") +
  theme_classic(base_size = 14) +
  theme(legend.position = "none")

# Two-sample Wilcoxon test of DNM sites against the genome-wide background.
wilcox.test(tmp1$value, tmp2$value)


# Long-format table for the ESC_ALL track (panel titled "Pratto et al. 2021"):
# timing values at DNM sites ("dnSNV") versus genome-wide intervals ("genome").
# The DNM values must come from esc_all, the column filled from
# expRT_ESC_ALL_S1_2to4C_NA_NA_mm10. Taking them from esc_d14 would compare
# values from the Dey track against a background from a different track.
tmp1 <- dnms %>% select(esc_all) %>% mutate(category = "dnSNV") %>%
  rename(value = esc_all)
tmp2 <- repdata %>% select(expRT_ESC_ALL_S1_2to4C_NA_NA_mm10) %>%
  mutate(category = "genome") %>%
  rename(value = expRT_ESC_ALL_S1_2to4C_NA_NA_mm10)
tmp <- bind_rows(tmp1, tmp2)

# Violin plot with a narrow box plot on top (outlier points hidden).
s <- ggplot(tmp, aes(x = category, y = value, group = category, fill = category)) +
  #geom_jitter(color="gray25", size=0.4) +
  geom_violin() +
  geom_boxplot(outlier.shape = NA, width = 0.2) +
  scale_fill_manual(values = colors) +
  theme_classic(base_size = 14) +
  theme(legend.position = "none") +
  xlab("") + ylab("Replication Timing") +
  ggtitle("Pratto et al. 2021")
# Add the group-comparison label from ggpubr.
s <- s + stat_compare_means(label.x = 0.75)

# Two-sample Wilcoxon test of DNM sites against the genome-wide background,
# both taken from the ESC_ALL track.
wilcox.test(tmp1$value, tmp2$value)

########################################################################################## 
#ask whether replication time differs between ART and natural DNMs
########################################################################################## 

# Split the DNM table by cohort label.
ivf <- filter(dnms, cohort == "ART")
natural <- filter(dnms, cohort == "Natural")

# Two-sample Wilcoxon tests of replication timing at DNM sites, ART versus
# natural, once per timing track.
wilcox.test(ivf$esc_d14, natural$esc_d14)
wilcox.test(ivf$esc_all, natural$esc_all)


#plot
# Layers, fill scale and theme shared by both cohort panels: violin, narrow
# box plot with outlier points hidden, and ggpubr's group-comparison label.
# Raw points can be overlaid with geom_jitter(color = "gray25", size = 0.4).
cohort_look <- list(
  geom_violin(),
  geom_boxplot(outlier.shape = NA, width = 0.2),
  stat_compare_means(label.x = 0.75),
  scale_fill_manual(values = colors),
  theme_classic(base_size = 14),
  theme(legend.position = "none")
)

# Dey track, by cohort.
a <- ggplot(dnms, aes(x = cohort, y = esc_d14, group = cohort, fill = cohort)) +
  cohort_look +
  labs(x = "", y = "Replication Timing", title = "Dey et al. 2015")

# ESC_ALL track, by cohort.
b <- ggplot(dnms, aes(x = cohort, y = esc_all, group = cohort, fill = cohort)) +
  cohort_look +
  labs(x = "", y = "Replication Timing", title = "Pratto et al. 2021")


# Arrange the four violin panels on a 2 x 2 grid with automatic panel labels.
four_panels <- ggarrange(r, s, a, b, ncol = 2, nrow = 2, labels = "AUTO")
# Draw the figure on the active graphics device.
print(four_panels)
# Pass the figure to ggsave() explicitly. Its default is the last plot that was
# displayed, which is only the arranged figure if that figure happened to be
# printed just before (not the case under source() without echo).
ggsave(
  filename = "rep_timing/repTiming_boxplots_fourPanels.pdf",
  plot = four_panels,
  height = 8,
  width = 8
)

#rmarkdown::render("compare_repTiming.R")
