library(GenomicRanges)
library(parallel)
library(tidyverse)
library(broom)
library(ComplexHeatmap)
library(ggsci)
library(ggrastr)
library(cowplot)
library(scales)
# Default ggplot2 theme used by every plot created after this point.
theme_set(theme_bw(base_size = 14))

# Bin table read from TSV and converted to a GRanges object.
bins_bed <- makeGRangesFromDataFrame(
  read_tsv("data/chm13v2_200bp_bins.tsv")
)

# SV-mQTL association table. The code below reads the columns bin_chrom,
# bin_start, bin_end, sv_start, sv_end, estimate and adj.p.value from it.
qtls_df <- read_tsv("data/SV_mQTLs_p005.tsv.gz")

# For every methylation bin keep the single association with the smallest
# adjusted p-value, and add `bin_chr`: the chromosome name (without the "chr"
# prefix) as a factor whose levels are in natural order (1, 2, ..., 10, ...).
#
# The factor is built on the *ungrouped* table. Inside a grouped mutate(),
# `unique(bin_chrom)` only ever sees the one chromosome of the current bin, so
# the levels cannot be sorted numerically there (and a separate factor would be
# built for every bin, which is slow). The per-bin grouping is restored at the
# end so the result has the same grouping structure as before.
pvalues <- qtls_df %>%
  mutate(bin_chrom = str_remove(bin_chrom, "chr")) %>%
  group_by(bin_chrom, bin_start, bin_end) %>%
  slice_min(order_by = adj.p.value, with_ties = FALSE) %>%
  ungroup() %>%
  mutate(bin_chr = factor(
    bin_chrom,
    levels = str_sort(unique(bin_chrom), numeric = TRUE)
  )) %>%
  group_by(bin_chrom, bin_start, bin_end)

# Histogram of the distance between each significant bin (adj.p.value < 0.05)
# and the SV of its strongest association.
#
# The distance is the smallest of the four endpoint-to-endpoint distances and
# is set to 0 when the bin lies strictly inside the SV. pmin() is used so the
# minimum is taken row by row regardless of how `pvalues` is grouped; min()
# would collapse all rows of a group into a single value.
sv_dist_p <- pvalues %>%
  filter(adj.p.value < 0.05) %>%
  mutate(
    sv_dist = pmin(
      abs(sv_start - bin_start), abs(sv_end - bin_start),
      abs(sv_start - bin_end), abs(sv_end - bin_end)
    ),
    # bins within SV
    sv_dist = case_when(
      bin_start > sv_start & bin_end < sv_end ~ 0,
      TRUE ~ sv_dist
    )
  ) %>%
  ggplot() +
  geom_histogram(aes(x = sv_dist)) +
  scale_x_continuous(breaks = c(0, 25e3, 50e3, 75e3, 100e3)) +
  labs(x = "Distance from QTL SV",
       title = "Distance between 5mCpG bins and SVs-QTLs",
       subtitle = "min(adj.pvalue) < 0.05")
ggsave("plots/bin_sv_dist_qtl.pdf", sv_dist_p)

# Collect the raw and adjusted p-values of every test from all files under
# data/qtl/raw and assign each value to one of 30 equal-width bins on [0, 1].
#
# include.lowest = TRUE closes the first interval on the left, so p-values that
# are exactly 0 land in the first bin instead of becoming NA (cut() intervals
# are otherwise left-open).
all_pvalues_df <- local({
  raw_files <- list.files("data/qtl/raw", full.names = TRUE)
  if (length(raw_files) == 0) {
    stop("No QTL result files found in 'data/qtl/raw'", call. = FALSE)
  }
  bin_breaks <- seq(0, 1, by = 1 / 30)

  raw_files %>%
    lapply(function(path) read_tsv(path) %>% select(p.value, adj.p.value)) %>%
    bind_rows() %>%
    mutate(
      pvalue.bin = cut(p.value, breaks = bin_breaks, include.lowest = TRUE),
      adj.pvalue.bin = cut(adj.p.value, breaks = bin_breaks,
                           include.lowest = TRUE)
    )
})

# Bar chart of the number of tests per raw p-value bin. Rows whose bin is NA
# are not filtered out, so they are counted as well.
pvalue_p <- ggplot(count(all_pvalues_df, pvalue.bin)) +
  geom_col(aes(x = pvalue.bin, y = n)) +
  labs(title = "P-value distribution of mQTL tests") +
  theme(axis.text.x = element_text(angle = -90))
ggsave(filename = "plots/pvalue_histogram.pdf", plot = pvalue_p,
       width = 7, height = 7)
ggsave(filename = "plots/pvalue_histogram.png", plot = pvalue_p,
       width = 7, height = 7)

# Same bar chart for the adjusted p-values; `pvalue_p` is overwritten with the
# new plot. Rows whose bin is NA are not filtered out.
pvalue_p <- ggplot(count(all_pvalues_df, adj.pvalue.bin)) +
  geom_col(aes(x = adj.pvalue.bin, y = n)) +
  labs(title = "Adjusted P-value distribution of mQTL tests") +
  theme(axis.text.x = element_text(angle = -90))
ggsave(filename = "plots/pvalue_adj_histogram.pdf", plot = pvalue_p,
       width = 7, height = 7)
ggsave(filename = "plots/pvalue_adj_histogram.png", plot = pvalue_p,
       width = 7, height = 7)

# Cumulative x-offset of every non-X/Y chromosome: `tot` is the summed length
# of all chromosomes that come before it in natural order, where a chromosome's
# length is taken as the largest bin end observed on it.
genome_df <- pvalues %>%
  filter(!str_detect(bin_chrom, "[YX]")) %>%
  group_by(bin_chr) %>%
  summarise(chr_len = as.numeric(max(bin_end))) %>%
  mutate(bin_chr = factor(
    bin_chr,
    levels = str_sort(unique(bin_chr), numeric = TRUE)
  )) %>%
  arrange(bin_chr) %>%
  mutate(tot = cumsum(chr_len) - chr_len) %>%
  select(bin_chr, tot)

# Per-bin table for the Manhattan plot: the chromosome offsets from `genome_df`
# are joined on and `bp_cum` places each bin midpoint on one genome-wide axis.
#
# X/Y are excluded with "[YX]" because `bin_chrom` no longer carries the "chr"
# prefix, so a "chr[YX]" pattern can never match and would leave those rows in
# with no offset. The table is ungrouped first so the factor levels are
# computed over all chromosomes at once, and the join key is spelled out.
manhatan_df <- pvalues %>%
  ungroup() %>%
  filter(!str_detect(bin_chrom, "[YX]")) %>%
  left_join(genome_df, by = "bin_chr") %>%
  mutate(bin_chr = factor(
    bin_chr,
    levels = str_sort(unique(bin_chr), numeric = TRUE)
  )) %>%
  arrange(bin_chr, bin_end) %>%
  mutate(bp_cum = tot + (bin_start + bin_end) / 2)

# One row per chromosome with the midpoint of its cumulative-position range,
# used as the x-axis tick position; `bin_chr` is re-levelled in natural order.
axisdf <- summarize(
  group_by(manhatan_df, bin_chr),
  center = (min(bp_cum) + max(bp_cum)) / 2
)
axisdf <- mutate(
  axisdf,
  bin_chr = factor(bin_chr, levels = str_sort(unique(bin_chr), numeric = TRUE))
)

# Two alternating colours, one entry per chromosome in `axisdf`, named by
# chromosome so scale_color_manual() can match them by name. The vector is
# sized from `axisdf` rather than hard-coded to 22 entries, so assigning the
# names cannot fail when the number of chromosomes differs.
chrom_colors <- rep(c("#E64B35FF", "#4DBBD5FF"), length.out = nrow(axisdf))
names(chrom_colors) <- as.character(sort(axisdf$bin_chr))

# Manhattan plot: -log10 of the best adjusted p-value of every bin along the
# cumulative genome axis, coloured by chromosome, with a horizontal reference
# line at adj.p.value = 0.01. The y axis is limited to [0, 75].
manhatan_p <- ggplot(manhatan_df, aes(x = bp_cum, y = -log10(adj.p.value))) +
  geom_point_rast(aes(color = as.factor(bin_chr)), alpha = 0.8, size = 1) +
  # The threshold is passed as a parameter rather than through aes(): mapped
  # inside aes() the constant is evaluated against the plot data and one
  # (non-rasterised) line is drawn per row.
  geom_hline(yintercept = -log10(0.01)) +
  scale_x_continuous(
    labels = as.character(axisdf$bin_chr),
    breaks = axisdf$center
  ) +
  scale_y_continuous(limits = c(0, 75), expand = c(0, 0)) +
  scale_color_manual(values = chrom_colors) +
  ## sample(c(pal_npg()(10), pal_npg()(10), pal_npg()(2)))) +
  theme(
    legend.position = "none",
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
    ## axis.text.x = element_text(angle = 90)
  ) +
  labs(x = "Chromosome", title = "mQTL bins across the genome",
       y = "Max -log10(adj.pvalue)")

ggsave("plots/manhatan.png", manhatan_p, height = 7, width = 15)
ggsave("plots/manhatan.pdf", manhatan_p, height = 7, width = 15)


# Stacked bar chart of the number of significant bins (adj.p.value < 0.05) per
# non-X/Y chromosome, split by the sign of the estimate of the bin's best
# association. An estimate of exactly 0 is labelled "Positive" because that
# condition is tested first.
effect_sizes_p <- pvalues %>%
  ungroup() %>%
  mutate(
    `Strongest effect` = case_when(
      estimate >= 0 ~ "Positive",
      estimate <= 0 ~ "Negative"
    )
  ) %>%
  filter(!str_detect(bin_chr, "[XY]"), adj.p.value < 0.05) %>%
  count(bin_chr, `Strongest effect`) %>%
  mutate(bin_chr = factor(
    bin_chr,
    levels = str_sort(unique(bin_chr), numeric = TRUE)
  )) %>%
  ggplot(aes(x = bin_chr, y = n)) +
  geom_col(aes(color = `Strongest effect`, fill = `Strongest effect`)) +
  scale_y_continuous(labels = label_scientific()) +
  scale_fill_npg() +
  scale_color_npg() +
  labs(
    title = "Number of mQTL bins across chromosomes",
    subtitle = "min(adj.pvalue) < 0.05",
    x = "Chromosome", y = "Number of mQTL bins"
  ) +
  theme(
    legend.position = "top",
    ## panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
ggsave(filename = "plots/mQTLs_chromosomes.pdf", plot = effect_sizes_p,
       height = 7, width = 10)

# Violin plot of |estimate| for significant bins (adj.p.value < 0.05) on each
# non-X/Y chromosome, split by the sign of the estimate.
#
# The table is ungrouped and `bin_chr` is re-levelled in natural order here, as
# the bar plot of bin counts does, so the chromosomes appear as 1, 2, ..., 22
# on the x axis regardless of the level order of the incoming factor.
effect_sizes_violin_p <- pvalues %>%
  ungroup() %>%
  mutate(Effect = case_when(estimate >= 0 ~ "Positive",
                            estimate <= 0 ~ "Negative")) %>%
  filter(!str_detect(bin_chr, "[XY]")) %>%
  filter(adj.p.value < 0.05) %>%
  mutate(bin_chr = factor(
    bin_chr,
    levels = str_sort(unique(bin_chr), numeric = TRUE)
  )) %>%
  ggplot() +
  geom_violin(aes(x = bin_chr, y = abs(estimate), fill = Effect)) +
  scale_y_continuous(labels = label_scientific()) +
  labs(x = "Chromosome", y = "Strongest effect on bin",
       subtitle = "min(adj.pvalue) < 0.05",
       title = "Effects on mQTL bins across chromosomes") +
  theme(legend.position = "top",
        ## panel.border = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank()) +
  scale_fill_npg()

ggsave("plots/mQTLs_chromosomes_violin.pdf", effect_sizes_violin_p, height = 7, width = 10)

# Composite figure: the Manhattan plot on the top row (panel A) and, on the
# bottom row, the per-chromosome bar chart (B) next to the SV-distance
# histogram (C).
qtls_fig <- plot_grid(
  plotlist = list(
    manhatan_p,
    plot_grid(
      plotlist = list(effect_sizes_p, sv_dist_p),
      nrow = 1, ncol = 2, labels = c("B", "C")
    )
  ),
  nrow = 2, labels = c("A", "")
)
ggsave(filename = "figures/mQTLs.pdf", plot = qtls_fig, height = 12, width = 12)
