##Combined Figure

library(tidyverse)
library(ggfortify)
library(ggpubr)
library(vegan)

# Print the citation entry for the vegan package (presumably so it can be
# referenced in the accompanying write-up).
citation("vegan")

####TRIPCA####
# PCA of the per-sample trinucleotide-context weights.
# The CSV is read with the row labels in column `X` and one column per
# sample; the pooled ART / Natural / overall columns are dropped so that only
# the remaining (per-sample) columns enter the PCA.
weights <- read.csv("trinucleotide_context.csv") %>%
  select(-ART, -Natural, -overall)

# Keep the row labels aside, then transpose so that every row is a sample and
# every column is one of the former rows. The labels are re-attached as
# column names after the transpose (a matrix -> data.frame conversion would
# otherwise name the columns X1, X2, ...).
samples <- weights$X
weights <- weights %>%
  dplyr::select(-X) %>%
  t() %>%
  data.frame()
names(weights) <- samples
weights$Sample <- rownames(weights)

# Sample metadata is derived from the sample names: names containing "F." are
# labelled Female, names ending in ".8" plus one more character are labelled ART.
weights$sex <- ifelse(grepl("F\\.", weights$Sample), "Female", "Male")
weights$type <- ifelse(grepl("\\.8.$", weights$Sample), "ART", "Natural")

# Print the number of samples per sex/type combination.
weights %>% group_by(sex, type) %>% tally()

# Numeric matrix for the PCA: everything except the three metadata columns.
weights_num <- dplyr::select(weights, -Sample, -sex, -type)

# Rewrite labels of the form "CA A.A" as "A[C>A]A".
names(weights_num) <- gsub("([ATCG])([ATCG]) ([ATCG])\\.([ATCG])", 
                           "\\3[\\1>\\2]\\4",
                           names(weights_num))

pca <- prcomp(weights_num)

# NOTE(review): the call mixes the `colour` and `color` spellings
# (`loadings.colour` vs `loadings.label.color` / `color`); confirm that
# ggfortify honours both, otherwise the loading labels keep their default
# colour.
tri_pca <- autoplot(pca,
         data=weights,
         label=FALSE,
         loadings=TRUE,
         loadings.colour = 'darkgrey',
         loadings.label.color ="black",
         loadings.label = TRUE,
         color="type",
         shape="sex") +
  theme_bw() + 
  scale_color_manual(values = c("#E69F00", "#666699"))+
  theme(legend.position = "top") +
  labs(shape="", color="")



####HEATMAP####
# Load a previously saved R object from disk; it is used as panel C of the
# combined figure below (presumably a plot object created by another script
# -- confirm against whatever writes figures/heatmap_with_sig.Rds).
heatmap <- readRDS("figures/heatmap_with_sig.Rds")

####Common_Signatures####
# Signature activities for the pooled "ART" and "Natural" rows, reshaped to
# long format and converted to within-sample proportions.
signatures <- read.table("Assignment_Solution_Activities.txt", header=TRUE) %>% 
  select(Sample = Samples, 
         everything()) %>% 
  filter(Sample %in% c("ART", "Natural")) %>% 
  pivot_longer(2:last_col()) %>% 
  # Drop signatures whose summed activity over the two rows is zero.
  group_by(name) %>%
  filter(sum(value) > 0) %>%
  ungroup() %>% 
  # Rescale so that the values of each sample sum to 1.
  group_by(Sample) %>% 
  mutate(value=value/sum(value)) %>%
  ungroup()
unique(signatures$name)

# Display labels are paired with the signature names BY POSITION, so the
# order and number of labels must match `unique(signatures$name)` printed
# above. Guard against a silent mismatch (data.frame() would otherwise recycle
# the shorter vector when the lengths happen to divide).
signature_names <- unique(signatures$name)
sbs_labels <- c("SBS1\nCytosine Deamination",
                "SBS5\nUnknown",
                "SBS30\nBase excision repair",
                "SBS84\nLymphoid Signature",
                "SBS85\nLymphoid Signature",
                "SBS89\nUnknown")
stopifnot(length(signature_names) == length(sbs_labels))
sbs <- data.frame(name = signature_names,
                  SBS = sbs_labels)

# Horizontal dodged bar chart, one bar per sample and signature, with the
# signatures ordered by their (mean) proportion.
common_signatures <- signatures %>% 
  merge(sbs, by="name")%>%
  ggplot(aes(y=reorder(SBS, value), x=value, fill=Sample)) +
  geom_bar(stat="identity", position="dodge") +
  scale_fill_manual(values = c("#E69F00", "#666699")) + 
  theme_bw() + 
  ylab("Signature") +
  xlab("Proportion of Mutations") +
  # `limits` is spelled out in full rather than relying on partial argument
  # matching of `lim`.
  scale_x_continuous(labels=c(0,0.3,0.6),
                     breaks=c(0,0.3,0.6),
                     limits=c(0,0.61))
common_signatures
####Motif Plot####
# Read the two spectrum tables separately, tag each with its group and rename
# the group-specific measurement column to a shared `value` column.
natural_spectrum <- read.csv("Natural_05_08.gzIVF_spectrum.csv") %>%
  mutate(type="Natural") %>%
  select(everything(), value=Natural)

art_spectrum <- read.csv("ART_05_08.gzIVF_spectrum.csv") %>%
  mutate(type="ART") %>%
  select(everything(), value=ART)

# Stack both tables, split the `X` label at the space into alteration and
# context, and insert ">" between the two characters of the alteration.
dat <- bind_rows(natural_spectrum, art_spectrum) %>%
  separate(X, into=c("Alteration", "Context"), sep=" ") %>%
  mutate(Alteration = gsub("(.)(.)", "\\1>\\2", Alteration))

# Dodged bars per context, one facet per alteration in a single row; the
# legend is hidden on this panel.
motif <- ggplot(dat, aes(x=Context, y=value, fill=type)) +
  geom_bar(stat="identity", position="dodge") +
  facet_wrap(~Alteration, nrow=1) +
  scale_fill_manual(values = c("#E69F00", "#666699")) +
  xlab("Motif") +
  ylab("Contribution") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        legend.position = "none")

####COMBINED####
# Assemble the final figure:
#   top row    - A (trinucleotide PCA) and B (signature bars) sharing one
#                legend, next to C (heatmap)
#   bottom row - D (motif plot), half the height of the top row
# The result is stored in a variable so that ggsave() receives the figure
# explicitly instead of depending on whatever last_plot() happens to return.
combined <- ggarrange(ggarrange(nrow=2,
                    ggarrange(ggarrange(tri_pca,
                              common_signatures + theme(legend.position = "none"), 
                              labels=c("A", "B"), common.legend=TRUE),
                              heatmap,
                              widths=c(2,0.7),
                              nrow=1, labels=c(NA, "C")),
                    ggarrange(motif,
                      nrow=1, labels=c("D")), heights=c(1,0.5))) +
  # Force a white background and border and remove the outer margin.
  bgcolor("white") +
  border("white") + 
  theme(legend.background = element_rect("white"),
        panel.background = element_rect("white"),
        plot.background = element_rect(fill = "white", 
                                       color = "white"),
        plot.margin = unit(x = c(0, 0, 0, 0), units = "mm"))
# Display the figure on the active graphics device, as before.
combined

ggsave("figures/combined.png", plot=combined, height=7,width=10.5, bg="white")
 
####Overall####
# Supplemental figure: signature proportions for the pooled "overall" row.
# This section reuses (and overwrites) `signatures`, `sbs` and
# `common_signatures` from the sections above.
signatures <- read.table("Assignment_Solution_Activities.txt", header=TRUE) %>% 
  select(Sample = Samples, 
         everything()) %>% 
  filter(Sample %in% c("overall")) %>% 
  pivot_longer(2:last_col()) %>% 
  # Drop signatures with zero activity.
  group_by(name) %>%
  filter(sum(value) > 0) %>%
  ungroup() %>% 
  # Rescale so that the values of the sample sum to 1.
  group_by(Sample) %>% 
  mutate(value=value/sum(value)) %>%
  ungroup()
unique(signatures$name)

# Display labels are paired with the signature names BY POSITION, so the
# order and number of labels must match `unique(signatures$name)` printed
# above. Guard against a silent mismatch (data.frame() would otherwise recycle
# the shorter vector when the lengths happen to divide).
signature_names <- unique(signatures$name)
sbs_labels <- c("SBS1\nCytosine Deamination",
                "SBS5\nUnknown",
                "SBS30\nBase excision repair",
                "SBS44\nMismatch Repair",
                "SBS89\nUnknown")
stopifnot(length(signature_names) == length(sbs_labels))
sbs <- data.frame(name = signature_names,
                  SBS = sbs_labels)
sbs

# Horizontal bar chart with the signatures ordered by proportion.
common_signatures <- signatures %>% 
  merge(sbs, by="name")%>%
  ggplot(aes(y=reorder(SBS, value), x=value)) +
  geom_bar(stat="identity", position="dodge") +
  theme_bw() + 
  ylab("Signature") +
  xlab("Proportion of Mutations") +
  # `limits` is spelled out in full rather than relying on partial argument
  # matching of `lim`.
  scale_x_continuous(labels=c(0,0.3,0.6),
                     breaks=c(0,0.3,0.6),
                     limits=c(0,0.61))
common_signatures
# Pass the plot explicitly instead of relying on last_plot().
ggsave("supplemental_figure_signatures.pdf", plot=common_signatures,
       height=5, width=5, bg="white")
