# Plot results
# Label a lineage as invasive when its p-value is below 0.05 and the log of
# its `logOR` column is positive.
# NOTE(review): `log()` is applied to a column that is already named `logOR`;
# confirm whether that column stores odds ratios or log odds ratios.
or_data$invasive <- (log(or_data$logOR) > 0) & (or_data$pvalue < 0.05)

# Lineage identifiers are matched against list names further down, so they
# are stored as character strings.
or_data$GPSC <- as.character(or_data$GPSC)

# Number of rows in the `pa` element of every fit, used as the lineage size.
lineage_size <- map_dbl(fits, function(fit) nrow(fit$pa))

# Retain lineages whose size passes the threshold (a threshold of 0 keeps
# every lineage that has a fit).
or_data_subset <- or_data[
  or_data$GPSC %in% names(lineage_size)[lineage_size >= 0],
]

# Optional stricter filter, currently disabled:
# or_data_subset <- or_data_subset[or_data_subset$pvalue<0.1,]

# Fits that belong to the retained lineages.
fit_subset <- fits[names(fits) %in% or_data_subset$GPSC]

# Run the covariate comparison, keeping the `invasive` column and using 100
# bootstrap replicates, then print its summary table.
comp <- compare_pangenome_covariates(
  fit_subset,
  or_data_subset,
  keep = "invasive",
  nboot = 100
)
comp$summary
#> # A tibble: 4 × 7
#> term estimate std.error statistic p.value `bootstrap CI …` `bootstrap CI …`
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 depth… -1.57e-3 0.000162 -9.69 3.77e-22 -0.00199 -0.00117
#> 2 istip… -1.29e-1 0.0439 -2.92 3.45e- 3 -0.225 0.00610
#> 3 core:… -9.99e-4 0.000451 -2.21 2.68e- 2 -0.00552 0.00313
#> 4 istip… -3.54e-3 0.000625 -5.66 1.50e- 8 -0.0142 0.00297
# Data stored in `comp$model$model` (presumably the model frame of the fitted
# comparison model -- confirm), with the invasive flag turned into a label.
pdf <- as_tibble(comp$model$model)
pdf$invasive <- ifelse(pdf$invasive, "Invasive", "Not invasive")

# Values of `acc` on rows flagged `istip`, split by invasive status.
# The colours are named by label so that each class keeps its own colour even
# if the other class happens to be absent from the data; with an unnamed
# vector the assignment is positional and the colours would silently swap.
ggplot(filter(pdf, istip), aes(x = invasive, y = acc, colour = invasive)) +
  ggbeeswarm::geom_quasirandom(alpha = 0.2, width = 0.2) +
  geom_boxplot(outlier.colour = NA, fill = NA, colour = "#4d4d4d") +
  scale_colour_manual(
    values = c("Invasive" = "#e41a1c", "Not invasive" = "#377eb8")
  ) +
  theme_clean(base_size = 20) +
  theme(
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.position = "none",
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  ylab("accessory gene gain/loss events at tip") +
  xlab("")

# No `plot` argument is given, so ggsave() writes the most recently created
# plot, i.e. the figure built just above.
ggsave("./figures/pneumo_tip_acc_count_histogram.pdf", width = 10, height = 7)
ggsave("./figures/pneumo_tip_acc_count_histogram.png", width = 10, height = 7)
# One row per fitted lineage: the estimate of the `core` term taken from the
# fit's summary table, plus the lineage's invasive status looked up by GPSC.
# The negative label is spelled "Not invasive" so that it matches the label
# used by the other figures in this script.
pdf <- tibble(
  core_estimate = map_dbl(
    fit_subset,
    ~ .x$summary$estimate[.x$summary$term == "core"]
  ),
  invasive = ifelse(
    or_data_subset$invasive[match(names(fit_subset), or_data_subset$GPSC)],
    "Invasive",
    "Not invasive"
  )
)

# Colours are named by label so the class-to-colour mapping does not depend
# on which classes are present in the data.
ggplot(pdf, aes(x = invasive, y = core_estimate, colour = invasive)) +
  geom_boxplot(outlier.color = NA, fill = NA) +
  ggbeeswarm::geom_quasirandom(width = 0.2, size = 2) +
  scale_colour_manual(
    values = c("Invasive" = "#e41a1c", "Not invasive" = "#377eb8")
  ) +
  theme_clean(base_size = 20) +
  theme(
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.position = "none",
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  ylab("Estimated core coefficient") +
  xlab("")

# No `plot` argument is given, so ggsave() writes the most recently created
# plot, i.e. the figure built just above.
ggsave("./figures/pneumo_core_estimate_boxplot.pdf", width = 10, height = 7)
ggsave("./figures/pneumo_core_estimate_boxplot.png", width = 10, height = 7)
# For every tree, the smallest pairwise tip-to-tip distance of each tip, i.e.
# the distance to its nearest neighbouring tip.
# Only the diagonal (each tip's zero distance to itself) is masked. Masking
# the whole lower triangle as well would limit each tip's search to the tips
# listed before it and leave the first tip without any candidate (Inf).
dist_df <- imap_dfr(trees, ~ {
  d <- cophenetic.phylo(.x)
  diag(d) <- NA_real_
  tibble(GPSC = .y, distance = matrixStats::colMins(d, na.rm = TRUE))
})

# Keep the lineages under study and attach their invasive label.
dist_df <- dist_df[dist_df$GPSC %in% or_data_subset$GPSC, ]
dist_df$invasive <- or_data_subset$invasive[
  match(dist_df$GPSC, or_data_subset$GPSC)
]
dist_df$invasive <- ifelse(dist_df$invasive, "Invasive", "Not invasive")

# Histogram of distances below 50, one panel per class. Fill colours are named
# by label so the class-to-colour mapping does not depend on which classes are
# present in the data.
ggplot(filter(dist_df, distance < 50), aes(x = distance, fill = invasive)) +
  geom_histogram() +
  facet_wrap(~invasive, ncol = 1, scales = "free_y") +
  scale_fill_manual(
    values = c("Invasive" = "#e41a1c", "Not invasive" = "#377eb8")
  ) +
  theme_clean(base_size = 20) +
  theme(
    plot.background = element_blank(),
    legend.background = element_blank(),
    legend.position = "none",
    panel.border = element_blank(),
    axis.line = element_line()
  ) +
  xlab("distance to nearest neighbour in phylogeny (years)")

# No `plot` argument is given, so ggsave() writes the most recently created
# plot, i.e. the figure built just above.
ggsave("./figures/GPSC_nn_histogram.pdf", width = 12, height = 7)
ggsave("./figures/GPSC_nn_histogram.png", width = 12, height = 7)
# Trees of the lineages that are present in the association table.
trees_sig <- trees[names(trees) %in% or_data_subset$GPSC]

# One tree plot per lineage, drawn in red when the lineage is flagged as
# invasive and in blue otherwise, and titled with the lineage identifier.
ggs <- imap(trees_sig, function(tree, gpsc) {
  is_invasive <- gpsc %in% or_data_subset$GPSC[or_data_subset$invasive]
  branch_colour <- if (is_invasive) "#e41a1c" else "#377eb8"
  ggtree(tree, col = branch_colour) +
    ggtree::theme_tree2() +
    xlim_tree(xlim = 500) +
    ggtitle(paste("GPSC", gpsc))
})

# Arrange all tree plots in a single multi-panel figure.
patchwork::wrap_plots(ggs)

# No `plot` argument is given, so ggsave() writes the most recently created
# plot, i.e. the combined figure built just above.
ggsave("./figures/GPSC_phylogenies_sigOR.pdf", width = 12, height = 10)
ggsave("./figures/GPSC_phylogenies_sigOR.png", width = 12, height = 10)