# This script must be run inside the Docker container linked below, because
# some of the required packages are difficult to install on ARM MacBooks:
# https://github.com/NESCent/popgen-docker

# docker run -it --volume $PWD/temp_docker:/home hlapp/rpopgen bash


library(phangorn)
library(ape)
library(readr)


# Build a Neighbor-Joining tree with bootstrap support from the matrix stored
# in "pa_matrix_phylogenetic_ranked.csv", plot it, and save the tree together
# with its support values to "pa_matrix_phylogenetic_tree_ranked.rds".

# Read the PA matrix and transpose it.
# NOTE(review): phyDat() is expected to treat matrix rows as taxa, so the
# transpose presumes the CSV holds one column per taxon -- confirm against the
# input file. A non-numeric column (e.g. an ID column) would turn the whole
# matrix into character; verify the CSV contains only state columns.
df_pa_matrix <- read_csv("pa_matrix_phylogenetic_ranked.csv")
pa_matrix <- t(as.matrix(df_pa_matrix))

# Convert the matrix to a phyDat object with user-defined states 0, 1 and 2.
# pa_matrix is already a matrix, so no further as.matrix() coercion is needed.
pa_phydat <- phyDat(pa_matrix, type = "USER", levels = c(0, 1, 2))

# Pairwise Hamming distances between the rows of the phyDat object
dist_matrix <- dist.hamming(pa_phydat)

# Initial tree: Neighbor-Joining on the distances, then midpoint-rooted
initial_tree <- midpoint(nj(dist_matrix))

# Bootstrapping: each replicate is resampled at random, so the RNG is seeded
# first to make the saved support values reproducible between runs.
bootstrap_seed <- 42
n_replicates <- 25
set.seed(bootstrap_seed)
bootstrap_trees <- bootstrap.phyDat(
  pa_phydat,
  FUN = function(x) nj(dist.hamming(x)),
  bs = n_replicates
)

# Support of each clade of the initial tree among the bootstrap trees.
# NOTE(review): prop.clades() presumably reports raw counts (out of
# n_replicates), not percentages -- confirm before interpreting the labels.
bootstrap_support <- prop.clades(initial_tree, bootstrap_trees)
bootstrap_support

# Plot the tree with bootstrap values.
# The device is opened explicitly so that dev.off() below closes a device this
# script owns. "Rplots.pdf" is the file R's implicit non-interactive device
# writes to, so the output location is the same as before.
pdf("Rplots.pdf")
plot(initial_tree, main = "Phylogenetic Tree with Bootstrap Support")
add.scale.bar()
nodelabels(bootstrap_support, frame = "n")
dev.off()

# Persist the rooted tree and its support values for downstream use
saveRDS(
  list(tree = initial_tree, bootstrap_values = bootstrap_support),
  file = "pa_matrix_phylogenetic_tree_ranked.rds"
)
