library(dplyr)

create_pa_matrix <- function(df_subset) {
  # Encode every cell of `df_subset` relative to the reference allele stored
  # in the `astCal_path` column of the same row.
  #
  # Encoding per row (locus), scanning columns left to right:
  #   0  - cell equals the reference allele
  #   1  - cell equals the first non-reference (derived) allele seen in the row
  #   2  - cell is any other non-reference allele
  #   NA - the cell or the row's reference allele is missing
  #
  # This only works up to tri-allelic loci (reference allele + 2 derived
  # alleles): every derived allele after the first is collapsed into code 2.
  #
  # Args:
  #   df_subset: data frame with one row per locus and one column per path,
  #     including the reference column `astCal_path`.
  #
  # Returns:
  #   A data frame with the same dimensions and column names as `df_subset`
  #   holding the numeric codes described above. The reference column itself
  #   is encoded too (0, or NA where the reference is missing).

  # `$` is kept (rather than `[[`) so that callers relying on partial matching
  # of the reference column name keep working.
  reference_col <- df_subset$astCal_path
  if (is.null(reference_col)) {
    stop("`df_subset` must contain an `astCal_path` reference column",
         call. = FALSE)
  }

  n_rows <- nrow(df_subset)
  n_cols <- ncol(df_subset)
  pa_matrix <- matrix(0, nrow = n_rows, ncol = n_cols)

  # seq_len() yields an empty sequence for zero rows, unlike 1:n.
  for (row in seq_len(n_rows)) {
    reference_allele <- reference_col[row]
    first_derived_allele <- NULL
    for (col in seq_len(n_cols)) {
      # `[[col]][row]` returns a scalar for both data.frames and tibbles.
      allele <- df_subset[[col]][row]

      # A missing value cannot be classified; propagate NA instead of letting
      # `if` fail on an NA condition.
      if (is.na(allele) || is.na(reference_allele)) {
        pa_matrix[row, col] <- NA_real_
        next
      }
      if (allele == reference_allele) {
        next
      }
      if (is.null(first_derived_allele)) {
        first_derived_allele <- allele
      }
      pa_matrix[row, col] <- if (allele == first_derived_allele) 1 else 2
    }
  }

  pa_matrix <- as.data.frame(pa_matrix)
  colnames(pa_matrix) <- colnames(df_subset)
  pa_matrix
}

create_pa_matrix_ranked <- function(df_subset) {
  # Create a PAV matrix in which each row's values are replaced by their
  # zero-based rank among the distinct values of that row (smallest distinct
  # value -> 0, next -> 1, ...). Per the original note, the values are allele
  # lengths, so alleles are ranked by increasing length.
  #
  # Args:
  #   df_subset: data frame with one row per locus; each row is coerced with
  #     as.numeric() before ranking.
  #
  # Returns:
  #   A data frame with the same dimensions and column names as `df_subset`
  #   containing the numeric ranks. Missing values stay NA.
  n_rows <- nrow(df_subset)
  pa_matrix <- matrix(0, nrow = n_rows, ncol = ncol(df_subset))

  # seq_len() yields an empty sequence for zero rows, unlike 1:n, so an empty
  # input produces an empty result instead of an out-of-bounds assignment.
  for (row in seq_len(n_rows)) {
    # factor() sorts the distinct values, so the integer codes are 1-based
    # ranks; subtract 1 so the smallest allele is coded 0.
    allele_factor <- factor(as.numeric(df_subset[row, ]))
    pa_matrix[row, ] <- as.integer(allele_factor) - 1
  }

  pa_matrix <- as.data.frame(pa_matrix)
  colnames(pa_matrix) <- colnames(df_subset)
  pa_matrix
}