library(tidyverse)

# Load every call table found in data/calls/ and stack them into one tibble.
# Each input is read as a headerless TSV with six columns; the colon-delimited
# "call" column is then split into its six components, and every row is tagged
# with a Sample label derived from the file name.
mg_df <- bind_rows(
  lapply(
    # Dot escaped so that only a literal ".calls" extension is selected.
    list.files("data/calls/", full.names = TRUE, pattern = "\\.calls$"),
    function(p) {
      read_tsv(p,
        lazy = FALSE, progress = TRUE, show_col_types = FALSE,
        col_names = c("chrom", "start", "end", "source", "sink", "call")
      ) %>%
        mutate(
          # The files listed above end in ".calls", so a pattern that requires
          # a trailing ".gz" can never match and would leave the full file
          # name in Sample. Strip the suffix with or without ".gz", anchored
          # at the end and with literal dots.
          Sample = str_remove(
            basename(p),
            "\\.fasta\\.gz\\.calls(\\.gz)?$"
          )
        ) %>%
        separate(
          "call",
          into = c(
            "Path", "Length", "Strand",
            "Contig", "Contig_start", "Contig_end"
          ),
          sep = ":"
        )
    }
  )
)

## merged_df <- pivot_wider(mg_df %>% mutate(Allele = Path),
##   id_cols = c(chrom, start, end, source, sink, Allele),
##   names_from = "Sample",
##   values_from = Path,
##   values_fill = "0"
## ) %>%
##   filter(Allele != ".") %>%
##   mutate_at(
##     vars(-c("chrom", "start", "end", "source", "sink", "Allele")),
##     function(x) {
##       ifelse(x != "0", 1, ifelse(x == "0", 0, -1))
##     }
##   ) %>%
##   mutate(Freq = rowSums(across(-c(chrom, start, end, source, sink, Allele))))

## genotypes_df <- read_tsv("mg_methyl.tsv.gz")

# Read every node list in data/calls/joined/ (headerless, single "node"
# column) and stack them into one tibble, labelling each row with a Sample
# name taken from the file name minus its ".nodes" extension.
node_df <- bind_rows(
  lapply(
    # Dot escaped so that only a literal ".nodes" extension is selected.
    list.files("data/calls/joined/", full.names = TRUE, pattern = "\\.nodes$"),
    function(p) {
      read_tsv(p,
        lazy = FALSE, progress = TRUE, show_col_types = FALSE,
        col_names = c("node")
      ) %>%
        # Anchor the pattern at the end and escape the dot: an unanchored
        # ".nodes" removes the first "<any char>nodes" it finds, which mangles
        # names such as "my_nodes_run.nodes".
        mutate(Sample = str_remove(basename(p), "\\.nodes$"))
    }
  )
)

# Save the combined node table as a TSV next to its inputs.
# NOTE(review): readr is expected to gzip the output because of the ".gz"
# extension -- confirm against the installed readr version.
write_tsv(
  x = node_df,
  file = "data/calls/joined/samples.nodes.gz"
)
