# Loading mutation call data
library(stringr)
# Read the master mutation matrix and keep every column whose name does not
# contain "pop".
master.mat <- read.table('../../data/CO8/CO8.master_matrix.txt')
# A logical mask is used instead of -grep(...): when no column name matches,
# grep() returns integer(0) and a negative zero-length index would select
# *no* columns instead of all of them.
is.pop.col <- grepl("pop", colnames(master.mat))
# drop = FALSE keeps a data frame even if a single column remains, so the
# colnames<- call below stays valid.
cell.mat <- master.mat[, !is.pop.col, drop = FALSE]
# Normalise names so they only use "_" as separator: "." in column names and
# "-" / ":" in row names are replaced.
colnames(cell.mat) <- str_replace_all(colnames(cell.mat), "\\.", "_")
rownames(cell.mat) <- str_replace_all(rownames(cell.mat), "[-:]", "_")


# Defining bridge mutations
# Each label is parsed further down as <name>_<chrN>_<position>.
bridge.mutations <- c(
  "CHN1_chr2_175779934",
  "FHIT_chr3_60412480",
  "ATP7B_chr13_52534322",
  "APC_chr5_112175328"
)



# Loading tree data
library(sna)
# Node labels whose descendants define the met1, met2 and prim groups.
met1.parent <- "LINGO2_chr9_29123273"
met2.parent <- "NR4A3_chr9_102595561"
prim.parent <- "LINGO2_chr9_29178757"

# Read the tree in Graphviz format, strip spaces and semicolons from every
# line, and parse the cleaned text into an adjacency matrix.
dot.file <- readLines("CO8_ml0.renamed.gv")
dot.con <- textConnection(str_replace_all(dot.file, '[ ;]', ''))
# Close the text connection explicitly (even if parsing fails); otherwise it
# stays open until garbage collection and R warns about an unused connection.
scite.tree <- tryCatch(read.dot(dot.con), finally = close(dot.con))

# Pairwise path lengths between tree nodes, labelled with the node names.
# descendents() treats infinite entries as "not reachable".
distmat <- geodist(scite.tree, count.paths=FALSE)$gdist
colnames(distmat) <- colnames(scite.tree)
rownames(distmat) <- rownames(scite.tree)
# Return the names of all cells reachable from node `mut` in the tree.
#
# mut: row name of `distmat` (a node label of the tree).
# Returns a character vector of those column names of `distmat` that have a
# non-infinite distance from `mut` and are also column names of `cell.mat`.
# The node `mut` itself is included if it satisfies both conditions.
descendents <- function(mut)
{
  node.names <- colnames(distmat)
  reachable <- !is.infinite(distmat[mut,])
  is.cell.node <- node.names %in% colnames(cell.mat)
  node.names[reachable & is.cell.node]
}
# Cells below each group's parent node. The *.parent labels defined with the
# tree data are reused here so the node names live in exactly one place.
met1 <- descendents(met1.parent)
met2 <- descendents(met2.parent)
prim <- descendents(prim.parent)



# Loading allelic counts
library(data.table)
# Chromosome ("chr<digits>") and position (trailing digits) of each bridge
# mutation, parsed from its label; element k of both vectors describes the
# same mutation.
bridge.chrom <- str_extract(bridge.mutations, "chr\\d+")
bridge.pos <- as.integer(str_extract(bridge.mutations, "\\d+$"))
exome.ad.full <- fread("../../data/CO8/exome_AD.txt", verbose=FALSE)

# Per-cell columns are named "<name>-<number>.AD"; rename them to
# "<name>_<number>". The same anchored pattern is used for detection and for
# extraction, so a column can never be selected and then fail to parse (which
# would rename it to "NA_NA").
cell.col.pattern <- "^(\\w+)-(\\d+)\\.AD$"
is.cell <- grep(cell.col.pattern, colnames(exome.ad.full))
cell.name.matches <- str_match(colnames(exome.ad.full)[is.cell], cell.col.pattern)
colnames(exome.ad.full)[is.cell] <- sprintf("%s_%s", cell.name.matches[,2], cell.name.matches[,3])

# Keep only rows at a bridge locus. Chromosome and position are matched as a
# pair: testing them independently would also keep a row whose chromosome
# belongs to one bridge mutation and whose position belongs to another.
bridge.key <- paste(bridge.chrom, bridge.pos, sep=":")
is.bridge <- with(exome.ad.full, paste(CHROM, as.integer(POS), sep=":") %in% bridge.key)
bridge.ad.full <- exome.ad.full[is.bridge,]




# Parse allelic-depth entries of the form "<int>,<int>".
#
# x: one or more entries (the function is wrapped in Vectorize, so the parser
#    is applied to every element separately).
# Per element the result is a length-2 vector: c(0,0) for NA, otherwise the
# two leading comma-separated integers. Callers below index the combined
# result as a 2-row matrix with one column per element.
# Stops with an error if a non-NA element does not start with "<int>,<int>".
unpack.ad <- Vectorize(function(x)
{
  # Missing entries count as zero reads for both numbers
  if (is.na(x)) return(c(0,0))

  ad.string <- as.character(x)
  parsed <- str_match(ad.string, '^(\\d+),(\\d+)')
  if (is.na(parsed[,1])) stop(sprintf("%s isn't a string that's a pair of numbers separated by a comma", ad.string))

  # Drop the full-match column, keep the two captured numbers
  as.integer(parsed[,-1])
})


# Getting rough probability estimates for coloring purposes
library(rjags)
load.module("mix")

# Plain data-frame copy of the bridge-locus rows
mut.mat.full <- data.frame(bridge.ad.full)
# Everything after the first 13 columns is treated as per-cell data
# NOTE(review): assumes the input has exactly 13 leading non-cell columns -- confirm
mut.mat <- mut.mat.full[, -seq_len(13)]
# Columns whose name contains "D" are flagged as diploid
is.diploid <- str_detect(names(mut.mat), "D")

# Estimate, for row i of mut.mat, the posterior mean of the JAGS node
# "has.mut" from the model in mutation_caller.bugs.
#
# i: row index into mut.mat (one bridge locus).
# Returns the mean of the sampled has.mut values along its first dimension
# (one value per entry of has.mut's first dimension).
probs <- function(i)
{
  # Parsed counts, one column per cell: row 1 is the first number of each
  # "a,b" entry, row 2 the second.
  # NOTE(review): presumably reference/alternate read depths -- confirm
  counts.other <- unpack.ad(mut.mat[i, !is.diploid])
  counts.dip <- unpack.ad(mut.mat[i, is.diploid])

  # n*: number of cells, m*: total of both counts per cell, a*: first count
  # per cell; suffix "a" = non-diploid columns, "d" = diploid columns.
  jags.data <- list(
    na = sum(!is.diploid),
    ma = apply(counts.other, 2, sum),
    aa = counts.other[1,],
    nd = sum(is.diploid),
    md = apply(counts.dip, 2, sum),
    ad = counts.dip[1,]
  )

  fit <- jags.model("mutation_caller.bugs", data = jags.data)
  draws <- jags.samples(fit, "has.mut", 10^4)
  apply(draws$has.mut, 1, mean)
}

# Run the model once per row of mut.mat rather than for a hard-coded 1:4, so
# the result always lines up with the rows actually selected above.
# vapply() (instead of sapply()) checks that every call returns one numeric
# value per non-diploid column; matrix() keeps the cells-by-loci shape even
# when there is only a single non-diploid column.
n.nondiploid <- sum(!is.diploid)
prob.mat <- matrix(
  vapply(seq_len(nrow(mut.mat)), probs, numeric(n.nondiploid)),
  nrow = n.nondiploid
)
# One row per locus: the first two columns of mut.mat.full followed by one
# probability column per non-diploid cell.
prob.df <- cbind(mut.mat.full[,1:2],data.frame(t(prob.mat)))
colnames(prob.df) <- c(colnames(mut.mat.full)[c(1,2)], colnames(mut.mat)[!is.diploid])


# Output
# Tables: raw rows for the bridge loci and the per-cell probability estimates
write.table(mut.mat.full, "read_counts.txt", quote=FALSE, row.names=FALSE)
write.table(prob.df, "probabilities.txt", quote=FALSE, row.names=FALSE)

# Cell-name lists for the three groups, one file each
write(prim, "prim.txt")
write(met1, "met1.txt")
write(met2, "met2.txt")