library(sna)
library(stringr)
library(argparse)

# Command-line interface. Every option below is read unconditionally by the
# rest of the script (two input paths, two output paths), so all four are
# declared required: a missing option is reported by the parser up front
# instead of surfacing later as an error from a read or write call.
parser <- ArgumentParser()
parser$add_argument("-g", "--graph", required=TRUE, help="Input dot file")
parser$add_argument("-o", "--outdot", required=TRUE, help="Output dot file")
parser$add_argument("-m", "--matrix", required=TRUE, help="Master matrix")
parser$add_argument("-t", "--table", required=TRUE, help="Output table of old/new gene names")

args <- parser$parse_args(commandArgs(trailingOnly=TRUE))

# Unpack the parsed options into the names used by the rest of the script.
ingraph <- args$graph
inmat <- args$matrix
outfile <- args$outdot
outtable <- args$table

# Raw lines of the input dot file; these are edited further down and
# eventually written back out.
dot.file <- readLines(ingraph)

# Parse a copy of the lines with every space and semicolon stripped.
# A text connection is created already open, so it is closed explicitly once
# read.dot() has read from it rather than being left dangling.
dot.con <- textConnection(str_replace_all(dot.file, '[ ;]', ''))
scite.tree <- read.dot(dot.con)
close(dot.con)

# Pairwise distance matrix returned by geodist(), labelled with the row and
# column names carried by the parsed graph so it can be indexed by node name.
distmat <- geodist(scite.tree, count.paths=FALSE)$gdist
colnames(distmat) <- colnames(scite.tree)
rownames(distmat) <- rownames(scite.tree)

# TRUE when distmat[x,y] is not infinite while distmat[y,x] is infinite.
precedes <- function(x,y) !is.infinite(distmat[x,y]) & is.infinite(distmat[y,x])
# Entry of distmat in the "Root" row for each given node name (vectorised
# over `name`).
distance.to.root <- Vectorize(function(name) distmat["Root", name])

# Break ties for cells with two edges.
# For every cell (column of the master matrix) find the dot lines that look
# like "<parent> -> ...<cell>;". When a cell has two or more such lines, keep
# only the one whose parent has the smallest distance from "Root" and mark the
# others for removal.
cell.mat <- read.table(inmat)
# Cell names with '.', ':' and '-' replaced by '_' before matching against
# the dot lines.
cell.names <- str_replace_all(colnames(cell.mat), '[\\.:\\-]', '_')
indices.to.remove <- integer(0)
for (cell.name in cell.names)
{
  # Column 2 of the match matrix is the captured parent; NA where the line
  # does not match. NOTE: cell.name is interpolated into the regex as-is.
  matches <- str_match(dot.file, sprintf("(.*) \\->.*%s;",cell.name))
  parents <- matches[,2]
  parent.indices <- which(!is.na(parents))
  if (length(parent.indices)>=2)
  {
    # distance.to.root is already vectorised, so no per-index loop is needed.
    index.to.keep <- parent.indices[which.min(distance.to.root(parents[parent.indices]))]
    indices.to.remove <- c(indices.to.remove, parent.indices[parent.indices!=index.to.keep])
  }
}
# Only subset when something was collected: indexing with a negated empty
# vector selects nothing and would wipe out every line of the file.
if (length(indices.to.remove) > 0)
{
  dot.file <- dot.file[-indices.to.remove]
}

# Replace mutation names.
# Mutation names are the row names of the master matrix with '.', ':' and '-'
# replaced by '_'. The gene name is whatever precedes the last two
# '_'-separated fields. Renaming only happens when every mutation name has
# that shape; otherwise new.names stays equal to mut.names.
mut.names <- str_replace_all(rownames(cell.mat), '[\\.:\\-]', '_')
gene.names <- str_match(mut.names, "^(.*)_.*_.*$")[,2]
new.names <- mut.names
if (all(!is.na(gene.names)))
{
  for (g in unique(gene.names))
  {
    same.gene <- gene.names==g
    if (sum(same.gene) >= 2)
    {
      # Number the mutations of a gene by increasing distance from "Root".
      # rank() gives each element its position in the sorted sequence;
      # order() would give the sorting permutation instead, which only
      # coincides with the rank for up to two elements. ties.method="first"
      # keeps the result integer (needed by %d) and the numbers distinct.
      depth.rank <- rank(distance.to.root(mut.names[same.gene]), ties.method="first")
      new.names[same.gene] <- sprintf("%s (%d)", g, depth.rank)
    } else new.names[same.gene] <- g
  }
  # Rename old node to new name (not how I do this anymore)
  #dot.file <- c(dot.file[1:2], sprintf('%s [label="%s"];', mut.names, new.names), dot.file[3:length(dot.file)])
  # Replace old names with new names: first occurrence per line, with the new
  # name wrapped in double quotes. NOTE: mut.names[i] is used as a regex
  # pattern. seq_along() avoids iterating over c(1, 0) when there are no
  # mutations, and str_replace() is already vectorised over the lines.
  for (i in seq_along(mut.names))
  {
    dot.file <- str_replace(dot.file, mut.names[i], sprintf('"%s"', new.names[i]))
  }
}

# Emit the results: the dot lines go to the output dot file, and a two-column
# table pairing each original mutation name with its new name goes to the
# table file, written without header, row names or quoting.
name.table <- cbind(mut.names, new.names)
writeLines(dot.file, con=outfile)
write.table(name.table, file=outtable, quote=FALSE, row.names=FALSE, col.names=FALSE)