library(stringr)

# Positional command-line arguments (those following the script name):
#   1. infile      - path of the table loaded later with read.table()
#   2. outdir      - directory that receives the output files
#   3. sample.name - base name used for the output files
#   4. path of a text file; its lines are the cell names to keep
cli.args <- commandArgs(trailingOnly = TRUE)

# Fail early with a usage hint instead of letting a missing argument surface
# later as an obscure error about an NA path.
if (length(cli.args) < 4) {
  stop("expected 4 arguments: <infile> <outdir> <sample.name> <filtered-cell-names-file>",
       call. = FALSE)
}

infile <- cli.args[1]
outdir <- cli.args[2]
sample.name <- cli.args[3]
filtered.cell.names <- readLines(cli.args[4])

# Function to make a tree. Replace if we decide to do maximum parsimony or something.
library(ape)
make.tree <- function(seg.data) {
  # Build a tree from a table whose columns are the items to be related.
  #
  # seg.data: matrix or data frame; each column becomes one tip of the tree.
  # Returns whatever fastme.bal() returns for the pairwise distance matrix.

  # dist() measures distances between rows, so transpose to compare columns.
  by.row <- t(seg.data)
  pairwise <- dist(by.row, method = "manhattan")

  # Alternative kept from the original for reference: nj(pairwise)
  fastme.bal(pairwise)
}

# Load the input table and discard its first five columns.
rle.seg.data <- read.table(infile, header=TRUE)
# drop = FALSE keeps a data frame even when a single column remains; without
# it R collapses the result to a bare vector, colnames() below returns NULL
# and the column subscript fails with "incorrect number of dimensions".
rle.seg.data <- rle.seg.data[, -(1:5), drop = FALSE]

# Reduce every column name to its short identifier (capture group 1 of the
# pattern). Names that do not match the pattern yield NA, which is not among
# the names read from the filter list, so those columns are left out.
short.col.names <- str_match(colnames(rle.seg.data), '([A-Z]A?\\.\\d+)[._].*$')[, 2]
keep.column <- short.col.names %in% filtered.cell.names

# Again keep the data-frame shape (and thus the column label) when exactly one
# column is selected.
filtered.rle.seg.data <- rle.seg.data[, keep.column, drop = FALSE]
tree <- make.tree(filtered.rle.seg.data)

# Instrumentation
#print("Raw names:")
#print(colnames(rle.seg.data))
#print("Short names:")
#print(str_match(colnames(rle.seg.data), '([A-Z]A?\\.\\d+)[._].*$')[,2])
#print("Filtering list:")
#print(filtered.cell.names)
#print("After filtering:")
#print(colnames(filtered.rle.seg.data))
# Report the requested cell names that have no matching column in
# rle.seg.data (matching is done on the short identifier extracted from each
# column name).
print("Missing cells:")
observed.short.names <- str_match(colnames(rle.seg.data), '([A-Z]A?\\.\\d+)[._].*$')[,2]
# Deliberately a bare top-level expression: its value is auto-printed.
setdiff(filtered.cell.names, observed.short.names)

#stopifnot(ncol(rle.seg.data) == length(filtered.cell.names))

# Write output: the tree via write.tree() and a PDF plot of it, both placed in
# outdir and named after the sample.
tree.path <- sprintf('%s/%s.tree', outdir, sample.name)
write.tree(tree, tree.path)

pdf.path <- sprintf('%s/%s.tree.pdf', outdir, sample.name)
pdf(pdf.path)
# finally= closes the PDF device whether or not plotting succeeds.
tryCatch(
  plot(tree, cex=.4),
  finally=dev.off()
)