#!/usr/bin/Rscript

# Take a tree with NCBI IDs as leaves, change them to KEGG IDs, prune leaves that are unmapped (or whose KEGG ID is already used by another leaf), and write the resulting tree.

library(ape)
# Bootstrap values break ape's Newick parsing, so the tree read here is
# expected to have had them stripped already (hence "noboot").
MOtree <- read.tree("source_data/MOtree_noboot.newick")

# Mapping table: column 2 is compared against the tree's leaf labels (NCBI
# IDs) and column 3 supplies the replacement label (KEGG ID).
# Every column is read as character on purpose: as.matrix() on a data frame
# that mixes numeric and text columns runs the numeric ones through format(),
# which left-pads them with spaces to a common width ("  9", " 10", "100").
# Padded IDs would never equal the tree's leaf labels, and those leaves would
# be silently pruned as unmapped.
maps <- as.matrix(
  read.table("source_data/kegg_ncbi_spec_maps.txt", colClasses = "character")
)

# Had some trouble with bootstrap values breaking ape's ability to parse the tree; there are too many species anyway, so just omit them.
leaves <- MOtree$tip.label

# For every leaf, find the first row of `maps` whose column 2 equals the leaf
# label (NA when there is none) and take the KEGG ID from column 3 of that
# row. match() skips NA entries in column 2 instead of letting them leak into
# the result.
map_row <- match(leaves, maps[, 2])
kegg <- unname(maps[map_row, 3])

# A leaf is kept only if it has a KEGG ID and is the first leaf to claim it.
# Later leaves resolving to an already-used KEGG ID are pruned, so the
# relabelled tree never ends up with duplicate tip labels.
keep <- !is.na(kegg) & !duplicated(kegg)

# Relabel the kept leaves; pruned leaves retain their original label.
leaves[keep] <- kegg[keep]

# KEGG IDs that were actually assigned, in leaf order.
mapped <- kegg[keep]

# Leaves to drop, recorded as tip positions rather than labels (drop.tip()
# accepts either). Positions stay unambiguous even if the tree has repeated
# labels or a KEGG ID happens to equal some other leaf's original label.
to_prune <- which(!keep)
	
# Install the relabelled leaves on the tree.
MOtree$tip.label <- leaves

# Remove the leaves marked for pruning, collapsing internal nodes that are
# left without descendants.
newtree <- drop.tip(MOtree, tip = to_prune, trim.internal = TRUE)

# drop.tip() only warns and returns NULL when every tip is removed; fail here
# with a clear message instead of passing NULL on to write.tree().
if (is.null(newtree)) {
  stop("no leaves could be mapped to KEGG IDs; no tree written", call. = FALSE)
}

write.tree(newtree, "MOprunedtree.newick")
