#!/usr/bin/Rscript
# 10/6/2014
# updated 6/2/15
# to display toposort/ other data for many groups for figure S6. 

library(beeswarm)
# ---- Input tables ----------------------------------------------------------

# Table read with a header row. Further down it is indexed by KO id through
# its row names to look up the value that ends up in the 'rank' column.
ordering <- read.table(
  'processed_data/reproduced_analysis/hgt_toposort_attr_090915.txt',
  header = TRUE
)

# Tab-separated annotation table, read without a header and converted to a
# matrix. Column 1 is used below as the KO id, column 2 as the category name.
brite <- as.matrix(
  read.table('processed_data/annots_helperfiles/brite.reparsed.083115', sep = '\t')
)
# Disabled filter, kept for reference:
#brite = brite[as.vector(brite[1,]) %in% na.omit(rownames(ordering)),]

# Gzip-compressed table of gain probabilities, collapsed to one summed value
# per column. The resulting named vector is later indexed by KO id, so the
# columns are presumably KOs -- confirm against the gainLoss output format.
gains <- colSums(
  read.table(
    gzfile('gainLoss_results/MOtree_GLrun/AncestralReconstructPosterior.txt.gain_probs'),
    header = TRUE
  )
)

# Categories to display, in plotting order. Per the original author's note,
# these were found by standard enrichment analysis of the ranks.
enriches <- c(
  'Bacterial motility proteins [BR:ko02035]',
  'Pilus system',
  'Flagellar system',
  'Carbohydrate metabolism',
  'Xenobiotics biodegradation and metabolism',
  'Type II secretion system',
  'Pilin secretion/fimbrial assembly protein',
  'Two-component system [BR:ko02022]',
  'Conjugal transfer pilus assembly protein',
  'Type IV secretion system',
  'Type III secretion system'
)

# Build one character matrix with a row per (KO, category) pair for every
# category listed in `enriches`.
#   column 1 'KO'       : KO id taken from column 1 of `brite`
#   column 2 'category' : category name taken from column 2 of `brite`
#   column 3 'rank'     : value looked up in `ordering` by KO id (row name)
#   column 4 'gains'    : summed gain probability looked up in `gains` by KO id
topocats <- do.call(rbind, lapply(enriches, function(enrich) {
  # which() ignores NA categories instead of producing all-NA rows, and
  # drop=FALSE keeps a category with exactly one KO as a 1-row matrix, so the
  # column indexing on the next line cannot fail with
  # "incorrect number of dimensions".
  kos <- brite[which(brite[, 2] == enrich), , drop = FALSE]
  # NOTE(review): this relies on `ordering` having a single data column, so
  # that the row lookup collapses to a plain vector -- confirm against the file.
  cbind(kos, ordering[kos[, 1], ])
}))

colnames(topocats) <- c('KO', 'category', 'rank')

# Attach the gains and drop every row with a missing value (KOs absent from
# `ordering` or from `gains`). The new column is named, but it is still the
# fourth column, so positional access keeps working.
topocats <- na.omit(as.matrix(cbind(topocats, gains = gains[topocats[, 1]])))

# One colour per row of `topocats`: ranks 1..6 map to the matching entry of
# rainbow(6); any other rank keeps the empty string.
# NOTE(review): `cols` is not passed to either beeswarm() call in this script.
cols <- local({
  rank_values <- as.numeric(topocats[, 'rank'])
  rank_palette <- rainbow(6)
  colours <- rep('', nrow(topocats))
  known <- rank_values %in% seq_len(6)
  colours[known] <- rank_palette[rank_values[known]]
  colours
})

# ---- Plots -----------------------------------------------------------------
# Two horizontal beeswarm plots with one swarm per category, in the order
# given by `enriches`. jitter() adds random noise, so exact point positions
# differ from run to run.
# Uncomment the pdf()/dev.off() pair to write the plots to a file instead of
# the active graphics device.
#pdf('figs6_toporank_beeswarm_060215.pdf')

# Margins are c(bottom, left, top, right); the left one is enlarged,
# presumably to make room for the long category labels.
par(mar = c(5, 8, 4, 2))

# Plot 1: the 'rank' column per category.
beeswarm(
  jitter(as.numeric(topocats[,'rank'])) ~ factor(topocats[,'category'],levels=enriches),
  las = 1,
  cex.axis = .5,
  pch = 19,
  cex = .5,
  corral = 'gutter',
  horiz = TRUE,
  xlab = 'Rank in topological sort',
  ylab = ''
)

# Plot 2: the fourth column of `topocats` (summed gain probabilities).
beeswarm(
  jitter(as.numeric(topocats[,4])) ~ factor(topocats[,'category'],levels=enriches),
  las = 1,
  cex.axis = .5,
  pch = 19,
  cex = .5,
  corral = 'gutter',
  horiz = TRUE,
  xlab = 'Gains in tree (probabilistic counts)',
  ylab = ''
)

#dev.off()
