#!/usr/bin/Rscript
# Directly compare the MP and EM ancestral reconstructions produced by gainLoss.

require(vioplot)

# Tip labels; assumed to sit in the first column of MOtree_specs.
# `[[1]]` extracts that column as a plain vector. The earlier
# `as.vector(read.table(...))[,]` form depends on as.vector() handing a data
# frame back unchanged; on R >= 4.3 it returns a bare list, and `[,]` then
# fails with "incorrect number of dimensions".
specs <- read.table(
  'gainLoss_results/MOtree_GLrun/MOtree_specs',
  stringsAsFactors = FALSE
)[[1]]

# MP reconstruction (Sankoff output).
#mp = read.table('kegg2013_MOtree_results/gainLossMP.2.00099.AncestralReconstructSankoff.txt',header=T)
mp <- read.table(
  gzfile('gainLoss_results/MOtree_GLrun/gainLossMP.2.00099.AncestralReconstructSankoff.txt.gz'),
  header = TRUE
)
# get rid of tips- known reconstructions there
mp <- mp[!(mp$Node %in% specs), ]

# EM reconstruction (posterior output).
em <- read.table(
  gzfile('gainLoss_results/MOtree_GLrun/AncestralReconstructPosterior.txt.gz'),
  header = TRUE
)
# get rid of tips- known reconstructions there
em <- em[!(em$Node %in% specs), ]


# Pair every MP state with the EM probability reported for the SAME position
# and node. Ordering each table by its own value column first (State / Prob)
# lines rows up by rank rather than by (POS, Node), which compares unrelated
# reconstructions and inflates the apparent agreement. Matching on an explicit
# key also avoids cbind() silently recycling when the tables differ in length.
mp_key <- paste(mp$POS, mp$Node, sep = '\t')
em_key <- paste(em$POS, em$Node, sep = '\t')
if (anyDuplicated(mp_key) > 0 || anyDuplicated(em_key) > 0) {
  # NOTE(review): assumes one row per (POS, Node) in each table -- confirm
  # against the gainLoss output format if this warning ever fires.
  warning('duplicate (POS, Node) rows found; only the first EM match is used',
          call. = FALSE)
}
em_row <- match(mp_key, em_key)

# Column 1 = MP state, column 2 = EM probability. MP rows with no EM
# counterpart get NA in column 2 and are dropped by na.omit().
matted <- cbind(mp$State, em$Prob[em_row])
matted <- na.omit(matted)

#print(cor(matted))
#print(cor(matted,method='spearman'))

# The full tables are no longer needed; drop them to release memory.
rm(em, mp, mp_key, em_key, em_row)

# EM probabilities split by the MP call (column 1 of matted): 0 vs 1.
noprobs <- matted[matted[, 1] == 0, 2]
yesprobs <- matted[matted[, 1] == 1, 2]

#pdf('em_mp_compare_box_072015.pdf')
#boxplot(noprobs, yesprobs,labels=c('MP=absent','MP=present'),ylab = 'EM Presence probability estimate')
#dev.off()

print('visualizations of the correspondence between MP and EM ancestral reconstructions')
#pdf('em_mp_compare_vio_072015.pdf')
#vioplot(noprobs, yesprobs) #, labels=c('MP=absent','MP=present'), ylab = 'EM Presence probability estimate', horiz=TRUE, col='gray')
#dev.off()

# Kernel density of the EM probabilities within each MP class, using a fixed
# bandwidth of 1e-4. Reuses the subsets above instead of recomputing them.
no <- density(noprobs, bw = .0001)
yes <- density(yesprobs, bw = .0001)

#pdf('mp_em_density_072015.pdf')
# The y-axis must span both curves: plot() would otherwise scale it from `no`
# alone and the red curve added by lines() could be clipped.
plot(
  no,
  col = 'black',
  xlab = 'EM reconstruction probability of presence',
  main = 'Black=MP not present, Red=MP present',
  xlim = c(0, 1),
  ylim = range(no$y, yes$y)
)
lines(yes, col = 'red')
#dev.off()


# Crude agreement score: turn each EM probability into a 0/1 call with round()
# and report the fraction of rows where that call equals the MP state.
# Note that column 2 of `matted` is overwritten with the rounded calls.
print('now just simple heuristic to evaluate agreement')
em_call <- round(matted[, 2])
matted[, 2] <- em_call
n_same <- sum(matted[, 1] == em_call, na.rm = TRUE)
agreement <- n_same / nrow(matted)
cat('agreement between EM and MP is ', agreement, '\n')
