#!/usr/bin/Rscript
# Power-like analysis across the space of the MO tree: draws heatmaps showing how well combinations of presence/gain values can reject a null hypothesis for preferential gain

# these commented lines represent other, more naive approaches to computing power (for instance, by just using hypergeometric test) 
#source('~/sim_null_dist_explore.R')
# long loop to calc power for BIG dataset
#powermat = matrix(rep(1,112*1400),1400)
#for (i in seq(10,1400,by=10)) {
#for (j in seq(5,110,by=5)) {
#powermat[i,j] = simnull_sampler(i,j,1400,binning='wide')
#}
#}
#powermat = as.matrix(read.table('ciccsize_min_pval_020314.txt',header=T))

# read in p-values computed using actual simulated null dist 
#powermat = as.matrix(read.table('processed_data/sim_null_powerps_052814.txt',header=T))
# Load the table of p-values as a matrix; the first line of the file supplies
# the column names (header = TRUE).
powermat <- as.matrix(read.table('processed_data/sim_null_powerps.txt', header = TRUE))

# read.table() prefixes column names that start with a digit with 'X'; strip it
# (from both dimnames, as before) so the labels can be read back as numbers.
rownames(powermat) <- gsub('X', '', rownames(powermat))
colnames(powermat) <- gsub('X', '', colnames(powermat))

# Put rows and columns in ascending numeric order of their labels.
# Indexing by position (order) rather than by re-pasted numbers keeps this
# working when a number does not print back to its original label
# (e.g. 100000 prints as "1e+05") and when labels are duplicated.
# na.last = NA drops entries whose label is not numeric, matching what the
# previous sort()-based lookup did; drop = FALSE keeps a matrix even when only
# one row or column remains.
powermat <- powermat[
  order(as.numeric(rownames(powermat)), na.last = NA),
  order(as.numeric(colnames(powermat)), na.last = NA),
  drop = FALSE
]

# Plot on a -log10 scale; the small pseudocount keeps p-values of 0 finite.
z <- -log10(powermat + .0000001)

# Fix the palette explicitly instead of relying on image()'s default, which
# differs between R versions. With heat.colors() the lowest z (largest
# p-value) is red and the highest z (smallest p-value) is pale yellow, which
# is the reading the legend below describes.
heat_pal <- heat.colors(12)

# image(z) centres bin i at (i - 1) / (n - 1) on a 0..1 axis, so tick
# positions have to be interpolated from the real bin values; labelling 0..1
# linearly as 0..max is only right when the bins start at 0 and are evenly
# spaced. Returns a list with tick positions (`at`) and their `labels`.
unit_axis_ticks <- function(bin_values) {
  positions <- seq(0, 1, length.out = length(bin_values))
  # approx() needs at least two distinct points; with fewer, label bins directly.
  if (length(unique(bin_values)) < 2) {
    return(list(at = positions, labels = bin_values))
  }
  ticks <- pretty(bin_values, n = 10)
  in_range <- ticks >= min(bin_values, na.rm = TRUE) &
    ticks <= max(bin_values, na.rm = TRUE)
  ticks <- ticks[in_range]
  list(at = approx(bin_values, positions, xout = ticks)$y, labels = ticks)
}

x_ticks <- unit_axis_ticks(as.numeric(rownames(powermat)))
y_ticks <- unit_axis_ticks(as.numeric(colnames(powermat)))

#pdf('powerheat_motree_upper_new.pdf')
image(z, col = heat_pal, ylab = 'Gains', xlab = 'Prevalence', xaxt = 'n', yaxt = 'n',
      main = 'minimum p-value in each pair bin, log10 scale')
axis(1, at = x_ticks$at, labels = x_ticks$labels)
axis(2, at = y_ticks$at, labels = y_ticks$labels)
# Legend swatches are taken from the two ends of the palette actually drawn.
# NOTE(review): the lower label is hard-coded and is ten times the 1e-7
# pseudocount used above -- confirm it matches the smallest p-value in the data.
legend(.05, .8, legend = c(paste('p-value =', max(powermat)), 'p-value = 0.000001'),
       fill = c(heat_pal[1], heat_pal[length(heat_pal)]), bg = 'white')
#dev.off()

