#!/usr/bin/env Rscript
# Make violin plots of, and run some simple statistics on, the prediction
# scores generated by the hgt_predict_XXX.R scripts.
# Attach vioplot up front. library() stops with an error when the package is
# not installed, whereas require() only warns and returns FALSE, which would
# let the script run on and fail later at the first vioplot() call.
library(vioplot)

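# Assumes the hgt_predict_XXX.R scripts have already been run in this session,
# so that alpha_for_roc, firm_for_roc and all_for_roc exist in the environment
# (column 1 is used below as the prediction score, column 2 as the 0/1 label).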
# ---- U-tests on the prediction scores --------------------------------------
# For each *_for_roc table, the scores in column 1 are split by the label in
# column 2 (0 vs 1) and the two groups are compared with a two-sample
# wilcox.test(). The subset expressions are deliberately passed inline:
# wilcox.test() echoes its argument expressions on the "data:" line of the
# printed result, so hoisting them into variables would change the output.
print(
  "U-tests can be used to assess the significance of a predictive classifier (such as we used) in properly classifying data.  Can be thought of as testing for the significance of the AUC, if it were a test statistic."
)

# Alpha/betaproteobacteria partition.
print(
  "test for significance of prediction for alpha/betaproteobacteria partition"
)
print(
  wilcox.test(
    as.numeric(alpha_for_roc[alpha_for_roc[, 2] == 0, 1]),
    as.numeric(alpha_for_roc[alpha_for_roc[, 2] == 1, 1])
  )
)

# Firmicutes partition.
print(
  "test for significance of prediction for firmicutes partition"
)
print(
  wilcox.test(
    as.numeric(firm_for_roc[firm_for_roc[, 2] == 0, 1]),
    as.numeric(firm_for_roc[firm_for_roc[, 2] == 1, 1])
  )
)

# Unpartitioned dataset (described as overfit in the message below).
print(
  "test for significance of prediction for unpartitioned dataset (overfit), predicting gains in firmicutes"
)
print(
  wilcox.test(
    as.numeric(all_for_roc[all_for_roc[, 2] == 0, 1]),
    as.numeric(all_for_roc[all_for_roc[, 2] == 1, 1])
  )
)

# ---- Violin plots of the prediction scores (supplementary fig. S8) ---------
print("generates figure from supp (fig s8)")
print(
  "y axis is prediction score. first panel is firmicutes, second is alpha/beta"
)

# Single-panel layout: each vioplot() call below starts its own plot.
par(mfrow = c(1, 1))

# Kernel density estimate over all firmicutes scores.
# NOTE(review): `god` is not read again in the visible part of this script;
# it is kept as a global in case later code relies on it -- confirm and drop.
god <- density(as.numeric(firm_for_roc[, 1]))

# local() keeps the helper below out of the global environment.
local({
  # Draw two violins for one *_for_roc table: scores (column 1) of the rows
  # labelled 0 in column 2 ("no gain") next to those labelled 1 ("gain").
  draw_gain_violins <- function(roc_table) {
    outcome <- roc_table[, 2]
    vioplot(
      as.numeric(roc_table[outcome == 0, 1]),
      as.numeric(roc_table[outcome == 1, 1]),
      names = c("no gain", "gain")
    )
  }

  draw_gain_violins(firm_for_roc)   # first plot: firmicutes
  draw_gain_violins(alpha_for_roc)  # second plot: alpha/beta

  invisible(NULL)
})

