library(beeswarm)

#ps=read.table('str_pheno_mmas_pvals_110216.txt',header=T)
#ps=read.table('str_pheno_mmas_pvals_120616_final.txt',header=T)
# Load the three input tables.
#   ps    - association p-values; later indexed by STR (row names) and
#           phenotype (column names).
#   pheno - raw phenotype table; its second column holds the accession names.
#   strs  - STR genotype table with one column per accession.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned elsewhere in a session.
ps <- read.table('str_pheno_mmas_pvals_031717_final.txt', header = TRUE, sep = '\t')
pheno <- read.table('data/phenotype_published_raw_rename.txt', header = TRUE, sep = '\t')
strs <- read.table('data/mip_geno_filtered_table.txt', header = TRUE)

# Reduce every genotype column header to a bare accession name: strip each
# "_<alphanumeric>" run, then upper-case what is left.
strs_names <- toupper(gsub('_[0-9a-zA-Z]+', '', colnames(strs)))

# A few accessions are spelled differently in the genotype and phenotype
# datasets. Names are the genotype spelling, values the spelling to use.
acc_fixes <- c(
	'COL'     = 'COL.0',
	'OMO2.1'  = 'OMO.2.1',
	'OMO2.3'  = 'OMO.2.3',
	'SHA'     = 'SHAHDARA',
	'AN.0'    = 'AN.1',
	'KNOX.18' = 'KNO.18',
	'KNOX.10' = 'KNO.10',
	'VAR2.6'  = 'VAR.2.6',
	'VAR2.1'  = 'VAR.2.1'
)
needs_fix <- strs_names %in% names(acc_fixes)
strs_names[needs_fix] <- unname(acc_fixes[strs_names[needs_fix]])
colnames(strs) <- strs_names


print(colnames(strs))

#nullsize = 10000

# Bring the phenotype accession names (column 2 of `pheno`) into the same form
# as the genotype column names: '-' becomes '.', everything upper-cased.
phenoaccs <- toupper(gsub('-', '\\.', as.vector(pheno[, 2])))

# Keep only the accessions that also have a genotype column.
has_geno <- phenoaccs %in% colnames(strs)
accs <- phenoaccs[has_geno]

# Phenotype rows for the shared accessions, keyed by accession name.
# NOTE(review): as.matrix() on a mixed-type data frame gives a character
# matrix; values are converted with as.numeric() where they are plotted.
phenoin <- as.matrix(pheno[has_geno, ])
rownames(phenoin) <- accs

# Genotype columns for the shared accessions. drop = FALSE keeps the column
# name when only a single accession matches.
repin <- as.matrix(strs[, colnames(strs) %in% accs, drop = FALSE])

# Reorder the phenotype rows to follow the genotype column order. drop = FALSE
# keeps `phenoin` a matrix even when a single accession is left, so the later
# column lookups `phenoin[, <phenotype>]` keep working.
phenoin <- phenoin[colnames(repin), , drop = FALSE]
print(dim(phenoin))

# Write a multi-page PDF: first a histogram of all p-values in `ps`, then one
# page per STR/phenotype pair whose p-value is at or below `pval_cutoff`,
# showing the phenotype value by STR allele as a beeswarm with a boxplot
# overlaid.

# P-value at or below which an STR/phenotype pair gets its own plot.
pval_cutoff <- 0.000001

pdf('str_assocs_mma_032017.pdf')
# `finally` closes the PDF device even if a plot fails part-way through, so a
# failed run does not leave an open device behind. invisible() keeps the
# wrapper's return value from being printed.
invisible(tryCatch({
	hist(as.matrix(ps), 100, xlab = 'P-values')

	# Loop variables are deliberately not called `pheno` / `str`: those names
	# would overwrite the `pheno` data frame and mask utils::str().
	for (str_id in rownames(ps)) {
		for (pheno_id in colnames(ps)) {
			pval <- ps[str_id, pheno_id]
			if (is.na(pval) || pval > pval_cutoff) {
				next
			}

			# Phenotype values and STR alleles for the same accessions, in the
			# same order (rows of `phenoin` are named by accession).
			phenovec <- phenoin[, pheno_id]
			genovec <- t(strs[str_id, rownames(phenoin)])

			# Column 1 = allele, column 2 = phenotype; rows with a missing
			# value in either column are dropped.
			matted <- na.omit(cbind(as.numeric(genovec), as.numeric(phenovec)))
			if (nrow(matted) == 0) {
				# beeswarm() cannot draw an empty data set; skip this pair.
				warning('no complete observations for ', str_id, ' / ', pheno_id,
					'; plot skipped', call. = FALSE)
				next
			}

			beeswarm(matted[, 2] ~ matted[, 1], xlab = paste(str_id, 'allele'),
				ylab = pheno_id, pch = 19, cex = .8)
			boxplot(matted[, 2] ~ matted[, 1], add = TRUE, varwidth = TRUE)
		}
	}
}, finally = dev.off()))
