# some downstream analysis of str-pheno assocs 8/31/16
library(gplots)
library(coxme)
library(RColorBrewer)

# p-value cut-off used for ordinary traits.
p_thresh <- 1e-06
# FT traits are weird (e.g. inflated), so they get their own, stricter cut-off.
ft_thresh <- 1e-10
# Phenotype column names that are compared against ft_thresh instead of p_thresh.
ft_traits <- c(
  "X1_LD", "X2_LDV", "X3_SD", "X4_SDV",
  "X5_FT10", "X6_FT16", "X7_FT22",
  "X39_0W", "X40_2W", "X41_4W", "X42_8W",
  "X43_FLC", "X44_FRI",
  "X45_8W.GH.FT", "X46_8W.GH.LN", "X47_0W.GH.FT", "X48_0W.GH.LN",
  "X57_FT.Field", "X59_FT.GH",
  "X80_LN10", "X81_LN16", "X82_LN22",
  "X158_Silique.16", "X159_Silique.22"
)

# double-checked and these are the RIGHT VERSIONS 3/23/17
# Observed p-values, read as a matrix (rows are later treated as STRs, columns
# as phenotypes).
mmaps <- as.matrix(read.table('~/Dropbox/Ath_STRs/strwa/str_pheno_mmas_pvals_031717_final.txt', header = TRUE))
# p-vals from tests with genotypes shuffled
mma_shuf <- as.matrix(read.table('~/Dropbox/Ath_STRs/strwa/str_pheno_mmas_pvals_031417_SHUFFLE.txt', header = TRUE))

# omit weird data based on p-value distribution in either shuffled or real cases
weird_pvals <- c('X4_SDV','X16_Na23','X20_K39','X24_Co59','X25_Ni60','X30_Mo98','X34_avrRpt2','X45_8W.GH.FT','X46_8W.GH.LN','X57_FT.Field','X78_YEL','X158_Silique.16','X159_Silique.22','X162_Germ.16','X164_Width.10','X167_Chlorosis.10','X169_Chlorosis.22','X170_Anthocyanin','X178_Leaf.roll.22','X183_Trichome.avg.C','X185_Aphid.number','X280_Seed.bank.133.91')
# drop = FALSE keeps a matrix even if only one column/row survives a filter
mmaps <- mmaps[, !(colnames(mmaps) %in% weird_pvals), drop = FALSE]

#pcaps = pcaps[,colnames(mmaps)]
# keep the shuffled matrix aligned with the retained phenotype columns
mma_shuf <- mma_shuf[, colnames(mmaps), drop = FALSE]
#pca_shuf = pca_shuf[,colnames(mmaps)]

# Rows with no p-value for any retained phenotype (entire row is NA).
# Counted with base R so the script does not depend on a `countnas` helper
# that is not defined in this file.
# NOTE(review): assumes countnas() returned the per-row NA count - confirm.
all_missing <- rowSums(is.na(mmaps)) == ncol(mmaps)
missing_strs <- rownames(mmaps)[all_missing]
mmaps <- mmaps[!(rownames(mmaps) %in% missing_strs), , drop = FALSE]
#pcaps = pcaps[rownames(mmaps),]
# keep the shuffled matrix aligned with the retained rows
mma_shuf <- mma_shuf[rownames(mmaps), , drop = FALSE]
#pca_shuf = pca_shuf[rownames(mmaps),]

# lose a few which are p ~ 0.
#plot(mmaps,pcaps,pch='.',log='xy',xlab='MMA p-values',ylab='PCA p-values')
#print(cor.test(mmaps,pcaps,method='spearman'))

# Histogram of all observed p-values, 100 breaks.
hist(mmaps, breaks = 100, xlab = 'MMA p-values')
#hist(pcaps,100,xlab='PCA p-values')

# QQ plot of all observed p-values pooled across phenotypes; shuffled
# p-values that are > 0 are passed as the permuted set.
source('~/Dropbox/Ath_STRs/code/pval_qqplot.R')
ggd.qqplot(
  na.omit(as.vector(mmaps)),
  permed_ps = mma_shuf[mma_shuf>0]  # shouldn't be p=0!!!
)
#ggd.qqplot(na.omit(as.vector(pcaps), permed_ps = pca_shuf[pca_shuf>0])) # shouldn't be p=0!!!

# uncomment to make a pdf figure
#pdf('qqplot_mma_vs_pca_032017.pdf') 
# for (i in colnames(mmaps)) {
#   ggd.qqplot(mmaps[mmaps[,i]>0,i],main=paste('LMM,',i))
# #  if (length(which(is.na(pcaps[,i])))==nrow(pcaps) ) {next}
# #  ggd.qqplot(pcaps[pcaps[,i]>0,i],main=paste('PCA,',i))
# }
# #dev.off()

# Write one QQ plot per phenotype (observed vs. shuffled p-values) to a PDF,
# four panels per page. Phenotypes whose shuffled p-values contain an exact 0
# are printed and skipped.
pdf('output/qqplot_mma_w_permuted_032017.pdf')
par(mfrow = c(2, 2))
for (pheno in colnames(mmaps)) {
  smallest_shuffled <- min(mma_shuf[, pheno], na.rm = TRUE)
  if (smallest_shuffled == 0) {
    print(pheno)
    next
  }
  # shouldn't be p=0!!!
  ggd.qqplot(
    na.omit(as.vector(mmaps[, pheno])),
    permed_ps = mma_shuf[, pheno],
    main = pheno
  )
}
dev.off()
# back to a single panel for whatever is plotted next
par(mfrow = c(1, 1))
# pdf('qqplot_pca_w_permuted_03217.pdf')
# for (i in colnames(pcaps)) {
#   if (min(na.omit(pca_shuf[,i])) ==0) {
#     print(i)
#     next}
#   ggd.qqplot(na.omit(as.vector(pcaps[,i])),permed_ps=pca_shuf[,i], main=i) # shouldn't be p=0!!!
# }
# dev.off()

# NA-free copy of the p-value matrix for heatmap.2: missing entries and any
# value >= 1 are both set to .999.
# note that this crashes Rstudio interactively sometimes
mmaps_nona <- replace(mmaps, is.na(mmaps) | mmaps >= 1, .999)

# Reversed cm.colors palette: the x colours of cm.colors(x), last one first.
rev_cm.colors <- function(x) {
  rev(cm.colors(x))
}

# Reversed ColorBrewer 'Reds' palette with exactly x colours (x is int).
# brewer.pal() only provides 3 to 9 colours for 'Reds'; outside that range it
# warns and returns a palette of a different length than requested, which
# breaks callers that need one colour per interval (e.g. heatmap.2 with
# explicit breaks). For 3 <= x <= 9 the result is the plain reversed
# brewer.pal palette; for any other x the nearest available palette is
# interpolated to x colours with colorRampPalette().
red_pal = function(x) {
  n_base = min(max(x, 3), 9)
  base_cols = rev(brewer.pal(n = n_base, name = 'Reds'))
  if (x == n_base) {
    return(base_cols)
  }
  colorRampPalette(base_cols)(x)
}

# this is huge, so not putting into rmarkdown doc
# Clustered heatmap of log10 p-values, written straight to a PDF.
print('HEATMAP IS WRITTEN AS PDF')
pdf('output/heatmap_strpheno_032217.pdf')
heatmap.2(
  log10(mmaps_nona),
  trace = 'none',
  cexRow = .1,
  cexCol = .3,
  col = red_pal,
  # colour breaks every 2.5 log10 units, starting at the truncated minimum
  breaks = seq(trunc(min(log10(mmaps_nona))), 1, by = 2.5),
  symm = FALSE,
  symkey = FALSE,
  symbreaks = TRUE,
  scale = "none"
)
dev.off()

# Binary significance matrix with the same shape and dimnames as mmaps_nona:
# 1 where a p-value is below the cut-off for its column, 0 otherwise.
# Columns named in ft_traits are compared against ft_thresh, all other
# columns against p_thresh.
# Indexing a matrix with a single logical vector (m[cols][mask]) selects
# individual cells by recycling the vector, not whole columns, so the
# per-column cut-off is applied with sweep() instead.
is_ft_col <- colnames(mmaps_nona) %in% ft_traits
col_cutoff <- ifelse(is_ft_col, ft_thresh, p_thresh)
mmas_sigs <- sweep(mmaps_nona, 2, col_cutoff, FUN = "<") * 1

# ID phenos with sig results
# Subset the name vector itself: subsetting the matrix first drops to a plain
# vector when exactly one column qualifies, and colnames() would return NULL.
sig_phenos <- colnames(mmas_sigs)[colSums(mmas_sigs) > 0]

par(mfrow=c(1,1))
#pcas_sigs = pcaps_nona
#pcas_sigs[pcaps_nona<p_thresh] = 1
#pcas_sigs[pcaps_nona>=p_thresh] = 0

# Table of significant STR-phenotype pairs: one row per (ID, Phenotype) whose
# NA-free p-value is below the cut-off for that phenotype (ft_thresh for
# columns in ft_traits, p_thresh otherwise). The reported P-value is taken
# from the unmodified mmaps matrix.
# Built in one vectorised pass instead of growing a matrix with rbind() inside
# a nested loop; rows come out phenotype by phenotype, STRs in matrix order
# within each phenotype. With no significant pairs the result is an empty
# 3-column matrix, so the colnames assignment and write.table still work.
assoc_is_ft <- colnames(mmaps_nona) %in% ft_traits
assoc_cutoff <- ifelse(assoc_is_ft, ft_thresh, p_thresh)
assoc_hits <- which(sweep(mmaps_nona, 2, assoc_cutoff, FUN = "<"), arr.ind = TRUE)

associations <- cbind(
  rownames(mmaps_nona)[assoc_hits[, 1]],
  colnames(mmaps_nona)[assoc_hits[, 2]],
  as.character(mmaps[assoc_hits])
)

colnames(associations) <- c('ID','Phenotype','P-value')

write.table(associations, 'output/str_pheno_assocs_NEW_032317.txt', quote = FALSE, row.names = FALSE, sep = '\t')

# QQ plots (observed vs. shuffled p-values) for every phenotype that has at
# least one association, nine panels per page, written to a PDF.
pdf('output/qqplot_mma_sigsonly_032017.pdf')
par(mfrow = c(3, 3))
phenos_with_hits <- unique(associations[, 'Phenotype'])
for (pheno in phenos_with_hits) {
  ggd.qqplot(
    mmaps[, pheno],
    permed_ps = mma_shuf[, pheno],
    main = paste('LMM,', pheno),
    cex = .5
  )
}
dev.off()

# account for GWAS SNPs for strong STR associations (svp, cry1, etc3)
# re-fit lmekin models for 65291 and 37915
# ---> done by another script. 

# Compare the annotation categories of association STRs with those of all
# STRs, as side-by-side bars of within-group fractions.
# NOTE(review): `annotation` and `genos` are not created in the visible part
# of this script - presumably loaded earlier in the session; confirm.
# NOTE(review): rbind() only lines the two tables up if both contain the same
# categories in the same order (e.g. the annotation column is a factor) -
# confirm.
# Bar colours are passed explicitly so that they match the legend; barplot's
# default for a matrix is gray.colors(), which is not black / light gray.
annot_cols <- c('black', 'light gray')
annot_labels <- c('Association STRs', 'All STRs')
# background distribution, shared by both plots
all_str_frac <- table(annotation[rownames(genos), 'annotation']) / nrow(genos)

# Plot 1: every association counts (an STR associated with several phenotypes
# is counted once per association).
assoc_annot_mat <- rbind(
  table(annotation[associations[, 'ID'], 'annotation']) / nrow(associations),
  all_str_frac,
  deparse.level = 0
)
barplot(assoc_annot_mat, beside = TRUE, las = 2, ylim = c(0, .4), col = annot_cols)
legend(1, .4, fill = annot_cols, legend = annot_labels)

# Plot 2: each associated STR counts once.
assoc_str_ids <- unique(associations[, 'ID'])
assoc_annot_mat <- rbind(
  table(annotation[assoc_str_ids, 'annotation']) / length(assoc_str_ids),
  all_str_frac,
  deparse.level = 0
)
barplot(assoc_annot_mat, beside = TRUE, las = 2, ylim = c(0, .4), col = annot_cols)
legend(1, .4, fill = annot_cols, legend = annot_labels)

source('code/dtf_validations_combine_040317.R')
set.seed(311)
# a priori flowering candidate associations- from brachi et al. 2010
apri <- read.csv('strwa/apriori_flowering_candidates_brachi2010.csv')
# second column of the candidate table is matched against annotation 'gene'
candidate_genes <- apri[, 2]
# number of candidates whose gene carries any STR in genos
candidate_in_strs <- sum(candidate_genes %in% annotation[rownames(genos), 'gene'])
# number of candidates whose gene carries an associated STR
candidate_in_assoc <- sum(candidate_genes %in% annotation[associations[, 1], 'gene'])



