#' Exact tests to detect mutually exclusive, co-occuring and altered genesets.
#'
#' @description Performs Pair-wise Fisher's Exact test to detect mutually exclusive or co-occuring events. Also identifies gene sets mutated significantly.
#' @details This function and plotting is inspired from genetic interaction analysis performed in the published study combining gene expression and mutation data in MDS. See reference for details.
#' @references Gerstung M, Pellagatti A, Malcovati L, et al. Combining gene mutation with gene expression data improves outcome prediction in myelodysplastic syndromes. Nature Communications. 2015;6:5901. doi:10.1038/ncomms6901.
#' @param maf an \code{\link{MAF}} object generated by \code{\link{read.maf}}
#' @param top check for interactions among top 'n' number of genes. Defaults to top 25. \code{genes}
#' @param genes List of genes among which interactions should be tested. If not provided, test will be performed between top 25 genes.
#' @param pvalue Default c(0.05, 0.01) p-value threshold. You can provide two values for upper and lower threshold.
#' @param returnAll If TRUE returns test statistics for all pair of tested genes. Default FALSE, returns for only genes below pvalue threshold.
#' @param findPathways Uses all mutually exclusive set of genes to further identify altered pathways. Default TRUE
#' @param kMax Default 3. maximum gene set size if findPathways is TRUE. This is time consuming for > 3.
#' @param fontSize cex for gene names. Default 0.8
#' @param verbose Default TRUE
#' @examples
#' laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
#' laml <- read.maf(maf = laml.maf)
#' somaticInteractions(maf = laml, top = 5)
#' @return list of data.tables
#' @export

somaticInteractions = function(maf, top = 25, genes = NULL, pvalue = c(0.05, 0.01), returnAll = FALSE, findPathways = TRUE, kMax = 3, fontSize = 0.8, verbose = TRUE){

  if(is.null(genes)){
    genes = getGeneSummary(x = maf)[1:top, Hugo_Symbol]
  }

  if(length(genes) < 2){
    stop("Minimum two genes required!")
  }

  om = createOncoMatrix(m = maf, g = genes)
  all.tsbs = as.character(getSampleSummary(x = maf)[,Tumor_Sample_Barcode])

  mutMat = t(om$numericMatrix)
  missing.tsbs = all.tsbs[!all.tsbs %in% rownames(mutMat)]

  if(nrow(mutMat) < 2){
    stop("Minimum two genes required!")
  }
  mutMat[mutMat > 0 ] = 1

  if(length(missing.tsbs) > 0){
    missing.tsbs = as.data.frame(matrix(data = 0, nrow = length(missing.tsbs), ncol = ncol(mutMat)),
                                 row.names = missing.tsbs)
    colnames(missing.tsbs) = colnames(mutMat)
    mutMat = rbind(mutMat, missing.tsbs)
  }

  #pairwise fisher test source code borrowed from: https://www.nature.com/articles/ncomms6901
  interactions = sapply(1:ncol(mutMat), function(i) sapply(1:ncol(mutMat), function(j) {f<- try(fisher.test(mutMat[,i], mutMat[,j]), silent=TRUE); if(class(f)=="try-error") NA else ifelse(f$estimate>1, -log10(f$p.val),log10(f$p.val))} ))
  oddsRatio <- oddsGenes <- sapply(1:ncol(mutMat), function(i) sapply(1:ncol(mutMat), function(j) {f<- try(fisher.test(mutMat[,i], mutMat[,j]), silent=TRUE); if(class(f)=="try-error") f=NA else f$estimate} ))
  rownames(interactions) = colnames(interactions) = rownames(oddsRatio) = colnames(oddsRatio) = colnames(mutMat)

  if(returnAll){
    sigPairs = which(x = 10^-abs(interactions) < 1, arr.ind = TRUE)
  }else{
    sigPairs = which(x = 10^-abs(interactions) < max(pvalue), arr.ind = TRUE)
  }

  sigPairsTbl = data.table::rbindlist(
                          lapply(X = seq_along(1:nrow(sigPairs)), function(i) {
                                  x = sigPairs[i,]
                                  g1 = rownames(interactions[x[1], x[2], drop = FALSE])
                                  g2 = colnames(interactions[x[1], x[2], drop = FALSE])
                                  tbl = as.data.frame(table(apply(X = mutMat[,c(g1, g2), drop = FALSE], 1, paste, collapse = "")))
                                  combn = data.frame(t(tbl$Freq))
                                  colnames(combn) = tbl$Var1
                                  pval = 10^-abs(interactions[x[1], x[2]])
                                  fest = oddsRatio[x[1], x[2]]
                                  d = data.table::data.table(gene1 = g1,
                                                         gene2 = g2,
                                                         pValue = pval, oddsRatio = fest)
                                  d = cbind(d, combn)
                                  d
                        }), fill = TRUE)

  sigPairsTbl = sigPairsTbl[!gene1 == gene2] #Remove doagonal elements
  sigPairsTbl$Event = ifelse(test = sigPairsTbl$oddsRatio > 1, yes = "Co_Occurance", no = "Mutually_Exclusive")
  sigPairsTbl$pair = apply(X = sigPairsTbl[,.(gene1, gene2)], MARGIN = 1, FUN = function(x) paste(sort(unique(x)), collapse = ", "))
  sigPairsTblSig = sigPairsTbl[order(as.numeric(pValue))][!duplicated(pair)]

  #Source code borrowed from: https://www.nature.com/articles/ncomms6901
  if(nrow(interactions) >= 5){
    interactions[10^-abs(interactions) > max(pvalue)] = 0
    diag(interactions) <- 0
    m <- nrow(interactions)
    n <- ncol(interactions)

    interactions[interactions < -4] = -4
    interactions[interactions > 4] = 4
    r = interactions
    rd = hclust(dist(r))$order
    cd = hclust(dist(t(r)))$order
    interactions = interactions[rd, , drop = FALSE]
    interactions = interactions[,rd, drop = FALSE]

    interactions[lower.tri(x = interactions)] = NA

    par(bty="n", mgp = c(2,.5,0), mar = c(2, 4, 3, 5)+.1, las=2, tcl=-.33)
    image(x=1:n, y=1:m, interactions, col=RColorBrewer::brewer.pal(9,"PiYG"),
          breaks = c(-4:0-.Machine$double.eps,0:4), xaxt="n", yaxt="n",
          xlab="",ylab="", xlim=c(0, n+4), ylim=c(0, n+4))
    abline(h=0:n+.5, col="white", lwd=.5)
    abline(v=0:n+.5, col="white", lwd=.5)

    mtext(side = 2, at = 1:m, text = colnames(interactions), cex = fontSize, font = 2)
    mtext(side = 3, at = 1:n, text = colnames(interactions), las = 2, line = -2, cex = fontSize, font = 2)

    #q <- p.adjust(10^-abs(interactions), method="BH")
    #p <- p.adjust(10^-abs(interactions), method="holm")
    #w = arrayInd(which(interactions < .05), rep(m,2))
    #points(w, pch=".", col="white", cex=1.5)
    w = arrayInd(which(10^-abs(interactions) < min(pvalue)), rep(m,2))
    points(w, pch="*", col="black")
    w = arrayInd(which(10^-abs(interactions) < max(pvalue)), rep(m,2))
    points(w, pch=".", col="black")
    #image(y = 1:8 +6, x=rep(n,2)+c(2,2.5)+1, z=matrix(c(1:8), nrow=1), col=brewer.pal(8,"PiYG"), add=TRUE)
    image(y = seq(0.5*nrow(interactions), 0.9*nrow(interactions), length.out = 8), x=rep(n,2)+c(2,2.5)+1, z=matrix(c(1:8), nrow=1), col = RColorBrewer::brewer.pal(8,"PiYG"), add=TRUE)
    #axis(side = 4, at = seq(1,7) + 6.5,  tcl=-.15, label=seq(-3, 3), las=1, lwd=.5)
    atLims = seq(0.5*nrow(interactions), 0.9*nrow(interactions), length.out = 7)
    axis(side = 4, at = atLims,  tcl=-.15, labels =c(3:1, 0, 1:3), las=1, lwd=.5)
    mtext(side=4, at = median(atLims), "log10 (p-value)", las=3, cex = 0.9, line = 3, font = 2)

    par(xpd=NA)
    text(x=n+2.2, y= max(atLims)+1.2, "Co-occurance", pos=4, cex = 0.9, font = 2)
    text(x=n+2.2, y = min(atLims)-1.2, "Exclusive", pos=4, cex = 0.9, font = 2)

    points(x = n+1, y = 0.2*n, pch = "*", cex = 2)
    text(x = n+1, y = 0.2*n, paste0(" p < ", min(pvalue)), pos=4, cex = 0.9, font = 2)
    points(x = n+1, y = 0.1*n, pch = ".", cex = 2)
    text(x = n+1, y = 0.1*n, paste0("p < ", max(pvalue)), pos=4, cex = 0.9)
  }


  sig.genes.pvals = NULL

  if(findPathways){
    if(nrow(sigPairsTblSig[Event %in% 'Mutually_Exclusive']) > 2){
      if(verbose){
        message("Checking for Gene sets.. ")
      }
      sig.genes = unique(c(sigPairsTblSig[Event %in% 'Mutually_Exclusive', gene1], sigPairsTblSig[Event %in% 'Mutually_Exclusive', gene2]))
      sig.genes.pvals = c()

      for(k in 3:kMax){
        sig.genes.combn = combn(x = sig.genes, m = k)

        if(verbose){
          message(paste0("k = ", k, ": ", ncol(sig.genes.combn), " combinations.."))
        }

        sps = lapply(seq_along(1:ncol(sig.genes.combn)), function(i){
          x = sig.genes.combn[,i]
          mm = mutMat[,x, drop = FALSE]
          grid.mat = t(expand.grid(rep(list(0:1), k)))
          #colllapse grid and get all the levels (all posiible combinations)
          lvls = names(table(apply(grid.mat, 2, paste, collapse = '')))
          mm.lvls = data.frame(table(apply(mm, 1, paste, collapse = '')))

          #check if for any missing combinations
          lvls.missing = lvls[!lvls %in% mm.lvls[,1]]

          if(length(lvls.missing) > 0){
            mm.lvls = rbind(mm.lvls, data.frame(Var1 = lvls.missing, Freq = 0)) #add missing combinations with zero values
          }

          #reorder
          mm.lvls = mm.lvls[order(mm.lvls$Var1),]
          if(verbose){
            message("Geneset: ", paste(x, collapse = ", "))
          }
          xp = cometExactTest::comet_exact_test(tbl = as.integer(as.character(mm.lvls$Freq)), mutmatplot = FALSE, pvalthresh = 0.1)
          data.table::data.table(gene_set = paste(x, collapse = ", "), pvalue = xp)
        })

        sig.genes.pvals = rbind(sig.genes.pvals, data.table::rbindlist(sps))
      }

      sig.genes.pvals = sig.genes.pvals[pvalue > 0][order(pvalue, decreasing = FALSE)]
      if(nrow(sig.genes.pvals[pvalue < 0.05]) > 0){
        if(verbose){
          message("Signifcantly altered gene-sets:")
          print(sig.genes.pvals[pvalue < 0.05])
        }
      }

    }
  }

  sigPairsTblSig = sigPairsTblSig[,pair := NULL]

  return(list(pairs = sigPairsTblSig, gene_sets = sig.genes.pvals))
}
