#' Compare mutation load against TCGA cohorts
#' @description Compares the per-sample mutation load of the input MAF against the 33 TCGA cohorts bundled with maftools.
#' @details Cohorts are ordered along the x-axis by decreasing median mutation load and the median of every cohort is marked.
#' The y-axis is drawn on a log10 scale. A summary table (cohort, cohort size, median) is printed, and the plot is drawn as a side effect.
#' @param maf an \code{\link{MAF}} object generated by \code{\link{read.maf}}
#' @param capture_size capture size for input MAF in MBs. Default NULL. If provided, must be a single positive number and the plot will be scaled to mutations per MB. TCGA capture size is assumed to be 50 MB.
#' @param cohortName name for the input MAF cohort. Default "Input". If the name clashes with one of the TCGA cohort labels, a warning is issued and the suffix "_Input" is appended so that the input cohort is not merged with the TCGA cohort.
#' @param primarySite If TRUE uses primary site of cancer as labels instead of TCGA project IDs. Default FALSE.
#' @param col color vector of length 2 for TCGA cohorts and input MAF cohort, in that order. Default gray70 and black.
#' @param medianCol color for median line. Default red.
#' @param fn If provided saves plot to output pdf with basename fn, and the summary table to \code{<fn>_mutation_load.tsv}. Default NULL.
#' @param width width for output plot
#' @param height height of output plot
#' @param fontSize base fontsize. Default 10. Y-axis text uses \code{fontSize} and the y-axis title \code{fontSize + 2}.
#' @return ggplot object, returned invisibly because the plot has already been drawn.
#' @examples
#' laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
#' laml <- read.maf(maf = laml.maf)
#' tcgaCompare(maf = laml, cohortName = "AML")
#' @export
tcgaCompare = function(maf, capture_size = NULL, cohortName = NULL, primarySite = FALSE, col = c('gray70', 'black'), medianCol = 'red', fn = NULL, width = 8, height = 5, fontSize = 10){

  # Fail early on a capture size that would yield Inf/NaN loads or obscure errors downstream.
  if(!is.null(capture_size)){
    if(!is.numeric(capture_size) || length(capture_size) != 1 || is.na(capture_size) || capture_size <= 0){
      stop("capture_size must be a single positive number (in MB)", call. = FALSE)
    }
  }

  tcga.cohort.file = system.file('extdata', 'tcga_cohort.txt.gz', package = 'maftools')
  if(!nzchar(tcga.cohort.file)){
    stop("Could not locate 'extdata/tcga_cohort.txt.gz' in the installed maftools package", call. = FALSE)
  }

  # Read through a gzfile connection on every platform. This avoids shelling out
  # to `zcat` with an unquoted path (which breaks on paths containing spaces) and
  # guarantees the connection is closed even if parsing fails.
  tcga.cohort.gz = gzfile(description = tcga.cohort.file, open = 'r')
  on.exit(close(tcga.cohort.gz), add = TRUE)
  # suppressWarnings is carried over from the original connection-based reader.
  tcga.cohort = data.table::as.data.table(
    suppressWarnings(read.csv(file = tcga.cohort.gz, header = TRUE, sep = '\t', stringsAsFactors = FALSE))
  )

  # Keep only what is needed; the grouping label is either the project ID or the primary site.
  if(primarySite){
    tcga.cohort = tcga.cohort[,.(Tumor_Sample_Barcode, total, cohort = site)]
  }else{
    tcga.cohort = tcga.cohort[,.(Tumor_Sample_Barcode, total, cohort)]
  }

  maf.mutload = getSampleSummary(maf)[,.(Tumor_Sample_Barcode, total)]
  if(is.null(cohortName)){
    cohortName = 'Input'
  }

  # An input label identical to a TCGA label would merge both cohorts into one
  # group (wrong median, wrong cohort size). Disambiguate instead of merging.
  clash = cohortName %in% tcga.cohort$cohort
  if(any(clash)){
    warning("cohortName '", paste(unique(cohortName[clash]), collapse = "', '"),
            "' is already used by a TCGA cohort; appending '_Input' to keep the input cohort separate.", call. = FALSE)
    cohortName[clash] = paste0(cohortName[clash], '_Input')
  }

  maf.mutload[,cohort := cohortName]

  # Tag the origin before binding so that colouring never depends on label matching.
  tcga.cohort[,TCGA := 'TCGA']
  maf.mutload[,TCGA := 'Input']

  tcga.cohort[,total := as.numeric(as.character(total))]
  maf.mutload[,total := as.numeric(as.character(total))]

  if(!is.null(capture_size)){
    maf.mutload[,total := total/capture_size]
    tcga.cohort[,total := total/50] # TCGA capture size is assumed to be 50 MB
  }

  tcga.cohort = rbind(tcga.cohort, maf.mutload)

  # Per-cohort size and median, sorted by decreasing median; this order defines the x-axis.
  tcga.cohort.med = tcga.cohort[,.(Cohort_Size = .N, Median_Mutations = median(total)), by = .(Cohort = cohort)][order(Median_Mutations, decreasing = TRUE)]
  tcga.cohort[,cohort := factor(x = cohort, levels = tcga.cohort.med$Cohort)]

  tcga.cohort.gg = ggplot(data = tcga.cohort, aes(x = cohort, y = total, group = Tumor_Sample_Barcode, color = TCGA))+
                    geom_point(position = position_jitter(width = 0.3), size = 0.2, alpha = 0.8)+
                    # shape 95 is a horizontal dash: used as the median marker
                    geom_point(data = tcga.cohort.med, aes(y = Median_Mutations, x = Cohort), shape = 95, inherit.aes = FALSE, color = medianCol, size = 6)+
                    cowplot::theme_cowplot(font_size = fontSize, line_size = 1)+cowplot::background_grid(major = 'x')+
                    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, face = "bold"), legend.position = 'none', axis.text.y = element_text(face="bold", size = fontSize), axis.title.y = element_text(face="bold", size = fontSize + 2))+
                    # levels sort as 'Input', 'TCGA', hence the reversed colour vector
                    scale_color_manual(values = rev(col))

  if(is.null(capture_size)){
    tcga.cohort.gg = tcga.cohort.gg+scale_y_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000, 10000))+
      ylab('log10 (Mutations per sample)')
  }else{
    tcga.cohort.gg = tcga.cohort.gg + expand_limits(y = c(0.01, 1000))+
                      scale_y_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000), labels = c(0.01, 0.1, 1, 10, 100, 1000))+
      ylab("log10 (Mutations per MB)")
  }

  if(!is.null(fn)){
    cowplot::save_plot(filename = paste0(fn, '.pdf'), plot = tcga.cohort.gg, base_height = height, base_width = width)
    write.table(tcga.cohort.med, file = paste0(fn, '_mutation_load.tsv'), sep = '\t', quote = FALSE, row.names = FALSE)
  }

  message("Summary..")
  print(tcga.cohort.med)
  print(tcga.cohort.gg)

  # The plot was printed above; return invisibly so it is not drawn a second
  # time by auto-printing at top level or in knitr chunks.
  invisible(tcga.cohort.gg)
}
