#' Rainfall plot to display hyper mutated genomic regions.
#' @description Plots inter variant distance as a function of genomic locus.
#' @details If `detectChangePoints` is set to TRUE, this method will use Change-Point detection method to identify genomic loci where average inter-mutation distance changes from the background.
#' Segments detected with less than 6 mutations are filtered out.
#'
#' @param maf an \code{\link{MAF}} object generated by \code{\link{read.maf}}. Required.
#' @param tsb specify sample names (Tumor_Sample_Barcodes) for which plotting has to be done. If NULL, draws plot for most mutated sample.
#' @param detectChangePoints If TRUE, detects genomic change points where potential kataegis are formed. Results are written to an output tab delimited file.
#' @param color named vector of colors for each conversion class.
#' @param ref.build Reference build for chromosome sizes. Can be hg18, hg19 or hg38. Default hg19. Any other value raises an error.
#' @param savePlot If TRUE plot is saved to output pdf. Default FALSE.
#' @param width width of plot to be saved.
#' @param height height of plot to be saved.
#' @param fontSize Default 12.
#' @param pointSize Default 1.
#' @return returns ggplot object of the plot which can be further modified. Results are written to an output file with suffix changePoints.tsv
#' @importFrom changepoint cpt.mean cpts
#' @export
rainfallPlot = function(maf, tsb = NULL, detectChangePoints = FALSE,
                        ref.build = 'hg19', color = NULL, savePlot = FALSE, width = 6, height = 3, fontSize = 12, pointSize = 1){

  # Validate the build before doing any work: chromosome lengths are only
  # known for these three builds, and an unknown value would otherwise surface
  # much later as an obscure "object not found" error.
  if(length(ref.build) != 1 || !ref.build %in% c('hg18', 'hg19', 'hg38')){
    stop("ref.build must be one of 'hg18', 'hg19' or 'hg38'.", call. = FALSE)
  }

  # Fall back to the first sample listed in the sample summary.
  if(is.null(tsb)){
    tsb = as.character(getSampleSummary(maf)[1,Tumor_Sample_Barcode])
  }

  message(paste0("Processing ", tsb, ".."))

  # Keep only SNPs (synonymous variants included) for the requested sample.
  maf.snp = subsetMaf(maf = maf, includeSyn = TRUE, tsb = tsb, fields = 'Hugo_Symbol', query = "Variant_Type == 'SNP'")

  if(nrow(maf.snp) == 0){
    stop('No more single nucleotide variants left after filtering for SNP in Variant_Type field.')
  }

  maf.snp = maf.snp[ ,.(Chromosome, Hugo_Symbol, Start_Position, End_Position, Reference_Allele, Tumor_Seq_Allele2, Tumor_Sample_Barcode, Variant_Type)]

  # Raw substitution as "ref>alt".
  maf.snp$con = paste(maf.snp[,Reference_Allele], maf.snp[,Tumor_Seq_Allele2], sep = '>')

  # Collapse the twelve possible substitutions into six classes; each
  # purine-reference change is folded onto its pyrimidine-reference partner.
  conv = c('A>G' = "T>C", 'T>C' = "T>C",
           'C>T' = "C>T", 'G>A' = "C>T",
           'A>T' = "T>A", 'T>A' = "T>A",
           'A>C' = "T>G", 'T>G' = "T>G",
           'C>A' = "C>A", 'G>T' = "C>A",
           'C>G' = "C>G", 'G>C' = "C>G")
  maf.snp$con.class = conv[as.character(maf.snp$con)]

  # transformSegments supplies the *_updated coordinate columns used below.
  maf.snp = transformSegments(maf.snp, build = ref.build)
  # log10 of the distance to the previous variant (+1 so a distance of 0 maps to 0).
  maf.snp$diff = suppressWarnings( log10(c(0, diff(maf.snp$Start_Position_updated))+1) )
  #Remove any NA's if generated
  maf.snp = maf.snp[complete.cases(diff)]

  if(is.null(color)){
    col = RColorBrewer::brewer.pal(n = 6, name = 'Set1')
    names(col) = c('C>T', 'C>G', 'C>A', 'T>C', 'T>A', 'T>G')
  }else{
    col = color
  }

  # Chromosome lengths (1-22, X, Y) for the requested build.
  chr.lens = switch(ref.build,
    hg19 = c(249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663,
             146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540,
             102531392, 90354753, 81195210, 78077248, 59128983, 63025520, 48129895, 51304566,
             155270560, 59373566),
    hg18 = c(247249719, 242951149, 199501827, 191273063, 180857866, 170899992,
             158821424, 146274826, 140273252, 135374737, 134452384, 132349534,
             114142980, 106368585, 100338915, 88827254, 78774742, 76117153,
             63811651, 62435964, 46944323, 49691432, 154913754, 57772954),
    hg38 = c(248956422, 242193529, 198295559, 190214555, 181538259, 170805979,
             159345973, 145138636, 138394717, 133797422, 135086622, 133275309,
             114364328, 107043718, 101991189, 90338345, 83257441, 80373285,
             58617616, 64444167, 46709983, 50818468, 156040895, 57227415)
  )

  # Cumulative chromosome ends; used both as axis breaks and as separators.
  chr.lens.sumsum = cumsum(chr.lens)

  # Note: y-axis tick labels are fixed at size 12 regardless of fontSize.
  gg.rf = ggplot(data = maf.snp, aes(x= Start_Position_updated, y = diff, col = con.class))+
                geom_point(size = pointSize, alpha = 0.6)+cowplot::theme_cowplot(font_size = fontSize, line_size = 1)+
                cowplot::background_grid(major = 'y')+xlab('')+ylab('log10(inter event distance)')+
                theme(axis.line.x = element_blank(), axis.title.y = element_text(face = "bold"), axis.text.x = element_text(face = "bold"), axis.text.y = element_text(size = 12, face = "bold"))+scale_x_continuous(breaks = chr.lens.sumsum, labels = c(1:22, 'X', 'Y'))+
                geom_vline(xintercept = chr.lens.sumsum, linetype = 'dotted', size = 0.3)+
                theme(legend.position = 'bottom', legend.title = element_blank())+
                scale_color_manual(values = col)+
                ggtitle(tsb)+guides(colour = guide_legend(override.aes = list(size=3)))

  if(detectChangePoints){
    seg_len = 5
    # detectCP is defined elsewhere; a NULL result is treated as "nothing found".
    maf.cpt = detectCP(dat = maf.snp, segLen = seg_len)
    if(is.null(maf.cpt)){
      message('No changepoints detected!')
    }else{
      # Match change points back to variants via a "chromosome:position" key.
      maf.snp[,id := paste0(Chromosome, ':', Start_Position)]
      maf.snp = maf.snp[!diff %in% 0]
      # Smallest non-zero log distance per chromosome; the arrow tip sits just below it.
      maf.snp[,minDiff := min(diff), by = .(Chromosome)]
      maf.cpt[,id := paste0(Chromosome, ':', Start_Position)]
      maf.cpt = merge(maf.snp[,.(id, Start_Position_updated, End_Position_updated, minDiff)], maf.cpt[,.(id)])
      gg.rf = gg.rf+geom_segment(aes(x = Start_Position_updated, y = 0.1, yend = minDiff-0.2,
                                     xend = End_Position_updated), data = maf.cpt, inherit.aes = FALSE,
                                 arrow = arrow(length = unit(0.2, "cm")))
    }
  }


  if(savePlot){
    cowplot::save_plot(filename = paste(tsb, 'rainfallPlot.pdf', sep = '_'),
                       plot = gg.rf, base_height = height, base_width = width)
  }

  gg.rf
}
