library('pheatmap')
library(ggplot2) 
library(scales) 


#########################################################################################################
# For debug purposes
# my.data.name <- "MSIgDB_Hallmark_Datasets"
# my.threshold = 4
# my.colnames = c("Aging_across_tissues", "Heart","Liver","Cerebellum","OB", "NPCs")

#' Balloon plot and table of gene sets enriched in several conditions
#'
#' Looks in "../FDR5percent/" for tab-separated enrichment tables whose file
#' name matches `my.data.name`, picks one file per entry of `my.colnames`
#' (file name must contain "_<condition>_<my.data.name>"), and collects for
#' every gene set its -log10 p-value and its signed enrichment (positive when
#' Direction is 'UP', negative when 'DOWN'). Gene sets present in at least
#' `my.threshold` conditions are written to a text table under
#' "./Stats_tables/" and drawn as a balloon plot (colour = signed enrichment,
#' size = -log10 p-value) in a PDF under "./MYDATA/".
#'
#' Each input table must provide the columns Gene_Set, p.val, Direction and
#' Enrichment. A gene set listed more than once in a table (e.g. on both tail
#' ends) is treated as not significant for that condition.
#'
#' @param my.data.name Pattern identifying the gene set collection in the
#'   enrichment file names; also used in the output file names.
#' @param my.threshold Minimum number of conditions in which a gene set must
#'   be present to be reported.
#' @param my.colnames Condition names, in the order wanted on the plot x axis.
#' @return The value of dev.off() after the PDF is closed. When no gene set
#'   reaches the threshold, a warning is raised, nothing is written and
#'   NULL is returned invisibly.
get_enrich_balloons <- function(my.data.name, my.threshold = 4, 
                                my.colnames = c("Aging_across_tissues", "Heart","Liver","Cerebellum","OB", "NPCs") ) {
  
  if (length(my.colnames) == 0) {
    stop("my.colnames must contain at least one condition name", call. = FALSE)
  }
  
  my.enrich.sets <- list.files("../FDR5percent/", pattern = my.data.name)
  
  # get data from significant FDR 0.05, one table per condition.
  # Each table is stored at the position of its own condition so that a missing
  # or ambiguous file can never shift the other conditions to the wrong column.
  my.tissues.kegg <- vector(length = length(my.colnames), mode = "list")
  names(my.tissues.kegg) <- my.colnames
  my.pathways <- c()
  
  for (i in seq_along(my.colnames)) {
    my.hits <- grep(paste("_", my.colnames[i], "_", my.data.name, sep = ""), my.enrich.sets)
    
    if (length(my.hits) == 0) {
      warning("No enrichment file found for condition '", my.colnames[i],
              "'; it will show no enrichment", call. = FALSE)
      next
    }
    if (length(my.hits) > 1) {
      warning("Several enrichment files match condition '", my.colnames[i],
              "'; using ", my.enrich.sets[my.hits[1]], call. = FALSE)
      my.hits <- my.hits[1]
    }
    
    # stringsAsFactors = FALSE: keep gene set names as text on every R version
    my.tissues.kegg[[i]] <- read.csv(paste("../FDR5percent/", my.enrich.sets[my.hits], sep = ""),
                                     sep = "\t", header = TRUE, stringsAsFactors = FALSE)
    
    my.pathways <- unique(c(my.pathways, as.character(my.tissues.kegg[[i]]$Gene_Set)))
  }
  
  ####
  # prepare output data (rows = gene sets, columns = conditions)
  my.dimnames <- list(my.pathways, my.colnames)
  
  # p-val matrix; default: -log10(1) pval == 0 no enrichment
  my.matrix <- matrix(0, length(my.pathways), length(my.colnames), dimnames = my.dimnames)
  
  # Enrichment matrix; initialize with Enrichment = 0 if no enrich
  my.matrix2 <- matrix(0, length(my.pathways), length(my.colnames), dimnames = my.dimnames)
  
  # matrix with record of significance (1 = gene set reported for this condition)
  my.matrix3 <- matrix(0, length(my.pathways), length(my.colnames), dimnames = my.dimnames)
  
  # collect data from files
  for (j in seq_along(my.colnames)) { # tissues
    
    my.table <- my.tissues.kegg[[j]]
    if (is.null(my.table)) {
      next # no file for this condition
    }
    
    for (i in seq_along(my.pathways)) {
      
      my.id <- which(my.table$Gene_Set %in% my.pathways[i])
      
      if (length(my.id) == 1) { # if was significant in this tissue (and not on both tail ends, which would be 2)
        
        my.matrix[i, j] <- -log10(my.table$p.val[my.id]) # a p-value of exactly 0 gives Inf here
        
        my.direction <- as.character(my.table$Direction[my.id])
        if (isTRUE(my.direction == 'UP')) {
          my.matrix2[i, j] <- my.table$Enrichment[my.id]
        } else if (isTRUE(my.direction == 'DOWN')) {
          my.matrix2[i, j] <- - my.table$Enrichment[my.id]
        }
        
        my.matrix3[i, j] <- 1
      }
    }
  }
  
  # find pathways significant in threshold or more conditions
  # (counted over all requested conditions, however many there are)
  my.sigs <- rowSums(my.matrix3) >= my.threshold
  
  if (!any(my.sigs)) {
    warning("No gene set is significant in ", my.threshold,
            " or more conditions for ", my.data.name, "; nothing written", call. = FALSE)
    return(invisible(NULL))
  }
  
  # drop = FALSE keeps the matrix shape even when a single gene set passes
  my.sig.enrich <- my.matrix2[my.sigs, , drop = FALSE]
  my.sig.pval <- my.matrix[my.sigs, , drop = FALSE]
  
  # sort by average change
  my.average <- apply(my.sig.enrich, 1, mean)
  my.sorted <- sort(my.average, index.return = TRUE, decreasing = TRUE)
  
  # check.names = FALSE: condition names must stay usable as column keys below
  my.res.enrich2 <- data.frame(my.sig.enrich[my.sorted$ix, , drop = FALSE], check.names = FALSE)
  my.pval.enrich2 <- data.frame(my.sig.pval[my.sorted$ix, , drop = FALSE], check.names = FALSE)
  
  my.txtname <- paste('./Stats_tables/',
                      paste(Sys.Date(),"Enrichment_table",my.data.name,"pathways_significant_in",my.threshold,"or_more.txt", sep="_"),
                      sep="")
  
  write.table(my.res.enrich2, file = my.txtname, sep = "\t", quote = FALSE)
  
  # format for ggplot: one row per (gene set, condition) pair
  my.pathnames <- rownames(my.res.enrich2)
  my.long.list <- lapply(my.colnames, function(my.condition) {
    data.frame(Pathnames = my.pathnames,
               aging_signed_enricment = unname(my.res.enrich2[, my.condition]),
               condition = my.condition,
               minusLog10Pval = unname(my.pval.enrich2[, my.condition]),
               row.names = NULL,
               stringsAsFactors = FALSE)
  })
  my.res.enrich3 <- do.call(rbind, my.long.list)
  
  # Colour anchors: always include 0 in the range so that white sits at
  # "no enrichment" even when every value has the same sign.
  my.max <- max(0, my.res.enrich3$aging_signed_enricment, na.rm = TRUE)
  my.min <- min(0, my.res.enrich3$aging_signed_enricment, na.rm = TRUE)
  my.values <- c(my.min,0.75*my.min,0.5*my.min,0.25*my.min,0,0.25*my.max,0.5*my.max,0.75*my.max,my.max)
  my.scaled <- rescale(my.values, to = c(0, 1))
  my.color.vector <- c("darkblue","dodgerblue4","dodgerblue3","dodgerblue1","white","lightcoral","brown1","firebrick2","firebrick4")
  
  # to preserve the wanted order
  my.res.enrich3$condition <- factor(my.res.enrich3$condition, levels = unique(my.res.enrich3$condition))
  my.res.enrich3$Pathnames <- factor(my.res.enrich3$Pathnames, levels = rev(unique(my.res.enrich3$Pathnames)))
  
  # NOTE(review): the plot title below contains a typo ("dysregylated"); left as is
  my.plot <- ggplot(my.res.enrich3,aes(x=condition,y=Pathnames,colour=aging_signed_enricment,size=minusLog10Pval))+ theme_bw()+ geom_point(shape = 16) 
  my.plot <- my.plot + ggtitle("Aging dysregylated pathways") + labs(x = "Tissue/condition", y = "Gene Set")
  my.plot <- my.plot + scale_colour_gradientn(colours = my.color.vector,space = "Lab", na.value = "grey50", guide = "colourbar",
                                              values = my.scaled, limits = c(my.min, my.max))
  
  my.pdfname <- paste('./MYDATA/',
                      paste(Sys.Date(),"Enrichment_BALLOON_plot",my.data.name,"pathways_significant_in",my.threshold,"or_more.pdf", sep="_"),
                      sep="")
  
  pdf(my.pdfname, onefile = FALSE, height = max(5, sum(my.sigs)/3), width = 12)
  # make sure the PDF device gets closed even if drawing the plot fails
  my.device <- dev.cur()
  on.exit(if (my.device %in% dev.list()) dev.off(my.device), add = TRUE)
  
  print(my.plot)
  dev.off(my.device)
  
}
