# Global setup for this script.
# Keep character columns as character (not factor) when data frames are created or read.
# This is already the default since R 4.0.0; the explicit option only matters on older versions.
options(stringsAsFactors = F)
# NOTE(review): pheatmap is not referenced by the functions visible in this part of the file;
# presumably it is used elsewhere -- confirm before removing.
library('pheatmap')
# ggplot2 draws the balloon plot in get_enrich_balloons().
library(ggplot2) 
# scales provides rescale(), used to place the colour-gradient breakpoints.
library(scales) 

# based on Salah's input
# Reduce an IPA upstream-regulator table to the columns used downstream.
#
# Args:
#   ipa.result: data frame with at least the columns Upstream.Regulator,
#               Molecule.Type, Activation.z.score, p.value.of.overlap,
#               Notes and Bias.corrected.z.score.
#
# Returns:
#   A data frame holding the first four columns listed above. For rows whose
#   Notes entry is exactly "bias", the activation z-score is replaced by the
#   bias-corrected z-score. Rows left without a z-score (NA) are dropped.
clean_ipa_bias <- function(ipa.result) {
  
  keep.cols <- c("Upstream.Regulator","Molecule.Type","Activation.z.score","p.value.of.overlap"  )
  cleaned   <- ipa.result[, keep.cols]
  
  # rows flagged as biased by IPA: use the bias-corrected score instead
  # (%in% never yields NA, so a missing Notes entry counts as "not biased")
  bias.rows <- which(ipa.result$Notes %in% "bias")
  cleaned$Activation.z.score[bias.rows] <- ipa.result$Bias.corrected.z.score[bias.rows]
  
  # only keep regulators that end up with a usable z-score
  has.score <- !is.na(cleaned$Activation.z.score)
  cleaned[has.score, ]
}



#########################################################################################################
# Summarise IPA upstream-regulator results across conditions as tables and a
# balloon plot.
#
# For every upstream regulator reported in any of the supplied result tables,
# the function collects -log10(p-value of overlap) and the activation z-score
# per condition, keeps the regulators reported (exactly once) in at least
# `my.threshold` conditions, orders them by mean z-score (decreasing) and
# writes three files into the current working directory:
#   <date>_Enrichment_table_minuslog10pval_<my.data.name>_..._or_more.txt
#   <date>_Enrichment_table_Zscores_<my.data.name>_..._or_more.txt
#   <date>_Enrichment_BALLOON_plot_<my.data.name>_..._or_more.pdf
#
# Args:
#   my.data.name: label inserted into the output file names.
#   my.threshold: minimum number of conditions in which a regulator must be
#                 reported to be kept.
#   my.colnames:  condition names, one per element of `my.ips.res`, in the
#                 same order. Any number of conditions is supported.
#   my.ips.res:   list of data frames with the columns Upstream.Regulator,
#                 Activation.z.score and p.value.of.overlap (presumably the
#                 output of clean_ipa_bias() -- verify against the caller).
#
# Returns:
#   The value of dev.off() once the PDF has been written. When no regulator
#   passes the threshold, a warning is issued, nothing is written and
#   invisible(NULL) is returned.
get_enrich_balloons <- function(my.data.name, my.threshold = 4, 
                                my.colnames = c("Aging_across_tissues", "Heart","Liver","Cerebellum","OB"),
                                my.ips.res) {
  
  if (length(my.ips.res) != length(my.colnames)) {
    stop("my.ips.res must hold exactly one result table per entry of my.colnames",
         call. = FALSE)
  }
  
  # rename
  names(my.ips.res) <- my.colnames
  n.cond <- length(my.colnames)
  
  # significant data already filtered by Salah FDR 0.01
  # union of all regulators, in order of first appearance
  my.pathways <- unique(unlist(lapply(my.ips.res, function(res) res$Upstream.Regulator),
                               use.names = FALSE))
  n.path <- length(my.pathways)
  
  ####
  # prepare output data
  blank <- matrix(0, nrow = n.path, ncol = n.cond,
                  dimnames = list(my.pathways, my.colnames))
  
  # p-val matrix; default: -log10(1) pval == 0 no enrichment
  my.matrix <- blank
  
  # Enrichment matrix; initialized with Enrichment = 0 if no enrich
  # (bias corrected z-score is expected to be already substituted upstream)
  my.matrix2 <- blank
  
  # matrix with record of significance (1 = reported in this condition)
  my.matrix3 <- blank
  
  # track NAs that come with IPA; no NA a priori
  my.na.vec <- rep(FALSE, n.path)
  
  # collect data, one condition at a time
  for (j in seq_along(my.colnames)) {
    
    cond.res   <- my.ips.res[[j]]
    regulators <- cond.res$Upstream.Regulator
    
    # a regulator only counts when it occurs exactly once in this condition
    # (not on both tail ends, which would be 2)
    n.hits     <- tabulate(match(regulators, my.pathways), nbins = n.path)
    unique.hit <- n.hits == 1
    if (!any(unique.hit)) {
      next
    }
    src.rows <- match(my.pathways, regulators)[unique.hit]
    z.scores <- cond.res$Activation.z.score[src.rows]
    
    my.matrix[unique.hit, j]  <- -log10(cond.res$p.value.of.overlap[src.rows]) # log(0) is undefined
    my.matrix2[unique.hit, j] <- z.scores
    my.matrix3[unique.hit, j] <- 1
    
    my.na.vec[unique.hit] <- my.na.vec[unique.hit] | is.na(z.scores)
  }
  
  # remove NA lines; drop = FALSE keeps the matrix shape when one row is left
  my.matrix  <- my.matrix[!my.na.vec, , drop = FALSE]
  my.matrix2 <- my.matrix2[!my.na.vec, , drop = FALSE]
  my.matrix3 <- my.matrix3[!my.na.vec, , drop = FALSE]
  
  # find pathways significant in threshold or more conditions
  # (all supplied conditions are counted, not just the first five)
  my.sigs <- rowSums(my.matrix3) >= my.threshold
  
  if (!any(my.sigs)) {
    warning("No upstream regulator is reported in at least ", my.threshold,
            " conditions for '", my.data.name, "'; no table or plot written",
            call. = FALSE)
    return(invisible(NULL))
  }
  
  z.sig <- my.matrix2[my.sigs, , drop = FALSE]
  p.sig <- my.matrix[my.sigs, , drop = FALSE]
  
  # sort by average change
  my.average <- apply(z.sig, 1, mean)
  my.order   <- order(my.average, decreasing = TRUE)
  z.sorted   <- z.sig[my.order, , drop = FALSE]
  p.sorted   <- p.sig[my.order, , drop = FALSE]
  
  # check.names = FALSE keeps condition names exactly as supplied
  my.res.enrich2  <- data.frame(z.sorted, check.names = FALSE)
  my.pval.enrich2 <- data.frame(p.sorted, check.names = FALSE)
  
  my.txtname <- paste(Sys.Date(),"Enrichment_table_minuslog10pval",my.data.name,"pathways_significant_in",my.threshold,"or_more.txt", sep="_")
  write.table(my.pval.enrich2, file = my.txtname, sep = "\t", quote = FALSE)
  
  my.txtname2 <- paste(Sys.Date(),"Enrichment_table_Zscores",my.data.name,"pathways_significant_in",my.threshold,"or_more.txt", sep="_")
  write.table(my.res.enrich2, file = my.txtname2, sep = "\t", quote = FALSE)
  
  # format for ggplot: one row per (pathway, condition), conditions stacked
  # in the order of my.colnames (as.vector() unrolls a matrix column by column)
  my.res.enrich3 <- data.frame(
    Pathnames      = rep(rownames(z.sorted), times = n.cond),
    aging_Zscore   = as.vector(z.sorted),
    condition      = rep(my.colnames, each = nrow(z.sorted)),
    minusLog10Pval = as.vector(p.sorted),
    stringsAsFactors = FALSE
  )
  
  # create color scheme, centered on 0
  # NOTE(review): the breakpoints are only monotonic when the z-scores span
  # both signs (min <= 0 <= max); otherwise white is not anchored at 0.
  my.max <- max(my.res.enrich3$aging_Zscore)
  my.min <- min(my.res.enrich3$aging_Zscore)
  my.values <- c(my.min,0.75*my.min,0.5*my.min,0.25*my.min,0,0.25*my.max,0.5*my.max,0.75*my.max,my.max)
  my.scaled <- rescale(my.values, to = c(0, 1))
  my.color.vector <- c("darkblue","dodgerblue4","dodgerblue3","dodgerblue1","white","lightcoral","brown1","firebrick2","firebrick4")
  
  # to preserve the wanted order
  my.res.enrich3$condition <- factor(my.res.enrich3$condition, levels = unique(my.res.enrich3$condition))
  my.res.enrich3$Pathnames <- factor(my.res.enrich3$Pathnames, levels = rev(unique(my.res.enrich3$Pathnames)))
  
  my.plot <- ggplot(my.res.enrich3,aes(x=condition,y=Pathnames,colour=aging_Zscore,size=minusLog10Pval))+ theme_bw()+ geom_point(shape = 16) 
  my.plot <- my.plot + ggtitle("Aging dysregulated pathways") + labs(x = "Tissue/condition", y = "Gene Set")
  my.plot <- my.plot + scale_colour_gradientn(colours = my.color.vector,space = "Lab", na.value = "grey50", guide = "colourbar", values = my.scaled)
  
  my.pdfname <- paste(Sys.Date(),"Enrichment_BALLOON_plot",my.data.name,"pathways_significant_in",my.threshold,"or_more.pdf", sep="_")
  
  pdf(my.pdfname, onefile = FALSE, height = max(5, sum(my.sigs)/3), width = 12)
  # make sure the PDF device is closed even when rendering the plot fails
  pdf.dev <- dev.cur()
  on.exit(if (pdf.dev %in% dev.list()) dev.off(pdf.dev), add = TRUE)
  print(my.plot)
  dev.off(pdf.dev)
  
}
