

# A plotting R script produced by the REVIGO server at http://revigo.irb.hr/
# If you found REVIGO useful in your work, please cite the following reference:
# Supek F et al. "REVIGO summarizes and visualizes long lists of Gene Ontology
# terms" PLoS ONE 2011. doi:10.1371/journal.pone.0021800


# --------------------------------------------------------------------------
# If you don't have the ggplot2 package installed, uncomment the following line:
# install.packages( "ggplot2" );
library( ggplot2 );
# --------------------------------------------------------------------------
# If you don't have the scales package installed, uncomment the following line:
# install.packages( "scales" );
library( scales );


# --------------------------------------------------------------------------
# Here is your data from REVIGO. Scroll down for plot configuration options.

# Column names of the REVIGO scatterplot table, in the same order as the
# values inside each row of revigo.data below.
revigo.names <- c(
  "term_ID", "description", "frequency_%", "plot_X", "plot_Y",
  "plot_size", "log10_p_value", "uniqueness", "dispensability"
)

# One row per GO term. Because every row mixes strings and numbers inside c(),
# all values are coerced to character; the numeric columns are converted back
# when one.data is built below.
revigo.data <- rbind(
  c("GO:0008152","metabolic process",75.387,-0.569, 6.720, 6.986,-6.5482,0.962,0.000),
  c("GO:0009987","cellular process",63.780,-4.171, 0.103, 6.913,-3.0590,0.945,0.000),
  c("GO:0044237","cellular metabolic process",53.061, 3.666,-4.990, 6.833,-7.8996,0.723,0.000),
  c("GO:0006807","nitrogen compound metabolic process",38.744,-3.157, 3.621, 6.696,-3.6498,0.798,0.088),
  c("GO:0043170","macromolecule metabolic process",39.491, 6.357, 4.741, 6.705,-3.6655,0.708,0.089),
  c("GO:0071704","organic substance metabolic process",58.357,-0.833,-6.524, 6.874,-5.5768,0.801,0.119),
  c("GO:0044238","primary metabolic process",53.743,-3.009,-3.766, 6.839,-4.3391,0.800,0.120),
  c("GO:1901360","organic cyclic compound metabolic process",30.324, 4.476, 5.117, 6.590,-2.0610,0.715,0.211),
  c("GO:0044260","cellular macromolecule metabolic process",34.276, 5.270,-0.169, 6.643,-2.7122,0.581,0.224),
  c("GO:0006725","cellular aromatic compound metabolic process",29.628, 5.922,-3.492, 6.580,-2.0218,0.668,0.260),
  c("GO:0046483","heterocycle metabolic process",29.664, 7.345,-3.314, 6.580,-2.0237,0.668,0.260),
  c("GO:0090304","nucleic acid metabolic process",21.449, 6.309, 0.249, 6.440,-2.0560,0.533,0.431),
  c("GO:0016070","RNA metabolic process",15.951, 7.325, 0.288, 6.311,-1.3686,0.552,0.577),
  c("GO:0006139","nucleobase-containing compound metabolic process",26.547, 6.773, 0.678, 6.532,-2.0381,0.552,0.597)
)

# Assemble the plotting data frame inside local() so the temporaries used
# during the conversion do not end up in the global environment.
one.data <- local({
  tbl <- data.frame(revigo.data)
  names(tbl) <- revigo.names

  # Keep only the rows whose coordinates are not the literal string "null".
  has.coords <- tbl$plot_X != "null" & tbl$plot_Y != "null"
  tbl <- tbl[has.coords, ]

  # These columns are converted in place. Going through as.character() first
  # gives the right numbers whether data.frame() produced strings or factors.
  in.place <- c(
    "plot_X", "plot_Y", "plot_size", "log10_p_value",
    "uniqueness", "dispensability"
  )
  tbl[in.place] <- lapply(
    tbl[in.place],
    function(v) as.numeric(as.character(v))
  )

  # The numeric frequency is stored in an additional "frequency" column; the
  # source column "frequency_%" is left untouched.
  tbl$frequency <- as.numeric(as.character(tbl[["frequency_%"]]))

  tbl
})
#head(one.data);
#head(one.data);


# --------------------------------------------------------------------------
# Names of the axes, sizes of the numbers and letters, names of the columns,
# etc. can be changed below

# Build the REVIGO bubble scatterplot from one.data (prepared earlier in this
# script): one bubble per GO term, positioned at (plot_X, plot_Y).
p1 <- ggplot(data = one.data)

# Filled, semi-transparent bubbles: colour encodes log10_p_value and size
# encodes plot_size.
p1 <- p1 + geom_point(
  aes(plot_X, plot_Y, colour = log10_p_value, size = plot_size),
  alpha = I(0.6)
)

# Colour ramp spanning from the smallest log10 p-value in the data up to 0.
p1 <- p1 + scale_colour_gradientn(
  colours = c("blue", "green", "yellow", "red"),
  limits = c(min(one.data$log10_p_value), 0)
)

# Thin black outline drawn over every bubble (shape 21 = circle with border).
p1 <- p1 + geom_point(
  aes(plot_X, plot_Y, size = plot_size),
  shape = 21,
  fill = "transparent",
  colour = I(alpha("black", 0.6))
)

# A single size scale. The script used to add scale_size_area() twice before
# this one; ggplot2 keeps only the last scale added for an aesthetic, so those
# calls had no effect other than "Scale for size is already present" messages.
p1 <- p1 + scale_size(range = c(5, 30)) + theme_bw()
# + scale_fill_gradientn(colours = heat_hcl(7), limits = c(-300, 0) );

# Label only the terms with dispensability below 0.15 to avoid clutter.
ex <- one.data[one.data$dispensability < 0.15, ]
p1 <- p1 + geom_text(
  data = ex,
  aes(plot_X, plot_Y, label = description),
  colour = I(alpha("black", 0.85)),
  size = 3
)

# plot_X is mapped to the x aesthetic and plot_Y to the y aesthetic, so the
# axis titles must follow the same order (they were previously swapped).
p1 <- p1 + labs(x = "semantic space x", y = "semantic space y")
p1 <- p1 + theme(legend.key = element_blank())

# Pad both axes by 10% of the data range so that bubbles near the border are
# not cut off.
one.x_range <- max(one.data$plot_X) - min(one.data$plot_X)
one.y_range <- max(one.data$plot_Y) - min(one.data$plot_Y)
p1 <- p1 + xlim(
  min(one.data$plot_X) - one.x_range / 10,
  max(one.data$plot_X) + one.x_range / 10
)
p1 <- p1 + ylim(
  min(one.data$plot_Y) - one.y_range / 10,
  max(one.data$plot_Y) + one.y_range / 10
)



# --------------------------------------------------------------------------
# Output the plot to screen

p1

# Uncomment the line below to also save the plot to a file.
# The file type depends on the extension (default=pdf).

# ggsave("C:/Users/path_to_your_file/revigo-plot.pdf");


# Column names of the REVIGO treemap table, in the same order as the values
# inside each row of revigo.data below. Note that both objects replace the
# scatterplot versions defined earlier in this script.
revigo.names <- c(
  "term_ID", "description", "freqInDbPercent", "abslog10pvalue",
  "uniqueness", "dispensability", "representative"
)

# One row per GO term; the last value names the group ("representative") the
# term is drawn under. Mixing strings and numbers inside c() coerces every
# value to character, so the numeric columns are converted back below.
revigo.data <- rbind(
  c("GO:0008152","metabolic process",75.387,6.5482,0.962,0.000,"metabolism"),
  c("GO:0009987","cellular process",63.780,3.0590,0.945,0.000,"cellular process"),
  c("GO:0044237","cellular metabolic process",53.061,7.8996,0.723,0.000,"cellular metabolism"),
  c("GO:0071704","organic substance metabolic process",58.357,5.5768,0.801,0.119,"cellular metabolism"),
  c("GO:0044238","primary metabolic process",53.743,4.3391,0.800,0.120,"cellular metabolism"),
  c("GO:0006807","nitrogen compound metabolic process",38.744,3.6498,0.798,0.088,"nitrogen compound metabolism"),
  c("GO:0043170","macromolecule metabolic process",39.491,3.6655,0.708,0.089,"macromolecule metabolism"),
  c("GO:0044260","cellular macromolecule metabolic process",34.276,2.7122,0.581,0.224,"macromolecule metabolism"),
  c("GO:0090304","nucleic acid metabolic process",21.449,2.0560,0.533,0.431,"macromolecule metabolism"),
  c("GO:0006139","nucleobase-containing compound metabolic process",26.547,2.0381,0.552,0.597,"macromolecule metabolism"),
  c("GO:0016070","RNA metabolic process",15.951,1.3686,0.552,0.577,"macromolecule metabolism"),
  c("GO:0046483","heterocycle metabolic process",29.664,2.0237,0.668,0.260,"macromolecule metabolism"),
  c("GO:0006725","cellular aromatic compound metabolic process",29.628,2.0218,0.668,0.260,"macromolecule metabolism"),
  c("GO:1901360","organic cyclic compound metabolic process",30.324,2.0610,0.715,0.211,"macromolecule metabolism")
)

# Assemble the treemap input inside local() so the temporaries used during the
# conversion do not end up in the global environment.
stuff <- local({
  tbl <- data.frame(revigo.data)
  names(tbl) <- revigo.names

  # Going through as.character() first gives the right numbers whether
  # data.frame() produced strings or factors.
  numeric.cols <- c(
    "abslog10pvalue", "freqInDbPercent", "uniqueness", "dispensability"
  )
  tbl[numeric.cols] <- lapply(
    tbl[numeric.cols],
    function(v) as.numeric(as.character(v))
  )

  tbl
})

# --------------------------------------------------------------------------
# The treemap is drawn with the treemap package, which was never attached by
# this script. If you don't have it installed, uncomment the following line:
# install.packages( "treemap" );
library(treemap)

# by default, outputs to a PDF file
pdf(file = "revigo_treemap.pdf", width = 16, height = 9) # width and height are in inches

# Draw the treemap of the `stuff` data frame prepared above: tiles are grouped
# by "representative", sized by abslog10pvalue and coloured per group.
# treemap() replaces the former tmPlot() entry point of the same package and
# takes the same arguments.
# The call is wrapped in tryCatch(..., finally = dev.off()) so the PDF device
# is closed even when plotting fails; invisible() keeps the value returned by
# treemap() from being printed.
# check the treemap command documentation for all possible parameters - there are a lot more
invisible(tryCatch(
  treemap(
    stuff,
    index = c("representative", "description"),
    vSize = "abslog10pvalue",
    type = "categorical",
    vColor = "representative",
    title = "REVIGO Gene Ontology treemap",
    inflate.labels = FALSE,      # set this to TRUE for space-filling group labels - good for posters
    lowerbound.cex.labels = 0,   # try to draw as many labels as possible (still, some small squares may not get a label)
    bg.labels = "#CCCCCCAA",     # define background color of group labels
    # "#CCCCCC00" is fully transparent, "#CCCCCCAA" is semi-transparent grey, NA is opaque
    position.legend = "none"
  ),
  finally = dev.off()
))