% Journal article introducing GGCAT (Cracco and Tomescu, Genome Research 33(7), 2023).
% The citation key is kept exactly as exported so existing citations keep resolving.
% Braces in the title protect the proper name "Bruijn" and the tool name "GGCAT"
% from being lowercased by styles that apply sentence casing.
% The publisher's full-text link is stored in the non-standard "fulltext" field
% (ignored by standard styles) instead of "eprint", which is meant for
% preprint-archive identifiers and would be misrendered by styles that print it.
@article{Cracco01072023,
  author   = {Cracco, Andrea and Tomescu, Alexandru I.},
  title    = {Extremely fast construction and querying of compacted and colored de {Bruijn} graphs with {GGCAT}},
  journal  = {Genome Research},
  volume   = {33},
  number   = {7},
  pages    = {1198--1207},
  year     = {2023},
  doi      = {10.1101/gr.277615.122},
  url      = {http://genome.cshlp.org/content/33/7/1198.abstract},
  fulltext = {http://genome.cshlp.org/content/33/7/1198.full.pdf+html},
  abstract = {Compacted de Bruijn graphs are one of the most fundamental data structures in computational genomics. Colored compacted de Bruijn graphs are a variant built on a collection of sequences and associate to each k-mer the sequences in which it appears. We present GGCAT, a tool for constructing both types of graphs, based on a new approach merging the k-mer counting step with the unitig construction step, as well as on numerous practical optimizations. For compacted de Bruijn graph construction, GGCAT achieves speed-ups of 3× to 21× compared with the state-of-the-art tool Cuttlefish 2. When constructing the colored variant, GGCAT achieves speed-ups of 5× to 39× compared with the state-of-the-art tool BiFrost. Additionally, GGCAT is up to 480× faster than BiFrost for batch sequence queries on colored graphs.},
}