% Margulies et al. (2007), Genome Research 17(6): ENCODE multi-species
% alignment and constraint analysis.
% Literal percent signs in title/abstract are escaped as \% so they are not
% read as LaTeX comment starts once copied into the .bbl.
% NOTE(review): `eprint` holds the publisher's full-text link as exported,
% not an archive identifier -- confirm the target style tolerates this.
@article{Margulies01062007,
  author   = {Margulies, Elliott H. and Cooper, Gregory M. and Asimenos, George
              and Thomas, Daryl J. and Dewey, Colin N. and Siepel, Adam
              and Birney, Ewan and Keefe, Damian and Schwartz, Ariel S.
              and Hou, Minmei and Taylor, James and Nikolaev, Sergey
              and Montoya-Burgos, Juan I. and L{\"o}ytynoja, Ari and Whelan, Simon
              and Pardi, Fabio and Massingham, Tim and Brown, James B.
              and Bickel, Peter and Holmes, Ian and Mullikin, James C.
              and Ureta-Vidal, Abel and Paten, Benedict and Stone, Eric A.
              and Rosenbloom, Kate R. and Kent, W. James and Bouffard, Gerard G.
              and Guan, Xiaobin and Hansen, Nancy F. and Idol, Jacquelyn R.
              and Maduro, Valerie V. B. and Maskeri, Baishali and McDowell, Jennifer C.
              and Park, Morgan and Thomas, Pamela J. and Young, Alice C.
              and Blakesley, Robert W. and Muzny, Donna M. and Sodergren, Erica
              and Wheeler, David A. and Worley, Kim C. and Jiang, Huaiyang
              and Weinstock, George M. and Gibbs, Richard A. and Graves, Tina
              and Fulton, Robert and Mardis, Elaine R. and Wilson, Richard K.
              and Clamp, Michele and Cuff, James and Gnerre, Sante
              and Jaffe, David B. and Chang, Jean L. and Lindblad-Toh, Kerstin
              and Lander, Eric S. and Hinrichs, Angie and Trumbower, Heather
              and Clawson, Hiram and Zweig, Ann and Kuhn, Robert M.
              and Barber, Galt and Harte, Rachel and Karolchik, Donna
              and Field, Matthew A. and Moore, Richard A. and Matthewson, Carrie A.
              and Schein, Jacqueline E. and Marra, Marco A. and Antonarakis, Stylianos E.
              and Batzoglou, Serafim and Goldman, Nick and Hardison, Ross
              and Haussler, David and Miller, Webb and Pachter, Lior
              and Green, Eric D. and Sidow, Arend},
  title    = {Analyses of deep mammalian sequence alignments and constraint predictions for 1\% of the human genome},
  journal  = {Genome Research},
  volume   = {17},
  number   = {6},
  pages    = {760--774},
  year     = {2007},
  doi      = {10.1101/gr.6034307},
  url      = {http://genome.cshlp.org/content/17/6/760.abstract},
  eprint   = {http://genome.cshlp.org/content/17/6/760.full.pdf+html},
  abstract = {A key component of the ongoing ENCODE project involves rigorous comparative sequence analyses for the initially targeted 1\% of the human genome. Here, we present orthologous sequence generation, alignment, and evolutionary constraint analyses of 23 mammalian species for all ENCODE targets. Alignments were generated using four different methods; comparisons of these methods reveal large-scale consistency but substantial differences in terms of small genomic rearrangements, sensitivity (sequence coverage), and specificity (alignment accuracy). We describe the quantitative and qualitative trade-offs concomitant with alignment method choice and the levels of technical error that need to be accounted for in applications that require multisequence alignments. Using the generated alignments, we identified constrained regions using three different methods. While the different constraint-detecting methods are in general agreement, there are important discrepancies relating to both the underlying alignments and the specific algorithms. However, by integrating the results across the alignments and constraint-detecting methods, we produced constraint annotations that were found to be robust based on multiple independent measures. Analyses of these annotations illustrate that most classes of experimentally annotated functional elements are enriched for constrained sequences; however, large portions of each class (with the exception of protein-coding sequences) do not overlap constrained regions. The latter elements might not be under primary sequence constraint, might not be constrained across all mammals, or might have expendable molecular functions. Conversely, 40\% of the constrained sequences do not overlap any of the functional elements that have been experimentally identified. Together, these findings demonstrate and quantify how many genomic functional elements await basic molecular characterization.},
}