% Bilgrav Saether et al. (2024), Genome Research 34(11): publisher export, cleaned up.
% The citation key is written without a space, because BibTeX cannot parse keys
% that contain whitespace; update any citation that used the old spelling.
% The consortium author is brace-wrapped so it is kept as one corporate name.
@article{BilgravSaether01112024,
  author   = {Bilgrav Saether, Kristine
              and Eisfeldt, Jesper
              and Bengtsson, Jesse D.
              and Lun, Ming Yin
              and Grochowski, Christopher M.
              and Mahmoud, Medhat
              and Chao, Hsiao-Tuan
              and Rosenfeld, Jill A.
              and Liu, Pengfei
              and Ek, Marlene
              and Schuy, Jakob
              and Ameur, Adam
              and Dai, Hongzheng
              and {Undiagnosed Diseases Network}
              and Hwang, James Paul
              and Sedlazeck, Fritz J.
              and Bi, Weimin
              and Marom, Ronit
              and Wincent, Josephine
              and Nordgren, Ann
              and Carvalho, Claudia M. B.
              and Lindstrand, Anna},
  title    = {Leveraging the {T2T} assembly to resolve rare and pathogenic inversions in reference genome gaps},
  journal  = {Genome Research},
  volume   = {34},
  number   = {11},
  pages    = {1785--1797},
  year     = {2024},
  doi      = {10.1101/gr.279346.124},
  url      = {http://genome.cshlp.org/content/34/11/1785.abstract},
  eprint   = {http://genome.cshlp.org/content/34/11/1785.full.pdf+html},
  abstract = {Chromosomal inversions (INVs) are particularly challenging to detect due to their
              copy-number neutral state and association with repetitive regions. Inversions
              represent about 1/20 of all balanced structural chromosome aberrations and can lead
              to disease by gene disruption or altering regulatory regions of dosage-sensitive
              genes in cis. Short-read genome sequencing (srGS) can only resolve $\sim$70\% of
              cytogenetically visible inversions referred to clinical diagnostic laboratories,
              likely due to breakpoints in repetitive regions. Here, we study 12 inversions by
              long-read genome sequencing (lrGS) (n = 9) or srGS (n = 3) and resolve nine of them.
              In four cases, the inversion breakpoint region was missing from at least one of the
              human reference genomes (GRCh37, GRCh38, T2T-CHM13) and a reference agnostic
              analysis was needed. One of these cases, an INV9 mappable only in de novo assembled
              lrGS data using T2T-CHM13 disrupts EHMT1 consistent with a Mendelian diagnosis
              (Kleefstra syndrome 1; MIM\#610253). Next, by pairwise comparison between T2T-CHM13,
              GRCh37, and GRCh38, as well as the chimpanzee and bonobo, we show that hundreds of
              megabases of sequence are missing from at least one human reference, highlighting
              that primate genomes contribute to genomic diversity. Aligning population genomic
              data to these regions indicated that these regions are variable between individuals.
              Our analysis emphasizes that T2T-CHM13 is necessary to maximize the value of lrGS
              for optimal inversion detection in clinical diagnostics. These results highlight the
              importance of leveraging diverse and comprehensive reference genomes to resolve
              unsolved molecular cases in rare diseases.},
}