% Marks et al. (2019), Genome Research 29(4), 635--645: the Linked-Reads paper.
% The citation key is kept exactly as exported so existing \cite commands still resolve.
% The publisher's full-text PDF link is stored in the non-standard field "fulltext-url"
% (ignored by styles) rather than in "eprint", which is reserved for archive identifiers.
% NOTE(review): "Barrio, Alvaro Martinez" is kept as exported; the family name may be the
% compound "Martinez Barrio" -- confirm against the publisher page before changing it.
@article{Marks01042019,
  author       = {Marks, Patrick and Garcia, Sarah and Barrio, Alvaro Martinez
                  and Belhocine, Kamila and Bernate, Jorge and Bharadwaj, Rajiv
                  and Bjornson, Keith and Catalanotti, Claudia and Delaney, Josh
                  and Fehr, Adrian and Fiddes, Ian T. and Galvin, Brendan
                  and Heaton, Haynes and Herschleb, Jill and Hindson, Christopher
                  and Holt, Esty and Jabara, Cassandra B. and Jett, Susanna
                  and Keivanfar, Nikka and Kyriazopoulou-Panagiotopoulou, Sofia
                  and Lek, Monkol and Lin, Bill and Lowe, Adam
                  and Mahamdallie, Shazia and Maheshwari, Shamoni and Makarewicz, Tony
                  and Marshall, Jamie and Meschi, Francesca and O'Keefe, Christopher J.
                  and Ordonez, Heather and Patel, Pranav and Price, Andrew
                  and Royall, Ariel and Ruark, Elise and Seal, Sheila
                  and Schnall-Levin, Michael and Shah, Preyas and Stafford, David
                  and Williams, Stephen and Wu, Indira and Xu, Andrew Wei
                  and Rahman, Nazneen and MacArthur, Daniel and Church, Deanna M.},
  title        = {Resolving the full spectrum of human genome variation using {Linked-Reads}},
  journal      = {Genome Research},
  volume       = {29},
  number       = {4},
  pages        = {635--645},
  year         = {2019},
  doi          = {10.1101/gr.234443.118},
  url          = {http://genome.cshlp.org/content/29/4/635.abstract},
  fulltext-url = {http://genome.cshlp.org/content/29/4/635.full.pdf+html},
  abstract     = {Large-scale population analyses coupled with advances in technology have
                  demonstrated that the human genome is more diverse than originally thought.
                  To date, this diversity has largely been uncovered using short-read
                  whole-genome sequencing. However, these short-read approaches fail to give a
                  complete picture of a genome. They struggle to identify structural events,
                  cannot access repetitive regions, and fail to resolve the human genome into
                  haplotypes. Here, we describe an approach that retains long range information
                  while maintaining the advantages of short reads. Starting from $\sim$1~ng of
                  high molecular weight DNA, we produce barcoded short-read libraries. Novel
                  informatic approaches allow for the barcoded short reads to be associated with
                  their original long molecules producing a novel data type known as
                  ``Linked-Reads''. This approach allows for simultaneous detection of small and
                  large variants from a single library. In this manuscript, we show the
                  advantages of Linked-Reads over standard short-read approaches for
                  reference-based analysis. Linked-Reads allow mapping to 38 Mb of sequence not
                  accessible to short reads, adding sequence in 423 difficult-to-sequence genes
                  including disease-relevant genes STRC, SMN1, and SMN2. Both Linked-Read
                  whole-genome and whole-exome sequencing identify complex structural
                  variations, including balanced events and single exon deletions and
                  duplications. Further, Linked-Reads extend the region of high-confidence calls
                  by 68.9 Mb. The data presented here show that Linked-Reads provide a scalable
                  approach for comprehensive genome analysis that is not possible using short
                  reads alone.},
}