% Sigalova et al., Genome Research 35(5):1138--1153 (2025).
% Non-ASCII characters are written as LaTeX escapes so the entry is usable
% under both classic BibTeX and Biber; percent signs in the abstract are
% escaped so that styles which print the abstract do not truncate it.
@article{Sigalova01052025,
  author   = {Sigalova, Olga M. and Forneris, Mattia and Stojanovska, Frosina and Zhao, Bingqing and Viales, Rebecca R. and Rabinowitz, Adam and Hammal, Fayrouz and Ballester, Beno{\^\i}t and Zaugg, Judith B. and Furlong, Eileen E. M.},
  title    = {Integrating genetic variation with deep learning provides context for variants impacting transcription factor binding during embryogenesis},
  journal  = {Genome Research},
  volume   = {35},
  number   = {5},
  pages    = {1138--1153},
  year     = {2025},
  doi      = {10.1101/gr.279652.124},
  url      = {http://genome.cshlp.org/content/35/5/1138.abstract},
  eprint   = {http://genome.cshlp.org/content/35/5/1138.full.pdf+html},
  abstract = {Understanding how genetic variation impacts transcription factor (TF) binding remains a major challenge, limiting our ability to model disease-associated variants. Here, we used a highly controlled system of F1 crosses with extensive genetic diversity to profile allele-specific binding of four TFs at several time points during Drosophila embryogenesis. Using a combined haplotype test, we identified 9\%--18\% of TF-bound regions impacted by genetic variation even for essential regulators. By expanding WASP (a tool for allele-specific read mapping) to examine indels, we increased detection of allelically imbalanced peaks by 30\%--50\%. This fine-grained ``mutagenesis'' can reconstruct functionalized binding motifs for all factors. To prioritize causal variants, we trained a convolutional neural network (Basenji) to accurately predict binding from DNA sequence. The model can also predict measured allelic imbalance for strong effect variants, providing a mechanistic interpretation for how the variant impacts binding. This reveals unexpected relationships between TFs, including potential cooperative pairs, and mechanisms of tissue-specific recruitment of the ubiquitous factor CTCF.},
}