% Borozan et al., "Optimal marker genes for c-separated cell types with
% SepSolve", Genome Research 35(12), 2025.
% - pages uses the en-dash form (--) for the range.
% - {SepSolve} is brace-protected so sentence-casing styles keep its capitals.
% - NOTE(review): `eprint` holds the publisher's full-text URL as exported,
%   not an archive identifier -- confirm the target style handles this, or
%   drop the field in favour of `doi`/`url`.
@article{Borozan01122025,
  author   = {Borozan, Bartol and Prusina, Tomislav and Borozan, Luka and Ševerdija, Domagoj and Rojas Ringeling, Francisca and Matijević, Domagoj and Canzar, Stefan},
  title    = {Optimal marker genes for c-separated cell types with {SepSolve}},
  journal  = {Genome Research},
  volume   = {35},
  number   = {12},
  pages    = {2770--2780},
  year     = {2025},
  doi      = {10.1101/gr.280637.125},
  url      = {http://genome.cshlp.org/content/35/12/2770.abstract},
  eprint   = {http://genome.cshlp.org/content/35/12/2770.full.pdf+html},
  abstract = {The identification of cell types in single-cell RNA-seq studies relies on the distinct expression signature of marker genes. A small set of target genes is also needed to design probes for targeted spatial transcriptomic experiments and to target proteins in single-cell spatial proteomics or for cell sorting. Although traditional approaches have relied on testing one gene at a time for differential expression between a given cell type and the rest, more recent methods have highlighted the benefits of a joint selection of markers that together distinguish all pairs of cell types simultaneously. However, existing methods either consider all pairs of individual cells, which becomes intractable even for medium-sized data sets, or ignore intra-cell-type expression variation entirely by collapsing all cells of a given type to a single representative. Here, we address these limitations and propose to find a small set of genes such that cell types are c-separated in the selected dimensions, a notion introduced previously in learning a mixture of Gaussians. To this end, we formulate a linear program that naturally takes into account expression variation within cell types without including each pair of individual cells in the model, leading to a highly stable set of marker genes that allow to accurately discriminate between cell types and that can be computed to optimality efficiently.},
}