@comment{Faux et al. 2007, Genome Research 17(7): the RCPdb database paper. The compound surname Garcia de la Banda is braced so BibTeX keeps it as one last name, and RCPdb is braced so sentence-casing styles keep its capitalisation. Percent and underscore characters in the abstract are escaped for LaTeX.}
@article{Faux01072007,
  author   = {Faux, Noel G. and Huttley, Gavin A. and Mahmood, Khalid and Webb, Geoffrey I. and {Garcia de la Banda}, Maria and Whisstock, James C.},
  title    = {{RCPdb}: An evolutionary classification and codon usage database for repeat-containing proteins},
  journal  = {Genome Research},
  volume   = {17},
  number   = {7},
  pages    = {1118--1127},
  year     = {2007},
  doi      = {10.1101/gr.6255407},
  url      = {http://genome.cshlp.org/content/17/7/1118.abstract},
  eprint   = {http://genome.cshlp.org/content/17/7/1118.full.pdf+html},
  abstract = {Over 3\% of human proteins contain single amino acid repeats (repeat-containing proteins, RCPs). Many repeats (homopeptides) localize to important proteins involved in transcription, and the expansion of certain repeats, in particular poly-Q and poly-A tracts, can also lead to the development of neurological diseases. Previous studies have suggested that the homopeptide makeup is a result of the presence of G+C-rich tracts in the encoding genes and that expansion occurs via replication slippage. Here, we have performed a large-scale genomic analysis of the variation of the genes encoding RCPs in 13 species and present these data in an online database (http://repeats.med.monash.edu.au/genetic\_analysis/). This resource allows rapid comparison and analysis of RCPs, homopeptides, and their underlying genetic tracts across the eukaryotic species considered. We report three major findings. First, there is a bias for a small subset of codons being reiterated within homopeptides, and there is no G+C or A+T bias relative to the organism's transcriptome. Second, single base pair transversions from the homocodon are unusually common and may represent a mechanism of reducing the rate of homopeptide mutations. Third, homopeptides that are conserved across different species lie within regions that are under stronger purifying selection in contrast to nonconserved homopeptides.},
}