% Nagashima et al., Genome Research 13(6b):1520--1533 (2003).
% Notes for maintainers (text outside an entry is ignored by BibTeX/Biber):
%   - Acronyms in the title are brace-protected so sentence-casing styles keep them.
%   - Percent signs in the abstract are escaped so the field is safe if a style prints it.
%   - The eprint field holds the publisher's full-text link as exported, not an archive id.
@article{Nagashima01062003,
  author   = {Nagashima, Takeshi and Silva, Diego G. and Petrovsky, Nikolai and Socha, Luis A. and Suzuki, Harukazu and Saito, Rintaro and Kasukawa, Takeya and Kurochkin, Igor V. and Konagaya, Akihiko and Sch{\"o}nbach, Christian},
  title    = {Inferring Higher Functional Information for {RIKEN} Mouse Full-Length {cDNA} Clones With {FACTS}},
  journal  = {Genome Research},
  volume   = {13},
  number   = {6b},
  pages    = {1520--1533},
  year     = {2003},
  doi      = {10.1101/gr.1019903},
  url      = {http://genome.cshlp.org/content/13/6b/1520.abstract},
  eprint   = {http://genome.cshlp.org/content/13/6b/1520.full.pdf+html},
  abstract = {FACTS (Functional Association/Annotation of cDNA Clones from Text/Sequence Sources) is a semiautomated knowledge discovery and annotation system that integrates molecular function information derived from sequence analysis results (sequence inferred) with functional information extracted from text. Text-inferred information was extracted from keyword-based retrievals of MEDLINE abstracts and by matching of gene or protein names to OMIM, BIND, and DIP database entries. Using FACTS, we found that 47.5\% of the 60,770 RIKEN mouse cDNA FANTOM2 clone annotations were informative for text searches. MEDLINE queries yielded molecular interaction-containing sentences for 23.1\% of the clones. When disease MeSH and GO terms were matched with retrieved abstracts, 22.7\% of clones were associated with potential diseases, and 32.5\% with GO identifiers. A significant number (23.5\%) of disease MeSH-associated clones were also found to have a hereditary disease association (OMIM Morbidmap). Inferred neoplastic and nervous system disease represented 49.6\% and 36.0\% of disease MeSH-associated clones, respectively. A comparison of sequence-based GO assignments with informative text-based GO assignments revealed that for 78.2\% of clones, identical GO assignments were provided for that clone by either method, whereas for 21.8\% of clones, the assignments differed. In contrast, for OMIM assignments, only 28.5\% of clones had identical sequence-based and text-based OMIM assignments.
Sequence, sentence, and term-based functional associations are included in the FACTS database (http://facts.gsc.riken.go.jp/), which permits results to be annotated and explored through web-accessible keyword and sequence search interfaces. The FACTS database will be a critical tool for investigating the functional complexity of the mouse transcriptome, cDNA-inferred interactome (molecular interactions), and pathome (pathologies).},
}