001    package calhoun.analysis.crf.test;
002    
003    import java.util.ArrayList;
004    import java.util.List;
005    
006    import calhoun.analysis.crf.Conrad;
007    import calhoun.analysis.crf.features.interval13.GeneConstraintsInterval13;
008    import calhoun.analysis.crf.io.StringInput;
009    import calhoun.analysis.crf.io.TrainingSequence;
010    import calhoun.analysis.crf.solver.check.ArrayFeatureList;
011    import calhoun.util.AbstractTestCase;
012    
013    /** Tests that CRF is working with valid probabilities - the sum of all possible labelings is 1.
014     *  
015     * Test that the code to walk through only the valid paths works correctly.
016     * Uses a two state model that disallows transitions to self.  010101... or 101010... are the only allowed paths. */
017    public class GeneConstraintsInterval13Test extends AbstractTestCase {
018    
019            
020            public void testGeneConstraintsTraining() throws Exception {
021    
022                    Conrad crf = new Conrad("test/input/interval13/config/markov.xml");
023                    
024                    List<? extends TrainingSequence<?>> train1 =
025                            StringInput.prepareData(
026                                    "000000002222222666661111100000000" + "\n" +
027                                    "ACACACACATGCACAGTCAGACACATAGACACA" + "\n" +
028                                    "00000000077777CCCCCC7777000000000000" + "\n" +
029                                    "ACACACTTACACACCTACACACATACACACACACAC" + "\n");
030                    
031                    System.out.println(train1);
032                    
033                    GeneConstraintsInterval13 gc = new GeneConstraintsInterval13();
034                    
035                    List<TrainingSequence<Character>> train2 = new ArrayList<TrainingSequence<Character>>();
036                    
037                    gc.train(0,crf.getModel(),train2);
038            }
039    
040            
041            public void testGeneConstraintsEvaluation() throws Exception {
042    
043                    Conrad crf = new Conrad("test/input/interval13/config/markov.xml");
044                    
045                    GeneConstraintsInterval13 gc = new GeneConstraintsInterval13();
046                    List<? extends TrainingSequence<Character>> data = (List<? extends TrainingSequence<Character>>) crf.getInputHandler().readTrainingData("test/input/interval13/data/oneGeneTrain.interval13.txt");
047                    crf.trainFeatures(data);
048                    gc.train(0, crf.getModel(), data);
049                    
050                    ArrayFeatureList result = new ArrayFeatureList(crf.getModel());
051    
052                    
053                    // Check that mod3 stuff done correctly for plus strand donor sites
054                    result.clear();
055                    gc.evaluateEdge(data.get(0).getInputSequence(), 3, 1, 4, result);
056                    assertTrue(result.isValid());
057    
058                    result.clear();
059                    gc.evaluateEdge(data.get(0).getInputSequence(), 3, 2, 5, result);
060                    assertTrue(result.isValid());
061                    
062                    result.clear();
063                    gc.evaluateEdge(data.get(0).getInputSequence(), 3, 3, 6, result);
064                    assertTrue(result.isValid());
065                    
066                    result.clear();
067                    gc.evaluateEdge(data.get(0).getInputSequence(), 3, 1, 5, result);
068                    assertFalse(result.isValid());
069    
070                    
071                    // check taht mod3 stuff done correctly for minus strand acceptor sites
072                    result.clear();
073                    gc.evaluateEdge(data.get(0).getInputSequence(), 18, 7, 10, result);
074                    assertTrue(result.isValid());
075                    
076                    result.clear();
077                    gc.evaluateEdge(data.get(0).getInputSequence(), 18, 8, 12, result);
078                    assertTrue(result.isValid());
079                    
080                    result.clear();
081                    gc.evaluateEdge(data.get(0).getInputSequence(), 18, 9, 11, result);
082                    assertTrue(result.isValid());
083                    
084                    result.clear();
085                    gc.evaluateEdge(data.get(0).getInputSequence(), 18, 7, 11, result);
086                    assertFalse(result.isValid());
087                    
088                    // check that some plus strand stop codons get invalidated for the exon state,
089                    // but only invalidated on third position and for exons of correct cut.
090                    result.clear();
091                    gc.evaluateNode(data.get(0).getInputSequence(), 7, 3, result);
092                    assertFalse(result.isValid());          
093    
094                    result.clear();
095                    gc.evaluateNode(data.get(0).getInputSequence(), 7, 2, result);
096                    assertTrue(result.isValid());           
097                    
098                    result.clear();
099                    gc.evaluateNode(data.get(0).getInputSequence(), 6, 3, result);
100                    assertTrue(result.isValid());           
101    
102                    result.clear();
103                    gc.evaluateEdge(data.get(0).getInputSequence(), 7, 3, 3, result);
104                    assertFalse(result.isValid());  
105    
106                    result.clear();
107                    gc.evaluateEdge(data.get(0).getInputSequence(), 7, 4, 4, result);
108                    assertTrue(result.isValid());   
109                    
110                    // check that some minus strand stop codons get invalidated for the exon state,
111                    // but only invalidated on third position and for exons of correct cut.
112                    result.clear();
113                    gc.evaluateNode(data.get(0).getInputSequence(), 4, 8, result);
114                    assertFalse(result.isValid());          
115    
116                    result.clear();
117                    gc.evaluateNode(data.get(0).getInputSequence(), 4, 7, result);
118                    assertTrue(result.isValid());           
119                    
120            }
121    }