001 package calhoun.analysis.crf.test;
002
003 import java.util.ArrayList;
004 import java.util.List;
005
006 import calhoun.analysis.crf.Conrad;
007 import calhoun.analysis.crf.features.interval13.GeneConstraintsInterval13;
008 import calhoun.analysis.crf.io.StringInput;
009 import calhoun.analysis.crf.io.TrainingSequence;
010 import calhoun.analysis.crf.solver.check.ArrayFeatureList;
011 import calhoun.util.AbstractTestCase;
012
/** Tests for {@link GeneConstraintsInterval13}.
 *
 * Checks that the constraints can be trained against the interval13 Markov model configuration, and that
 * evaluating nodes and edges leaves the feature list valid or invalid as expected: mod 3 handling at plus
 * strand donor sites and minus strand acceptor sites, and stop codons being invalidated for exon states. */
017 public class GeneConstraintsInterval13Test extends AbstractTestCase {
018
019
020 public void testGeneConstraintsTraining() throws Exception {
021
022 Conrad crf = new Conrad("test/input/interval13/config/markov.xml");
023
024 List<? extends TrainingSequence<?>> train1 =
025 StringInput.prepareData(
026 "000000002222222666661111100000000" + "\n" +
027 "ACACACACATGCACAGTCAGACACATAGACACA" + "\n" +
028 "00000000077777CCCCCC7777000000000000" + "\n" +
029 "ACACACTTACACACCTACACACATACACACACACAC" + "\n");
030
031 System.out.println(train1);
032
033 GeneConstraintsInterval13 gc = new GeneConstraintsInterval13();
034
035 List<TrainingSequence<Character>> train2 = new ArrayList<TrainingSequence<Character>>();
036
037 gc.train(0,crf.getModel(),train2);
038 }
039
040
041 public void testGeneConstraintsEvaluation() throws Exception {
042
043 Conrad crf = new Conrad("test/input/interval13/config/markov.xml");
044
045 GeneConstraintsInterval13 gc = new GeneConstraintsInterval13();
046 List<? extends TrainingSequence<Character>> data = (List<? extends TrainingSequence<Character>>) crf.getInputHandler().readTrainingData("test/input/interval13/data/oneGeneTrain.interval13.txt");
047 crf.trainFeatures(data);
048 gc.train(0, crf.getModel(), data);
049
050 ArrayFeatureList result = new ArrayFeatureList(crf.getModel());
051
052
053 // Check that mod3 stuff done correctly for plus strand donor sites
054 result.clear();
055 gc.evaluateEdge(data.get(0).getInputSequence(), 3, 1, 4, result);
056 assertTrue(result.isValid());
057
058 result.clear();
059 gc.evaluateEdge(data.get(0).getInputSequence(), 3, 2, 5, result);
060 assertTrue(result.isValid());
061
062 result.clear();
063 gc.evaluateEdge(data.get(0).getInputSequence(), 3, 3, 6, result);
064 assertTrue(result.isValid());
065
066 result.clear();
067 gc.evaluateEdge(data.get(0).getInputSequence(), 3, 1, 5, result);
068 assertFalse(result.isValid());
069
070
071 // check taht mod3 stuff done correctly for minus strand acceptor sites
072 result.clear();
073 gc.evaluateEdge(data.get(0).getInputSequence(), 18, 7, 10, result);
074 assertTrue(result.isValid());
075
076 result.clear();
077 gc.evaluateEdge(data.get(0).getInputSequence(), 18, 8, 12, result);
078 assertTrue(result.isValid());
079
080 result.clear();
081 gc.evaluateEdge(data.get(0).getInputSequence(), 18, 9, 11, result);
082 assertTrue(result.isValid());
083
084 result.clear();
085 gc.evaluateEdge(data.get(0).getInputSequence(), 18, 7, 11, result);
086 assertFalse(result.isValid());
087
088 // check that some plus strand stop codons get invalidated for the exon state,
089 // but only invalidated on third position and for exons of correct cut.
090 result.clear();
091 gc.evaluateNode(data.get(0).getInputSequence(), 7, 3, result);
092 assertFalse(result.isValid());
093
094 result.clear();
095 gc.evaluateNode(data.get(0).getInputSequence(), 7, 2, result);
096 assertTrue(result.isValid());
097
098 result.clear();
099 gc.evaluateNode(data.get(0).getInputSequence(), 6, 3, result);
100 assertTrue(result.isValid());
101
102 result.clear();
103 gc.evaluateEdge(data.get(0).getInputSequence(), 7, 3, 3, result);
104 assertFalse(result.isValid());
105
106 result.clear();
107 gc.evaluateEdge(data.get(0).getInputSequence(), 7, 4, 4, result);
108 assertTrue(result.isValid());
109
110 // check that some minus strand stop codons get invalidated for the exon state,
111 // but only invalidated on third position and for exons of correct cut.
112 result.clear();
113 gc.evaluateNode(data.get(0).getInputSequence(), 4, 8, result);
114 assertFalse(result.isValid());
115
116 result.clear();
117 gc.evaluateNode(data.get(0).getInputSequence(), 4, 7, result);
118 assertTrue(result.isValid());
119
120 }
121 }