001 package calhoun.analysis.crf.test;
002
003 import java.io.IOException;
004
005 import org.apache.commons.logging.Log;
006 import org.apache.commons.logging.LogFactory;
007
008 import calhoun.analysis.crf.Conrad;
009 import calhoun.util.AbstractTestCase;
010 import calhoun.util.Assert;
011
012 public class Interval13BaselineTest extends AbstractTestCase {
013 private static final Log log = LogFactory.getLog(Interval13BaselineTest.class);
014
015 /* ****************************************************
016 * These first tests show that various configurations of the interval13 model, both the real
017 * semi-Markov models and their toy versions that are Markov models, will run to completion through both training and testing.
018 * Making no effort yet to verify the correctness of results.
019 *
020 * These first few tests are not very strong.
021 */
022
023 public void testUntiedModel() throws Exception {
024 // Tests Conrad on a SEMI-MARKOV model invoking it through COMMAND LINE
025 Conrad.main(new String[] {"train", "test/input/interval13/config/baseline_untied.xml", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13BaselineUntied.ser"});
026
027 Conrad c = Conrad.read("test/working/interval13BaselineUntied.ser");
028 double[] weights = c.getWeights();
029 for(int i=0; i<weights.length; ++i) {
030 log.info(c.getFeatureName(i)+": "+weights[i]);
031 }
032 assertEquals(17, weights.length);
033 }
034
035 public void testSemiMarkovCommandLine() throws Exception {
036 // Tests Conrad on a SEMI-MARKOV model invoking it through COMMAND LINE
037
038 Conrad.main(new String[] {"train", "test/input/interval13/config/semiMarkovZeroPad.xml", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13BaselineModelTest2.ser"});
039
040 Conrad.main(new String[] {"test", "test/working/interval13BaselineModelTest2.ser", "test/input/interval13/data/shortTrain.interval13.txt", "test/working/interval13BaselineModelTestPredicted.txt"});
041 }
042
043 public void testInference() throws Exception {
044
045 // Do first using Semi-Markov model
046 Conrad.main(new String[] {"train", "test/input/interval13/config/semiMarkovZeroPadNoTrain.xml", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13BaselineModelTest.ser"});
047 Conrad.main(new String[] {"test", "test/working/interval13BaselineModelTest.ser", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13SemiMarkovModelTestPredicted.txt"} );
048
049
050 // Do next using Markov model
051 Conrad.main(new String[] {"train", "test/input/interval13/config/markovNoTrain.xml", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13MarkovModelTest.ser"});
052 Conrad.main(new String[] {"test", "test/working/interval13MarkovModelTest.ser", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13MarkovModelTestPredicted.txt"} );
053
054 assertFilesMatch("test/working/interval13SemiMarkovModelTestPredicted.txt","test/working/interval13MarkovModelTestPredicted.txt");
055 }
056
057 public void testTrainingAndInferenceMarkovVsSemiMarkov() throws Exception {
058 String fileModel1 = "test/input/interval13/config/markov.xml";
059 String fileModel2 = "test/input/interval13/config/semiMarkovZeroPad.xml";
060 String fileData = "test/input/interval13/data/oneGeneTrain.interval13.txt";
061
062 compareTwoEquivalentModelsTrainTestSameData(fileModel1,fileModel2,fileData);
063 }
064
065 public void testTrainingAndInferenceCacheProcessorCompare() throws Exception {
066 /* SemiMarkovModel (with no length dependent features, just one NodeBoundary)
067 * using Semi-Markov training and inference, using two different
068 * Cache Processors: AllSparse and CacheProcessorDeluxe
069 */
070 String fileModel1 = "test/input/interval13/config/semiMarkovZeroPad.xml";
071 String fileModel2 = "test/input/interval13/config/semiMarkovZeroPadCPD.xml";
072 String fileData = "test/input/interval13/data/oneGeneTrain.interval13.txt";
073
074 compareTwoEquivalentModelsTrainTestSameData(fileModel1,fileModel2,fileData);
075 }
076
077 public void testMarkovModelWithMarkovTrainingTestingVsSemiMarkovCPDTrainingTesting() throws Exception {
078 /* A markov model using simple engines and using semi-Markov training and CacheProcessorDeluxe
079 */
080 String fileModel1 = "test/input/interval13/config/strictMarkovCPD.xml";
081 String fileModel2 = "test/input/interval13/config/strictMarkov.xml";
082 String fileData = "test/input/interval13/data/oneGeneTrain.interval13.txt";
083
084 compareTwoEquivalentModelsTrainTestSameData(fileModel1,fileModel2,fileData);
085 }
086
087 public void test2() throws Exception {
088 /* Nontrivial test comparing Markov and Semi-Markov training,
089 * using same model and CacheProcessor and data.
090 */
091 String fileModel1 = "test/input/interval13/config/strictMarkovCPD.xml";
092 String fileModel2 = "test/input/interval13/config/strictMarkovCPD2.xml";
093 String fileData = "test/input/interval13/data/oneGeneTrain.interval13.txt";
094
095 compareTwoEquivalentModelsTrainTestSameData(fileModel1,fileModel2,fileData);
096 }
097
098 public void test3() throws Exception {
099 /* Compare two cache processors , CPD and Allsparse, in the case that
100 * the model includes a feature that is genuinely length dependent.
101 */
102 String fileModel1 = "test/input/interval13/config/lengthDependentCPD.xml";
103 String fileModel2 = "test/input/interval13/config/lengthDependentAllSparseNoMin.xml"; // Note: this model is very slow when building cache
104 String fileData = "test/input/interval13/data/oneGeneTrain.interval13.txt";
105
106 // Change the tolerance for the weights. They can be different due to numerical error.
107 compareTwoEquivalentModelsTrainTestSameData(fileModel1,fileModel2,fileData, 0.1);
108
109
110 // The block below will failif run because AllSparseLengthCacheProcessor does not yet support specifying the minimum statelength.
111 /* Dave needs to fix this bc probably a mistake in AllSparseCacheProcessor not looking at min-lengths.
112 * Only difference from test3 is in the ocnfig file for CPD I uncomment the minlengths.
113 */
114 //String fileModel1a = "test/input/interval13/config/lengthDependentCPDuncommented.xml";
115 //String fileModel2a = "test/input/interval13/config/lengthDependentAllSparse.xml"; // Note: this model is very slow when building cache
116 //String fileDataa = "test/input/interval13/data/oneGeneTrain.interval13.txt";
117
118 //compareTwoEquivalentModelsTrainTestSameData(fileModel1a,fileModel2a,fileDataa, 0.1);
119
120
121 }
122
123 public void testReadingBaselineXMLConfigFile() throws Exception {
124 Conrad cr = new Conrad("test/input/interval13/config/ssbaselineSplitInput.xml");
125 System.out.println("Trying to train model");
126 cr.train("test/input/interval13/data/splitInputOneGeneTrain");
127 System.out.println("Trying to test model");
128 cr.test("test/input/inputFilesTest/");
129 }
130
131 public void testReadingSSBaselineCPDXMLConfigFile() throws Exception {
132 Conrad cr = new Conrad("test/input/interval13/config/ssbaselineCPD.xml");
133 System.out.println("Trying to train model");
134 cr.train("test/input/interval13/data/oneGeneTrain.interval13.txt");
135 System.out.println("Trying to test model");
136 cr.test("test/input/interval13/data/oneGeneTrain.interval13.txt");
137 }
138
139 public void compareTwoEquivalentModelsTrainTestSameData( String fileModel1, String fileModel2, String fileData) throws IOException {
140 compareTwoEquivalentModelsTrainTestSameData(fileModel1, fileModel2, fileData, 0.001);
141 }
142
143 public void compareTwoEquivalentModelsTrainTestSameData( String fileModel1, String fileModel2, String fileData, double tolerance ) throws IOException {
144 Conrad cr1 = new Conrad(fileModel1);
145 cr1.train(fileData);
146 double[] weights1 = cr1.getWeights();
147
148 Conrad cr2 = new Conrad(fileModel2);
149 cr2.train(fileData);
150 double[] weights2 = cr2.getWeights();
151
152 Assert.a(weights2.length == weights1.length,"length1 is " + weights1.length + " and weights2 is " + weights2.length);
153 assertArrayEquals(weights2,weights1, tolerance);
154
155 cr1.test(fileData);
156 cr2.test(fileData);
157
158 String s1 = cr1.getOutputHandler().toString();
159 System.out.println("String of output from result1 is");
160 System.out.println(s1);
161
162 String s2 = cr2.getOutputHandler().toString();
163 System.out.println("String of output from result2 is");
164 System.out.println(s2);
165
166 assertEquals(s1,s2);
167 }
168
169 }