001    package calhoun.analysis.crf.test;
002    
003    import java.io.IOException;
004    
005    import org.apache.commons.logging.Log;
006    import org.apache.commons.logging.LogFactory;
007    
008    import calhoun.analysis.crf.Conrad;
009    import calhoun.util.AbstractTestCase;
010    import calhoun.util.Assert;
011    
012    public class Interval13BaselineTest extends AbstractTestCase {
013            private static final Log log = LogFactory.getLog(Interval13BaselineTest.class);
014    
015            /* ****************************************************
016             * These first tests show that various configurations of the interval13 model, both the real
017             * semi-Markov models and their toy versions that are Markov models, will run to completion through both training and testing.
018             * Making no effort yet to verify the correctness of results.
019             * 
020             * These first few tests are not very strong.
021             */
022    
023            public void testUntiedModel() throws Exception {
024                    // Tests Conrad on a SEMI-MARKOV model invoking it through COMMAND LINE
025                    Conrad.main(new String[] {"train", "test/input/interval13/config/baseline_untied.xml", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13BaselineUntied.ser"});
026    
027                    Conrad c = Conrad.read("test/working/interval13BaselineUntied.ser");
028                    double[] weights = c.getWeights();
029                    for(int i=0; i<weights.length; ++i) {
030                            log.info(c.getFeatureName(i)+": "+weights[i]);
031                    }
032                    assertEquals(17, weights.length);
033            }
034                    
035            public void testSemiMarkovCommandLine() throws Exception {
036                    // Tests Conrad on a SEMI-MARKOV model invoking it through COMMAND LINE
037    
038                    Conrad.main(new String[] {"train", "test/input/interval13/config/semiMarkovZeroPad.xml", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13BaselineModelTest2.ser"});
039    
040                    Conrad.main(new String[] {"test", "test/working/interval13BaselineModelTest2.ser", "test/input/interval13/data/shortTrain.interval13.txt", "test/working/interval13BaselineModelTestPredicted.txt"});
041            }
042                    
043            public void testInference() throws Exception {
044                    
045                    // Do first using Semi-Markov model
046                    Conrad.main(new String[] {"train", "test/input/interval13/config/semiMarkovZeroPadNoTrain.xml", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13BaselineModelTest.ser"});
047                    Conrad.main(new String[] {"test", "test/working/interval13BaselineModelTest.ser", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13SemiMarkovModelTestPredicted.txt"} );
048                    
049                    
050                    // Do next using Markov model
051                    Conrad.main(new String[] {"train", "test/input/interval13/config/markovNoTrain.xml", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13MarkovModelTest.ser"});
052                    Conrad.main(new String[] {"test", "test/working/interval13MarkovModelTest.ser", "test/input/interval13/data/oneGeneTrain.interval13.txt", "test/working/interval13MarkovModelTestPredicted.txt"} );
053                    
054                    assertFilesMatch("test/working/interval13SemiMarkovModelTestPredicted.txt","test/working/interval13MarkovModelTestPredicted.txt");      
055            }
056    
057            public void testTrainingAndInferenceMarkovVsSemiMarkov() throws Exception {     
058                    String fileModel1 = "test/input/interval13/config/markov.xml";
059                    String fileModel2 = "test/input/interval13/config/semiMarkovZeroPad.xml";
060                    String fileData = "test/input/interval13/data/oneGeneTrain.interval13.txt";
061                            
062                    compareTwoEquivalentModelsTrainTestSameData(fileModel1,fileModel2,fileData);
063            }
064    
065            public void testTrainingAndInferenceCacheProcessorCompare() throws Exception {
066                    /* SemiMarkovModel (with no length dependent features, just one NodeBoundary) 
067                     * using Semi-Markov training and inference, using two different
068                     * Cache Processors: AllSparse and CacheProcessorDeluxe
069                     */ 
070                    String fileModel1 = "test/input/interval13/config/semiMarkovZeroPad.xml";
071                    String fileModel2 = "test/input/interval13/config/semiMarkovZeroPadCPD.xml";
072                    String fileData = "test/input/interval13/data/oneGeneTrain.interval13.txt";
073                            
074                    compareTwoEquivalentModelsTrainTestSameData(fileModel1,fileModel2,fileData);            
075            }
076    
077            public void testMarkovModelWithMarkovTrainingTestingVsSemiMarkovCPDTrainingTesting() throws Exception {
078                    /* A markov model using simple engines and using semi-Markov training and CacheProcessorDeluxe
079                     */
080                    String fileModel1 = "test/input/interval13/config/strictMarkovCPD.xml";
081                    String fileModel2 = "test/input/interval13/config/strictMarkov.xml";
082                    String fileData = "test/input/interval13/data/oneGeneTrain.interval13.txt";
083                            
084                    compareTwoEquivalentModelsTrainTestSameData(fileModel1,fileModel2,fileData);            
085            }
086            
087            public void test2() throws Exception {
088                    /* Nontrivial test comparing Markov and Semi-Markov training,
089                     * using same model and CacheProcessor and data.
090                     */
091                    String fileModel1 = "test/input/interval13/config/strictMarkovCPD.xml";
092                    String fileModel2 = "test/input/interval13/config/strictMarkovCPD2.xml";
093                    String fileData = "test/input/interval13/data/oneGeneTrain.interval13.txt";
094                            
095                    compareTwoEquivalentModelsTrainTestSameData(fileModel1,fileModel2,fileData);            
096            }
097            
098            public void test3() throws Exception {
099                    /* Compare two cache processors , CPD and Allsparse, in the case that
100                     * the model includes a feature that is genuinely length dependent.
101                     */
102                    String fileModel1 = "test/input/interval13/config/lengthDependentCPD.xml";
103                    String fileModel2 = "test/input/interval13/config/lengthDependentAllSparseNoMin.xml"; // Note: this model is very slow when building cache
104                    String fileData = "test/input/interval13/data/oneGeneTrain.interval13.txt";
105                            
106                    // Change the tolerance for the weights.  They can be different due to numerical error.
107                    compareTwoEquivalentModelsTrainTestSameData(fileModel1,fileModel2,fileData, 0.1);               
108                    
109                    
110                    // The block below will failif run because AllSparseLengthCacheProcessor does not yet support specifying the minimum statelength.
111                    /* Dave needs to fix this bc probably a mistake in AllSparseCacheProcessor not looking at min-lengths.
112                     * Only difference from test3 is in the ocnfig file for CPD I uncomment the minlengths.
113                     */
114                    //String fileModel1a = "test/input/interval13/config/lengthDependentCPDuncommented.xml";
115                    //String fileModel2a = "test/input/interval13/config/lengthDependentAllSparse.xml"; // Note: this model is very slow when building cache
116                    //String fileDataa = "test/input/interval13/data/oneGeneTrain.interval13.txt";
117                            
118                    //compareTwoEquivalentModelsTrainTestSameData(fileModel1a,fileModel2a,fileDataa, 0.1);          
119                    
120                    
121            }
122            
123            public void testReadingBaselineXMLConfigFile() throws Exception {
124                    Conrad cr = new Conrad("test/input/interval13/config/ssbaselineSplitInput.xml");
125                    System.out.println("Trying to train model");
126                    cr.train("test/input/interval13/data/splitInputOneGeneTrain"); 
127                    System.out.println("Trying to test model");
128                    cr.test("test/input/inputFilesTest/");
129            }
130    
131            public void testReadingSSBaselineCPDXMLConfigFile() throws Exception {
132                    Conrad cr = new Conrad("test/input/interval13/config/ssbaselineCPD.xml");
133                    System.out.println("Trying to train model");
134                    cr.train("test/input/interval13/data/oneGeneTrain.interval13.txt");
135                    System.out.println("Trying to test model");
136                    cr.test("test/input/interval13/data/oneGeneTrain.interval13.txt");
137            }
138            
139            public void compareTwoEquivalentModelsTrainTestSameData( String fileModel1, String fileModel2, String fileData) throws IOException {
140                    compareTwoEquivalentModelsTrainTestSameData(fileModel1, fileModel2, fileData, 0.001);
141            }
142            
143            public void compareTwoEquivalentModelsTrainTestSameData( String fileModel1, String fileModel2, String fileData, double tolerance ) throws IOException {
144                    Conrad cr1 = new Conrad(fileModel1);
145                    cr1.train(fileData);
146                    double[] weights1 = cr1.getWeights();   
147                    
148                    Conrad cr2 = new Conrad(fileModel2);
149                    cr2.train(fileData);
150                    double[] weights2 = cr2.getWeights();   
151                    
152                    Assert.a(weights2.length == weights1.length,"length1 is " + weights1.length  + "   and weights2 is " + weights2.length);
153                    assertArrayEquals(weights2,weights1, tolerance);                
154    
155                    cr1.test(fileData);     
156                    cr2.test(fileData);                             
157    
158                    String s1 = cr1.getOutputHandler().toString();
159                    System.out.println("String of output from result1 is");
160                    System.out.println(s1);
161                    
162                    String s2 = cr2.getOutputHandler().toString();
163                    System.out.println("String of output from result2 is");
164                    System.out.println(s2);
165                    
166                    assertEquals(s1,s2);
167            }
168            
169    }