001    package calhoun.analysis.crf.test;
002    
003    import java.util.ArrayList;
004    import java.util.List;
005    
006    import org.apache.commons.logging.Log;
007    import org.apache.commons.logging.LogFactory;
008    
009    import calhoun.analysis.crf.executables.HiddenSequenceConverter;
010    import calhoun.analysis.crf.io.InputHandler;
011    import calhoun.analysis.crf.io.InputHandlerInterleaved;
012    import calhoun.analysis.crf.io.SequenceConverter;
013    import calhoun.analysis.crf.io.StringInput;
014    import calhoun.analysis.crf.io.TrainingSequence;
015    import calhoun.util.AbstractTestCase;
016    import calhoun.util.Assert;
017    
018    public class SequenceConverterTest  extends AbstractTestCase {
019            
020            private static final Log log = LogFactory.getLog(SequenceConverterTest.class);
021            boolean debug = log.isDebugEnabled();
022    
023            
024            public void testTricycle13Interval13Conversion() throws Exception {
025                    System.out.println("Testing Hidden Sequence Converter between Interval13 and Tricycle13 and back.");
026                    
027                    //               This has nothing to do with GTF but this config file does what we want
028                    //ConfigurableModelManager cm= new ConfigurableModelManager("test/input/configGTF.txt");  
029    
030                    InputHandler ih = new InputHandlerInterleaved(new StringInput());
031                    List<? extends TrainingSequence<?>> data = ih.readTrainingData("test/input/shortTrain.tricycle13.txt");
032                    List<TrainingSequence<Character>> listOfSeqs = new ArrayList<TrainingSequence<Character>>();            
033                    
034                    convertSequence(data, listOfSeqs);
035                    
036                    ih.writeTrainingData("test/working/shortTrain.interval13.txt", listOfSeqs);
037                    
038                    // Test below is a little weak right now (20060705) as I used this test to create the file in input; but will get
039                    // stronger as I plan to use the file "test/input/shortTrain.interval13.txt" elsewhere in testing 
040                    assertFilesMatch("test/input/interval13/data/shortTrain.interval13.txt","test/working/shortTrain.interval13.txt");
041                    
042                    //
043                    // SAME TEST, DIFFERENT FILE
044                    //
045                    ih = new InputHandlerInterleaved(new StringInput());
046                    List<? extends TrainingSequence<?>> data2 = ih.readTrainingData("test/input/negStrandGene.tricycle13.txt");
047                    List<TrainingSequence<Character>> listOfSeqs2 = new ArrayList<TrainingSequence<Character>>();           
048                    
049                    convertSequence(data2, listOfSeqs2);
050                    
051                    ih.writeTrainingData("test/working/negStrandGene.interval13.txt", listOfSeqs2);
052                    
053                    assertFilesMatch("test/input/interval13/data/negStrandGene.interval13.txt","test/working/negStrandGene.interval13.txt");
054            }
055            
056            private void convertSequence(List<? extends TrainingSequence<?>> data, List<TrainingSequence<Character>> listOfSeqs) {
057                    for (int i=0; i<data.size(); i++) {
058                            TrainingSequence<Character> seq = (TrainingSequence<Character>) data.get(i);
059                            int len = seq.length();
060                            
061                            int[] oldy = new int[len];
062                            for (int j=0; j<len; j++) { oldy[j] = seq.getY(j); }
063                    
064                            SequenceConverter.convertSeqFromTricycle13ToInterval13(seq);
065                            
066                            listOfSeqs.add(seq);
067                            
068                            SequenceConverter.convertSeqFromInterval13ToTricycle13(seq);                    
069    
070                            int[] newy = new int[len];
071                            for (int j=0; j<len; j++) {
072                                    newy[j] = seq.getY(j);
073                                    Assert.a(newy[j] == oldy[j]);
074                            }
075                            
076                            SequenceConverter.convertSeqFromTricycle13ToInterval13(seq);
077                    }
078            }
079            
080            
081            public void testSequenceConverter() throws Exception {
082                    System.out.println("Testing Hidden Sequence Converter.");
083                    String[] args13 = new String[4];
084                    String[] args39 = new String[4];
085                    
086                    args13[0] = "13to39";
087                    args13[1] = "test/input/ghmmUsingCrfIO/basic_ModelDSComp.xml";  // config file
088                    args13[2]   = "test/input/ghmmUsingCrfIO/cnDT_chr14_60k.txt";   
089                    args13[3] = "test/working/cnDT_chr14_60k_39states.txt";
090    
091                    args39[0] = "39to13";
092                    args39[1] = "test/input/ghmmUsingCrfIO/basic_ModelDSComp.xml";  // config file
093                    args39[2] = args13[3];
094                    args39[3] = "test/working/cnDT_chr14_60k_13states.txt";
095                    
096                    HiddenSequenceConverter.main(args13);
097                    HiddenSequenceConverter.main(args39);
098            }
099    
100            public void testSequenceConverter13models() throws Exception {
101                    System.out.println("Testing Hidden Sequence Converter.");
102                    String[] argsTri2Int = new String[3];
103                    String[] argsInt2Tri = new String[3];
104                    
105                    argsTri2Int[0] = "tricycle13ToInterval13";
106                    argsTri2Int[1] = "test/input/interval13/data/splitInputOneGeneTrain/hidden.tricycle13.dat";     
107                    argsTri2Int[2] = "test/working/shortTrain.interval13.txt";
108    
109                    argsInt2Tri[0] = "interval13ToTricycle13";
110                    argsInt2Tri[1] = "test/input/interval13/data/splitInputOneGeneTrain/hidden.dat";        
111                    argsInt2Tri[2] = "test/working/shortTrain.tricycle13.txt";
112                    
113                    HiddenSequenceConverter.main(argsTri2Int);
114                    HiddenSequenceConverter.main(argsInt2Tri);
115                    
116                    assertFilesMatch(argsTri2Int[1],argsInt2Tri[2]);
117                    assertFilesMatch(argsTri2Int[2],argsInt2Tri[1]);
118            }
119            
120            
121            
122    }