001 package calhoun.analysis.crf.test;
002
003 import java.util.ArrayList;
004 import java.util.List;
005
006 import org.apache.commons.logging.Log;
007 import org.apache.commons.logging.LogFactory;
008
009 import calhoun.analysis.crf.executables.HiddenSequenceConverter;
010 import calhoun.analysis.crf.io.InputHandler;
011 import calhoun.analysis.crf.io.InputHandlerInterleaved;
012 import calhoun.analysis.crf.io.SequenceConverter;
013 import calhoun.analysis.crf.io.StringInput;
014 import calhoun.analysis.crf.io.TrainingSequence;
015 import calhoun.util.AbstractTestCase;
016 import calhoun.util.Assert;
017
018 public class SequenceConverterTest extends AbstractTestCase {
019
020 private static final Log log = LogFactory.getLog(SequenceConverterTest.class);
021 boolean debug = log.isDebugEnabled();
022
023
024 public void testTricycle13Interval13Conversion() throws Exception {
025 System.out.println("Testing Hidden Sequence Converter between Interval13 and Tricycle13 and back.");
026
027 // This has nothing to do with GTF but this config file does what we want
028 //ConfigurableModelManager cm= new ConfigurableModelManager("test/input/configGTF.txt");
029
030 InputHandler ih = new InputHandlerInterleaved(new StringInput());
031 List<? extends TrainingSequence<?>> data = ih.readTrainingData("test/input/shortTrain.tricycle13.txt");
032 List<TrainingSequence<Character>> listOfSeqs = new ArrayList<TrainingSequence<Character>>();
033
034 convertSequence(data, listOfSeqs);
035
036 ih.writeTrainingData("test/working/shortTrain.interval13.txt", listOfSeqs);
037
038 // Test below is a little weak right now (20060705) as I used this test to create the file in input; but will get
039 // stronger as I plan to use the file "test/input/shortTrain.interval13.txt" elsewhere in testing
040 assertFilesMatch("test/input/interval13/data/shortTrain.interval13.txt","test/working/shortTrain.interval13.txt");
041
042 //
043 // SAME TEST, DIFFERENT FILE
044 //
045 ih = new InputHandlerInterleaved(new StringInput());
046 List<? extends TrainingSequence<?>> data2 = ih.readTrainingData("test/input/negStrandGene.tricycle13.txt");
047 List<TrainingSequence<Character>> listOfSeqs2 = new ArrayList<TrainingSequence<Character>>();
048
049 convertSequence(data2, listOfSeqs2);
050
051 ih.writeTrainingData("test/working/negStrandGene.interval13.txt", listOfSeqs2);
052
053 assertFilesMatch("test/input/interval13/data/negStrandGene.interval13.txt","test/working/negStrandGene.interval13.txt");
054 }
055
056 private void convertSequence(List<? extends TrainingSequence<?>> data, List<TrainingSequence<Character>> listOfSeqs) {
057 for (int i=0; i<data.size(); i++) {
058 TrainingSequence<Character> seq = (TrainingSequence<Character>) data.get(i);
059 int len = seq.length();
060
061 int[] oldy = new int[len];
062 for (int j=0; j<len; j++) { oldy[j] = seq.getY(j); }
063
064 SequenceConverter.convertSeqFromTricycle13ToInterval13(seq);
065
066 listOfSeqs.add(seq);
067
068 SequenceConverter.convertSeqFromInterval13ToTricycle13(seq);
069
070 int[] newy = new int[len];
071 for (int j=0; j<len; j++) {
072 newy[j] = seq.getY(j);
073 Assert.a(newy[j] == oldy[j]);
074 }
075
076 SequenceConverter.convertSeqFromTricycle13ToInterval13(seq);
077 }
078 }
079
080
081 public void testSequenceConverter() throws Exception {
082 System.out.println("Testing Hidden Sequence Converter.");
083 String[] args13 = new String[4];
084 String[] args39 = new String[4];
085
086 args13[0] = "13to39";
087 args13[1] = "test/input/ghmmUsingCrfIO/basic_ModelDSComp.xml"; // config file
088 args13[2] = "test/input/ghmmUsingCrfIO/cnDT_chr14_60k.txt";
089 args13[3] = "test/working/cnDT_chr14_60k_39states.txt";
090
091 args39[0] = "39to13";
092 args39[1] = "test/input/ghmmUsingCrfIO/basic_ModelDSComp.xml"; // config file
093 args39[2] = args13[3];
094 args39[3] = "test/working/cnDT_chr14_60k_13states.txt";
095
096 HiddenSequenceConverter.main(args13);
097 HiddenSequenceConverter.main(args39);
098 }
099
100 public void testSequenceConverter13models() throws Exception {
101 System.out.println("Testing Hidden Sequence Converter.");
102 String[] argsTri2Int = new String[3];
103 String[] argsInt2Tri = new String[3];
104
105 argsTri2Int[0] = "tricycle13ToInterval13";
106 argsTri2Int[1] = "test/input/interval13/data/splitInputOneGeneTrain/hidden.tricycle13.dat";
107 argsTri2Int[2] = "test/working/shortTrain.interval13.txt";
108
109 argsInt2Tri[0] = "interval13ToTricycle13";
110 argsInt2Tri[1] = "test/input/interval13/data/splitInputOneGeneTrain/hidden.dat";
111 argsInt2Tri[2] = "test/working/shortTrain.tricycle13.txt";
112
113 HiddenSequenceConverter.main(argsTri2Int);
114 HiddenSequenceConverter.main(argsInt2Tri);
115
116 assertFilesMatch(argsTri2Int[1],argsInt2Tri[2]);
117 assertFilesMatch(argsTri2Int[2],argsInt2Tri[1]);
118 }
119
120
121
122 }