001    package calhoun.analysis.crf.test;
002    
003    import java.util.ArrayList;
004    
005    import org.apache.commons.logging.Log;
006    import org.apache.commons.logging.LogFactory;
007    
008    import calhoun.analysis.crf.features.supporting.MaxentMotifModel;
009    import calhoun.analysis.crf.statistics.BasicStats;
010    import calhoun.util.AbstractTestCase;
011    import calhoun.util.FileUtil;
012    
013    public class MaxentMotifModelTest extends AbstractTestCase {
014            private static final Log log = LogFactory.getLog(CRFIOTest.class);
015            boolean debug = log.isDebugEnabled();
016            
017            public void testMaxentModel() throws Exception {
018                    ArrayList<int[]> motifExamples = new ArrayList<int[]>();
019                    motifExamples.add(new int[]{0,0,0});
020                    //motifExamples.add(new int[]{1,1,0});
021                    //motifExamples.add(new int[]{1,0,1});
022                    //motifExamples.add(new int[]{0,1,1});
023                    motifExamples.add(new int[]{1,1,1});
024                    
025                    double[] pp = MaxentMotifModel.trainMaxentDistributionUsingAllPairwiseConstraints(motifExamples,3,30,1.0);
026                    
027                    assertEquals(pp.length,64);
028                    
029                    for (int a=0; a<4; a++) {
030                            for (int b=0; b<4; b++) {
031                                    for (int c=0; c<4; c++) {
032                                            int h = 16*a+4*b+c;
033                                            if (pp[h] > 0.001) {
034                                                    System.out.println("" + a + " " + b + " " + c + "   " + pp[h]);
035                                            }
036                                    }
037                            }
038                    }
039                    
040                    ArrayList<int[]> motifExamples2 = new ArrayList<int[]>();
041                    
042                    double[] qq = MaxentMotifModel.trainMaxentDistributionUsingAllPairwiseConstraints(motifExamples2,3,30,1.0);
043                    
044                    assertEquals(qq.length,64);
045                    
046                    for (int a=0; a<4; a++) {
047                            for (int b=0; b<4; b++) {
048                                    for (int c=0; c<4; c++) {
049                                            int h = 16*a+4*b+c;
050                                            if (qq[h] > 0.001) {
051                                                    System.out.println("" + a + " " + b + " " + c + "   " + qq[h]);
052                                            }
053                                    }
054                            }       
055                    }
056            }
057            
058            public void testMaxentDonorModel() throws Exception {
059                    // We begin with lots of examples of donor sites, build the maximum entropy distribution
060                    // based on all pairwise marginals using Java, and compare results with the same
061                    // maxent distribution (of length 16384 = 4^7) using Matlab.
062    
063                    String fileName ="test/input/donor_examples.txt";
064                    String[][] preDonorExamples = FileUtil.readFlatFile(fileName);
065                    int nExamples = preDonorExamples.length;
066                    assertEquals(nExamples,1098);
067                    int span = 7;
068                    System.out.println("Number of donor examples is " + nExamples);
069    
070                    ArrayList<int[]> donorExamples = new ArrayList<int[]>();            
071                    for (int j=0; j<nExamples; j++) {
072                            assertEquals(preDonorExamples[j].length,span);
073                            int[] example = new int[span];
074                            for (int k=0; k<span; k++) {
075                                    example[k] = (int) Math.round(Double.parseDouble(preDonorExamples[j][k])) - 1;
076                            }
077                            donorExamples.add(example);
078                    }
079                    
080                    
081                    
082                    String fileName2 = "test/input/donor_maxent.txt";
083                    double[] x = FileUtil.readDoublesFromSingleTabbedLine(fileName2); 
084                    System.out.println("Length is " + x.length  +"     Sum is " + BasicStats.sumDoubleArray(x));
085                    
086                    double[] y = MaxentMotifModel.trainMaxentDistributionUsingAllPairwiseConstraints(donorExamples,span,200,0.0);
087                    
088                    double diff = BasicStats.L1Distance(x,y);
089                    System.out.println("The L1 difference between the Matlab and Java maxent distributions was " + diff);
090                    assert(diff < 0.01);
091                    
092                    System.out.println("The max and argmax of the Java distribution are " + BasicStats.max(y) + "    " + BasicStats.argmax(y));
093                    System.out.println("The max and argmax of the Matlab distribution are " + BasicStats.max(x) + "    " + BasicStats.argmax(x));
094                    
095            }
096            
097            
098    
099    }