001 package calhoun.analysis.crf.test;
002
003 import java.util.ArrayList;
004
005 import org.apache.commons.logging.Log;
006 import org.apache.commons.logging.LogFactory;
007
008 import calhoun.analysis.crf.features.supporting.MaxentMotifModel;
009 import calhoun.analysis.crf.statistics.BasicStats;
010 import calhoun.util.AbstractTestCase;
011 import calhoun.util.FileUtil;
012
013 public class MaxentMotifModelTest extends AbstractTestCase {
014 private static final Log log = LogFactory.getLog(CRFIOTest.class);
015 boolean debug = log.isDebugEnabled();
016
017 public void testMaxentModel() throws Exception {
018 ArrayList<int[]> motifExamples = new ArrayList<int[]>();
019 motifExamples.add(new int[]{0,0,0});
020 //motifExamples.add(new int[]{1,1,0});
021 //motifExamples.add(new int[]{1,0,1});
022 //motifExamples.add(new int[]{0,1,1});
023 motifExamples.add(new int[]{1,1,1});
024
025 double[] pp = MaxentMotifModel.trainMaxentDistributionUsingAllPairwiseConstraints(motifExamples,3,30,1.0);
026
027 assertEquals(pp.length,64);
028
029 for (int a=0; a<4; a++) {
030 for (int b=0; b<4; b++) {
031 for (int c=0; c<4; c++) {
032 int h = 16*a+4*b+c;
033 if (pp[h] > 0.001) {
034 System.out.println("" + a + " " + b + " " + c + " " + pp[h]);
035 }
036 }
037 }
038 }
039
040 ArrayList<int[]> motifExamples2 = new ArrayList<int[]>();
041
042 double[] qq = MaxentMotifModel.trainMaxentDistributionUsingAllPairwiseConstraints(motifExamples2,3,30,1.0);
043
044 assertEquals(qq.length,64);
045
046 for (int a=0; a<4; a++) {
047 for (int b=0; b<4; b++) {
048 for (int c=0; c<4; c++) {
049 int h = 16*a+4*b+c;
050 if (qq[h] > 0.001) {
051 System.out.println("" + a + " " + b + " " + c + " " + qq[h]);
052 }
053 }
054 }
055 }
056 }
057
058 public void testMaxentDonorModel() throws Exception {
059 // We begin with lots of examples of donor sites, build the maximum entropy distribution
060 // based on all pairwise marginals using Java, and compare results with the same
061 // maxent distribution (of length 16384 = 4^7) using Matlab.
062
063 String fileName ="test/input/donor_examples.txt";
064 String[][] preDonorExamples = FileUtil.readFlatFile(fileName);
065 int nExamples = preDonorExamples.length;
066 assertEquals(nExamples,1098);
067 int span = 7;
068 System.out.println("Number of donor examples is " + nExamples);
069
070 ArrayList<int[]> donorExamples = new ArrayList<int[]>();
071 for (int j=0; j<nExamples; j++) {
072 assertEquals(preDonorExamples[j].length,span);
073 int[] example = new int[span];
074 for (int k=0; k<span; k++) {
075 example[k] = (int) Math.round(Double.parseDouble(preDonorExamples[j][k])) - 1;
076 }
077 donorExamples.add(example);
078 }
079
080
081
082 String fileName2 = "test/input/donor_maxent.txt";
083 double[] x = FileUtil.readDoublesFromSingleTabbedLine(fileName2);
084 System.out.println("Length is " + x.length +" Sum is " + BasicStats.sumDoubleArray(x));
085
086 double[] y = MaxentMotifModel.trainMaxentDistributionUsingAllPairwiseConstraints(donorExamples,span,200,0.0);
087
088 double diff = BasicStats.L1Distance(x,y);
089 System.out.println("The L1 difference between the Matlab and Java maxent distributions was " + diff);
090 assert(diff < 0.01);
091
092 System.out.println("The max and argmax of the Java distribution are " + BasicStats.max(y) + " " + BasicStats.argmax(y));
093 System.out.println("The max and argmax of the Matlab distribution are " + BasicStats.max(x) + " " + BasicStats.argmax(x));
094
095 }
096
097
098
099 }