001    package calhoun.analysis.crf.features.supporting;
002    
003    import java.io.Serializable;
004    
005    import org.apache.commons.logging.Log;
006    import org.apache.commons.logging.LogFactory;
007    
008    import calhoun.analysis.crf.io.InputSequence;
009    import calhoun.seq.KmerHasher;
010    import calhoun.util.Assert;
011    
012    
013    public class LogProbLookup implements Serializable {
014            private static final long serialVersionUID = -9195647924401633963L;
015            private static final Log log = LogFactory.getLog(LogProbLookup.class);
016            final KmerHasher.CharacterHash hashForward = KmerHasher.ACGTother;
017            final KmerHasher.CharacterHash hashReverse = KmerHasher.ACGTotherRC;            
018            boolean finalized;
019            
020            final int mult = 4;
021            int maxLookBack;
022            
023            double[] lookupTable;
024            int lookupTableSize;
025            
026            public LogProbLookup(int lookBack, double pseudoCount) {
027                    Assert.a(lookBack >= 0);
028                    Assert.a(lookBack < 10);
029                    this.maxLookBack = lookBack;
030                    finalized = false;
031                    
032                    lookupTableSize = 1;
033                    for (int i=0; i<=lookBack; i++) {
034                            lookupTableSize *= mult;
035                    }
036                    lookupTable = new double[lookupTableSize];
037                    
038                    for (int i=0; i<lookupTableSize; i++) {
039                            lookupTable[i] = pseudoCount;
040                    }
041            }
042    
043            private boolean isHistory(InputSequence<? extends Character> seq, int pos) {
044    
045                    for (int j=pos-maxLookBack; j<=pos+maxLookBack; j++) {
046                            if (hashForward.hash(seq.getX(j))==4) { return false; }
047                            // Above is identical to checking hashReverse
048                            // If there are N's within history window in either dircetion, want to ignore this position
049                    }
050                    return true;
051            }
052            
053            private int getInd(InputSequence<? extends Character> seq, int pos, boolean isPlus) {
054                    int ind = 0;
055                    if (isPlus) {
056                            if (pos < maxLookBack) { return -1; }
057                            for (int j=pos-maxLookBack; j<=pos; j++) {
058                                    int h = hashForward.hash( (char) seq.getX(j));
059                                    //int h = hashForward.hash('A');
060                                    if (h<4) {
061                                            ind *= mult;
062                                            ind += h;
063                                    } else {
064                                            return -1;
065                                    }
066                            }
067                    } else {
068                            if (pos + maxLookBack >= seq.length()) { return -1; }
069                            for (int j=pos+maxLookBack; j>=pos; j--) {
070                                    int h = hashReverse.hash( (char) seq.getX(j));
071                                    if (h<4) {
072                                            ind *= mult;
073                                            ind += h;
074                                    } else {
075                                            return -1;
076                                    }
077                            }
078                    }
079                    return ind;
080            }
081            
082            public void increment(InputSequence<? extends Character> seq, int pos, boolean isPlus) {
083                    Assert.a(!finalized);
084                    int ind = getInd(seq,pos,isPlus);
085                    if (ind >=0) {
086                            lookupTable[ind] += 1.0;
087                    }
088            }
089    
090            @Override
091            public void finalize() {
092                    Assert.a(!finalized);
093                    log.debug("finalizing a LogProbLookup, lookupTablesize="+lookupTableSize + "    mult=" + mult);
094                    for (int i=0; i<lookupTableSize/mult; i++) {
095                            // System.out.println("finalizing i=" + i);
096                            double sum = 0;
097                            for (int j=mult*i; j<mult*(i+1); j++) {
098                                    sum += lookupTable[j];
099                            }
100                            for (int j=mult*i; j<mult*(i+1); j++) {
101                                    lookupTable[j] = Math.log(lookupTable[j]/sum);
102                            }
103                    }
104                    finalized = true;
105            }
106            
107            public double lookup(InputSequence<? extends Character> seq, int pos, boolean isPlus) {
108                    Assert.a(finalized);    
109                    int ind = getInd(seq,pos,isPlus);
110                    if (ind >= 0) {
111                            return lookupTable[ind];
112                    }
113                    return 0.0;
114            }
115    
116    }