001    package calhoun.analysis.crf.features.interval29;
002    
003    import java.util.List;
004    
005    import org.apache.commons.logging.Log;
006    import org.apache.commons.logging.LogFactory;
007    
008    import calhoun.analysis.crf.AbstractFeatureManager;
009    import calhoun.analysis.crf.CacheStrategySpec;
010    import calhoun.analysis.crf.FeatureList;
011    import calhoun.analysis.crf.FeatureManagerEdge;
012    import calhoun.analysis.crf.ModelManager;
013    import calhoun.analysis.crf.CacheStrategySpec.CacheStrategy;
014    import calhoun.analysis.crf.features.supporting.PWMLookup;
015    import calhoun.analysis.crf.io.InputSequence;
016    import calhoun.analysis.crf.io.TrainingSequence;
017    import calhoun.util.Assert;
018    
019    public class PWMInterval29 extends AbstractFeatureManager<Character> implements FeatureManagerEdge<Character> {
020            private static final long serialVersionUID = -7659288739348604129L;
021            private static final Log log = LogFactory.getLog(PWMInterval29.class);
022            boolean debug = log.isDebugEnabled();
023            
024            boolean multipleFeatures = false;
025            
026            int startIx;  // The index of the first feature managed by this FeatureManager
027            ModelManager model;
028            
029            
030            PWMLookup start,stop;
031            PWMLookup[] donor,acceptor;
032            double pseudoCounts;    
033    
034            
035            public PWMInterval29() {
036            }
037            
038            public int getNumFeatures() {
039                    return multipleFeatures ? 8 : 1;
040            }       
041            
042            public String getFeatureName(int featureIndex) {
043                    if(multipleFeatures) {
044                            String feature = "";
045                            switch(featureIndex - startIx) {
046                            case 0:
047                                    feature = "start";
048                                    break;
049                            case 1:
050                                    feature = "stop";
051                                    break;
052                            case 2:
053                            case 3:
054                            case 4:
055                                    feature = "donor"+(featureIndex - startIx - 2);
056                            case 5:
057                            case 6:
058                            case 7:
059                                    feature = "acceptor"+(featureIndex - startIx - 5);
060                            }
061                            return "PwmFeatureInterval29 - "+feature;
062                    }
063                    else {
064                            Assert.a(featureIndex == startIx);
065                            return "PwmFeatureInterval29";
066                    }
067            }
068    
069            public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends Character>> data) {
070                    startIx = startingIndex;
071                    model = modelInfo;              
072                    
073                    Interval29Tools.verify(modelInfo);
074                                    
075                    
076                    // Construct the space for the lookup tables.
077                    pseudoCounts = 1.0;     
078                    donor    = new PWMLookup[3];
079                    acceptor = new PWMLookup[3];
080                    for (int j=0; j<3; j++) {
081                            donor[j]    = new PWMLookup(Interval29Model.getPadExon3prime(),Interval29Model.getPadIntron5prime(),pseudoCounts);   // donor signal xxx|GTxxxx 
082                            acceptor[j] = new PWMLookup(Interval29Model.getPadIntron3prime(),Interval29Model.getPadExon5prime(),pseudoCounts);   // acceptor signal  xxxxxxxAG|xxxxxx
083                    }
084                    // Note: start PWM and stop PWM must extend equally far into the intergenic space, so that can set pads
085                    // stop and donor must also extend same amount into exon
086                    // start and acceptor must extend same amount into exon
087                    start = new PWMLookup(Interval29Model.getPadIntergenic(),Interval29Model.getPadExon5prime(),pseudoCounts);             // start signal xxxxxxxxx|ATGxxx
088                    stop  = new PWMLookup(Interval29Model.getPadExon3prime(),Interval29Model.getPadIntergenic(),pseudoCounts);             // stop signal xxx|TAGxxxxxx
089    
090                                    
091                    // Increment the lookup tables below
092                    for(TrainingSequence<? extends Character> seq : data) {
093                            for (int pos=1; pos<seq.length(); pos++) { // note start at one not zero, so can look back at prevState
094                                    
095                                    int state = seq.getY(pos);
096                                    int prevState = seq.getY(pos-1);
097                                    int iind;
098                                    switch(Interval29Tools.edgeConstraints[prevState*Interval29Tools.numStates + state]) {
099                                    case NONE:
100                                    case PCODE:
101                                    case MCODE:
102                                            break;
103                                    case NEVER:
104                                            Assert.a(false,"pos = "+pos+" prevState = " + modelInfo.getStateName(prevState) + "   State = " + modelInfo.getStateName(state));  // A nice side effect of making sure the input sequence is legal, can omit this if you want to.
105                                            break;
106                                    case PSTART:
107                                            start.increment(seq, pos,true);
108                                            break;
109                                    case PDON:
110                                            // y(k) : k = i-j (mod 3)
111                                            iind = Interval29Tools.check012(state-15);
112                                            donor[iind].increment(seq,pos,true);
113                                            break;
114                                    case PACC:
115                                            iind = Interval29Tools.check012(prevState-4);
116                                            acceptor[iind].increment(seq,pos,true);
117                                            break;
118                                    case PSTOP:
119                                            stop.increment(seq,pos,true);
120                                            break;
121                                    case MSTART:
122                                            start.increment(seq,pos,false);
123                                            break;
124                                    case MDON:
125                                            iind = Interval29Tools.check012(prevState-10);
126                                            donor[iind].increment(seq,pos,false);
127                                            break;
128                                    case MACC:
129                                            // y(k) : k = i+j (mod 3) 
130                                            iind = Interval29Tools.check012(state-23);
131                                            acceptor[iind].increment(seq,pos,false);
132                                            break;
133                                    case MSTOP:
134                                            stop.increment(seq,pos,false);
135                                            break;
136                                    case PKEEPE:
137                                    case PKEEPI:
138                                    case MKEEPE:
139                                    case MKEEPI:
140                                    case PSTOPPED:
141                                    case MSTARTED:
142                                    case PWILLSTART:
143                                    case MWILLSTOP:                                 
144                                            break;
145                                    default:
146                                            Assert.a(false);  // We should have a complete enumeration of possibilities above.
147                                    }
148                            }
149                    }
150                    
151                    for (int j=0; j<3; j++) {
152                            donor[j].completeCounts();
153                            acceptor[j].completeCounts();
154                    }
155                    start.completeCounts();
156                    stop.completeCounts();
157            }
158            
159            
160            public void evaluateEdge(InputSequence<? extends Character> seq, int pos, int previousState, int state, FeatureList result) {
161                    double val = 0.0;       
162                    
163                    int featureIndex = Integer.MIN_VALUE;
164                    int iind;
165                    switch(Interval29Tools.edgeConstraints[previousState*Interval29Tools.numStates + state]) {
166                    case NONE:
167                    case PCODE:
168                    case MCODE:
169                            break;
170                    case NEVER:
171                            Assert.a(false);  // A nice side effect of making sure the input sequence is legal, can omit this if you want to.
172                            break;
173                    case PSTART:
174                            featureIndex = 0;
175                            val = start.lookup(seq, pos,true);
176                            break;
177                    case PDON:
178                            iind = Interval29Tools.check012(state-15);
179                            featureIndex = 2+iind;;
180                            val = donor[iind].lookup(seq,pos,true);
181                            break;
182                    case PACC:
183                            iind = Interval29Tools.check012(previousState-4);
184                            featureIndex = 5+iind;;
185                            val = acceptor[iind].lookup(seq,pos,true);
186                            break;
187                    case PSTOP:
188                            featureIndex = 1;
189                            val = stop.lookup(seq,pos,true);
190                            break;
191                    case MSTART:
192                            featureIndex = 0;
193                            val = start.lookup(seq,pos,false);
194                            break;
195                    case MDON:
196                            iind = Interval29Tools.check012(previousState-10);
197                            featureIndex = 2+iind;;
198                            val = donor[iind].lookup(seq,pos,false);
199                            break;
200                    case MACC:
201                            iind = Interval29Tools.check012(state-23);
202                            featureIndex = 5+iind;;
203                            val = acceptor[iind].lookup(seq,pos,false);
204                            break;
205                    case MSTOP:
206                            featureIndex = 1;
207                            val = stop.lookup(seq,pos,false);
208                            break;
209                    case PKEEPE:
210                    case PKEEPI:
211                    case MKEEPE:
212                    case MKEEPI:
213                    case PSTOPPED:
214                    case MSTARTED:
215                    case PWILLSTART:
216                    case MWILLSTOP:
217                            break;                  
218                    default:
219                            Assert.a(false);  // We should have a complete enumeration of possibilities above.
220                    
221                    }
222                    Assert.a(val<=0);
223                    result.addFeature(startIx + (multipleFeatures ? featureIndex : 0),val);
224            }
225    
226            public CacheStrategySpec getCacheStrategy() {
227                    return new CacheStrategySpec(CacheStrategy.UNSPECIFIED);
228            }
229    
230            /**
231             * @return Returns the multipleFeatures.
232             */
233            public boolean isMultipleFeatures() {
234                    return multipleFeatures;
235            }
236    
237            /**
238             * @param multipleFeatures The multipleFeatures to set.
239             */
240            public void setMultipleFeatures(boolean multipleFeatures) {
241                    this.multipleFeatures = multipleFeatures;
242            }
243    }
244