001    package calhoun.analysis.crf.features.interval13;
002    
003    import java.util.List;
004    
005    import org.apache.commons.logging.Log;
006    import org.apache.commons.logging.LogFactory;
007    
008    import calhoun.analysis.crf.AbstractFeatureManager;
009    import calhoun.analysis.crf.CacheStrategySpec;
010    import calhoun.analysis.crf.FeatureList;
011    import calhoun.analysis.crf.FeatureManagerEdge;
012    import calhoun.analysis.crf.ModelManager;
013    import calhoun.analysis.crf.CacheStrategySpec.CacheStrategy;
014    import calhoun.analysis.crf.features.supporting.PWMLookup;
015    import calhoun.analysis.crf.io.InputSequence;
016    import calhoun.analysis.crf.io.TrainingSequence;
017    import calhoun.util.Assert;
018    
019    public class PWMInterval13 extends AbstractFeatureManager<Character> implements FeatureManagerEdge<Character> {
020            private static final long serialVersionUID = -7659288739348604129L;
021            private static final Log log = LogFactory.getLog(PWMInterval13.class);
022            boolean debug = log.isDebugEnabled();
023            
024            boolean multipleFeatures = false;
025            
026            int startIx;  // The index of the first feature managed by this FeatureManager
027            ModelManager model;
028            
029            
030            PWMLookup start,stop;
031            PWMLookup[] donor,acceptor;
032            double pseudoCounts;    
033    
034            
035            public PWMInterval13() {
036            }
037            
038            public int getNumFeatures() {
039                    return multipleFeatures ? 8 : 1;
040            }       
041            
042            public String getFeatureName(int featureIndex) {
043                    if(multipleFeatures) {
044                            String[] vals = new String[] { "Start", "Stop", "Donor 0", "Donor 1", "Donor 2", "Acceptor 0", "Acceptor 1", "Acceptor 2"};
045                            String feature = vals[featureIndex - startIx];
046                            return feature + " PWM";
047                    }
048                    else {
049                            Assert.a(featureIndex == startIx);
050                            return "PwmFeatureInterval13";
051                    }
052            }
053    
054            public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends Character>> data) {
055                    startIx = startingIndex;
056                    model = modelInfo;              
057                    
058                    Interval13Tools.verify(modelInfo);
059                                    
060                    
061                    // Construct the space for the lookup tables.
062                    pseudoCounts = 1.0;     
063                    donor    = new PWMLookup[3];
064                    acceptor = new PWMLookup[3];
065                    for (int j=0; j<3; j++) {
066                            donor[j]    = new PWMLookup(Interval13Model.getPadExon3prime(),Interval13Model.getPadIntron5prime(),pseudoCounts);   // donor signal xxx|GTxxxx 
067                            acceptor[j] = new PWMLookup(Interval13Model.getPadIntron3prime(),Interval13Model.getPadExon5prime(),pseudoCounts);   // acceptor signal  xxxxxxxAG|xxxxxx
068                    }
069                    // Note: start PWM and stop PWM must extend equally far into the intergenic space, so that can set pads
070                    // stop and donor must also extend same amount into exon
071                    // start and acceptor must extend same amount into exon
072                    start = new PWMLookup(Interval13Model.getPadIntergenic(),Interval13Model.getPadExon5prime(),pseudoCounts);             // start signal xxxxxxxxx|ATGxxx
073                    stop  = new PWMLookup(Interval13Model.getPadExon3prime(),Interval13Model.getPadIntergenic(),pseudoCounts);             // stop signal xxx|TAGxxxxxx
074    
075                                    
076                    // Increment the lookup tables below
077                    for(TrainingSequence<? extends Character> seq : data) {
078                            for (int pos=1; pos<seq.length(); pos++) { // note start at one not zero, so can look back at prevState
079                                    
080                                    int state = seq.getY(pos);
081                                    int prevState = seq.getY(pos-1);
082                                    int iind;
083                                    switch(Interval13Tools.edgeConstraints[prevState*Interval13Tools.numStates + state]) {
084                                    case NONE:
085                                    case PCODE:
086                                    case MCODE:
087                                            break;
088                                    case NEVER:
089                                            Assert.a(false,"pos = "+pos+" prevState = " + modelInfo.getStateName(prevState) + "   State = " + modelInfo.getStateName(state));  // A nice side effect of making sure the input sequence is legal, can omit this if you want to.
090                                            break;
091                                    case PSTART:
092                                            start.increment(seq, pos,true);
093                                            break;
094                                    case PDON:
095                                            iind = Interval13Tools.check012(state-4);
096                                            donor[iind].increment(seq,pos,true);
097                                            break;
098                                    case PACC:
099                                            iind = Interval13Tools.check012(prevState-4);
100                                            acceptor[iind].increment(seq,pos,true);
101                                            break;
102                                    case PSTOP:
103                                            stop.increment(seq,pos,true);
104                                            break;
105                                    case MSTART:
106                                            start.increment(seq,pos,false);
107                                            break;
108                                    case MDON:
109                                            iind = Interval13Tools.check012(prevState-10);
110                                            donor[iind].increment(seq,pos,false);
111                                            break;
112                                    case MACC:
113                                            iind = Interval13Tools.check012(state-10);
114                                            acceptor[iind].increment(seq,pos,false);
115                                            break;
116                                    case MSTOP:
117                                            stop.increment(seq,pos,false);
118                                            break;
119                                    default:
120                                            Assert.a(false);  // We should have a complete enumeration of possibilities above.
121                                    }
122                            }
123                    }
124                    
125                    for (int j=0; j<3; j++) {
126                            donor[j].completeCounts();
127                            acceptor[j].completeCounts();
128                    }
129                    start.completeCounts();
130                    stop.completeCounts();
131            }
132            
133            
134            public void evaluateEdge(InputSequence<? extends Character> seq, int pos, int previousState, int state, FeatureList result) {
135                    double val = 0.0;       
136                    
137                    int featureIndex = Integer.MIN_VALUE;
138                    int iind;
139                    switch(Interval13Tools.edgeConstraints[previousState*Interval13Tools.numStates + state]) {
140                    case NONE:
141                    case PCODE:
142                    case MCODE:
143                            break;
144                    case NEVER:
145                            Assert.a(false);  // A nice side effect of making sure the input sequence is legal, can omit this if you want to.
146                            break;
147                    case PSTART:
148                            featureIndex = 0;
149                            val = start.lookup(seq, pos,true);
150                            break;
151                    case PDON:
152                            iind = Interval13Tools.check012(state-4);
153                            featureIndex = 2+iind;;
154                            val = donor[iind].lookup(seq,pos,true);
155                            break;
156                    case PACC:
157                            iind = Interval13Tools.check012(previousState-4);
158                            featureIndex = 5+iind;;
159                            val = acceptor[iind].lookup(seq,pos,true);
160                            break;
161                    case PSTOP:
162                            featureIndex = 1;
163                            val = stop.lookup(seq,pos,true);
164                            break;
165                    case MSTART:
166                            featureIndex = 0;
167                            val = start.lookup(seq,pos,false);
168                            break;
169                    case MDON:
170                            iind = Interval13Tools.check012(previousState-10);
171                            featureIndex = 2+iind;;
172                            val = donor[iind].lookup(seq,pos,false);
173                            break;
174                    case MACC:
175                            iind = Interval13Tools.check012(state-10);
176                            featureIndex = 5+iind;;
177                            val = acceptor[iind].lookup(seq,pos,false);
178                            break;
179                    case MSTOP:
180                            featureIndex = 1;
181                            val = stop.lookup(seq,pos,false);
182                            break;
183                    default:
184                            Assert.a(false);  // We should have a complete enumeration of possibilities above.
185                    
186                    }
187                    Assert.a(val<=0);
188                    result.addFeature(startIx + (multipleFeatures ? featureIndex : 0),val);
189            }
190    
191            @Override
192            public CacheStrategySpec getCacheStrategy() {
193                    return new CacheStrategySpec(CacheStrategy.UNSPECIFIED);
194            }
195    
196            /**
197             * @return Returns the multipleFeatures.
198             */
199            public boolean isMultipleFeatures() {
200                    return multipleFeatures;
201            }
202    
203            /**
204             * @param multipleFeatures The multipleFeatures to set.
205             */
206            public void setMultipleFeatures(boolean multipleFeatures) {
207                    this.multipleFeatures = multipleFeatures;
208            }
209    }
210