001    package calhoun.analysis.crf.features.interval13;
002    
003    import java.util.List;
004    
005    import calhoun.analysis.crf.AbstractFeatureManager;
006    import calhoun.analysis.crf.FeatureList;
007    import calhoun.analysis.crf.ModelManager;
008    import calhoun.analysis.crf.features.supporting.LogProbLookup;
009    import calhoun.analysis.crf.io.InputSequence;
010    import calhoun.analysis.crf.io.TrainingSequence;
011    import calhoun.util.Assert;
012    
013    
014    public abstract class ReferenceBasePredictorInterval13Base extends AbstractFeatureManager<Character> {
015    
016            private static final long serialVersionUID = 8194502006226691957L;
017            ModelManager model;
018            int startIx;
019                    
020            boolean multipleFeatures = false;
021            
022            double pseudoCounts;
023            int lookback;
024            
025            LogProbLookup   intron;
026            LogProbLookup   intergenic;
027            LogProbLookup[] exonic;
028            
029            public ReferenceBasePredictorInterval13Base() {
030            }
031    
032            public String getFeatureName(int featureIndex) {
033                    if(multipleFeatures) {
034                            String[] vals = new String[] { "Intergenic", "Exon pos.", "Intron pos.", "Exon neg.", "Intron neg."};
035                            int feat = featureIndex - startIx;
036                            String table = vals[feat];
037                            return table+" base composition";
038                    }
039                    else {
040                            return "referenceBasePredictorInterval13";
041                    }
042            }
043    
044            public int getNumFeatures() {
045                    return multipleFeatures ? 5 : 1;
046            }
047            
048            public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends Character>> data) {
049                    startIx = startingIndex;
050                    
051                    model = modelInfo;
052                    Interval13Tools.verify(modelInfo);
053    
054                    pseudoCounts = 1.0;     
055                    lookback = 3;
056    
057                    
058                    // Construct the space for the lookup tables.
059                    exonic = new LogProbLookup[3];
060                    for (int j=0; j<3; j++) {
061                            exonic[j] = new LogProbLookup(lookback,pseudoCounts);
062                    }
063                    intron     = new LogProbLookup(lookback,pseudoCounts);
064                    intergenic = new LogProbLookup(lookback,pseudoCounts);
065    
066    
067                    for(TrainingSequence<? extends Character> seq : data) {
068                            for (int pos=0; pos<seq.length(); pos++) {
069                                    
070                                    int state = seq.getY(pos);
071                                    switch(state) {
072                                    case(0):
073                                            intergenic.increment(seq,pos,true);
074                                            intergenic.increment(seq,pos,false);
075                                            break;
076                                    case(1):
077                                    case(2):
078                                    case(3):
079                                            exonic[((pos-state+1)%3+3)%3].increment(seq,pos,true);
080                                            break;
081                                    case(4):
082                                    case(5):
083                                    case(6):
084                                            intron.increment(seq,pos,true);
085                                            break;
086                                    case(7):
087                                    case(8):
088                                    case(9):
089                                            exonic[((-pos+state+1)%3+3)%3].increment(seq,pos,false);
090                                            break;
091                                    case(10):
092                                    case(11):
093                                    case(12):
094                                            intron.increment(seq,pos,false);
095                                            break;
096                                    default:
097                                            Assert.a(false);
098                                    }       
099                            }
100                    }
101                    
102                    for (int j=0; j<3; j++) {
103                            exonic[j].finalize();
104                    }
105                    intron.finalize();
106                    intergenic.finalize();
107            }
108    
109            
110            public void evaluateNode(InputSequence<? extends Character> seq, int pos, int state, FeatureList result) {
111                    double evaluation=0;
112    
113                    int indexOffset = Integer.MIN_VALUE;
114                    int phase;
115                    switch(state) {
116                    case(0):
117                            evaluation = intergenic.lookup(seq,pos,true);
118                            indexOffset = 0;
119                            break;
120                    case(1):
121                    case(2):
122                    case(3):
123                            phase = ((pos-state+1)%3+3)%3;
124                            evaluation = exonic[phase].lookup(seq,pos,true);
125                            indexOffset = 1;// + phase;
126                            break;
127                    case(4):
128                    case(5):
129                    case(6):
130                            evaluation = intron.lookup(seq,pos,true);
131                            indexOffset = 2;
132                            break;
133                    case(7):
134                    case(8):
135                    case(9):
136                            phase = ((-pos+state+1)%3+3)%3;
137                            evaluation = exonic[phase].lookup(seq,pos,false);
138                            indexOffset = 3;// + phase;
139                            break;
140                    case(10):
141                    case(11):
142                    case(12):
143                            evaluation = intron.lookup(seq,pos,false);
144                            indexOffset = 4;
145                            break;
146                    default:
147                            Assert.a(false);
148                    }
149                    
150                    result.addFeature(startIx + (multipleFeatures ? indexOffset : 0), evaluation);          
151            }
152    
153            /** if true, a separate feature index is used for each state, creating 13 weights instead of 1.
154             * @return returns true if a separate feature index is used for each state
155             */
156            public boolean isMultipleFeatures() {
157                    return multipleFeatures;
158            }
159    
160            /**
161             * @param multipleFeatures The multipleFeatures to set.
162             */
163            public void setMultipleFeatures(boolean weightPerState) {
164                    this.multipleFeatures = weightPerState;
165            }
166    
167            
168    }