001 package calhoun.analysis.crf.features.interval13;
002
003 import java.util.List;
004
005 import calhoun.analysis.crf.AbstractFeatureManager;
006 import calhoun.analysis.crf.FeatureList;
007 import calhoun.analysis.crf.ModelManager;
008 import calhoun.analysis.crf.features.supporting.LogProbLookup;
009 import calhoun.analysis.crf.io.InputSequence;
010 import calhoun.analysis.crf.io.TrainingSequence;
011 import calhoun.util.Assert;
012
013
014 public abstract class ReferenceBasePredictorInterval13Base extends AbstractFeatureManager<Character> {
015
016 private static final long serialVersionUID = 8194502006226691957L;
017 ModelManager model;
018 int startIx;
019
020 boolean multipleFeatures = false;
021
022 double pseudoCounts;
023 int lookback;
024
025 LogProbLookup intron;
026 LogProbLookup intergenic;
027 LogProbLookup[] exonic;
028
029 public ReferenceBasePredictorInterval13Base() {
030 }
031
032 public String getFeatureName(int featureIndex) {
033 if(multipleFeatures) {
034 String[] vals = new String[] { "Intergenic", "Exon pos.", "Intron pos.", "Exon neg.", "Intron neg."};
035 int feat = featureIndex - startIx;
036 String table = vals[feat];
037 return table+" base composition";
038 }
039 else {
040 return "referenceBasePredictorInterval13";
041 }
042 }
043
044 public int getNumFeatures() {
045 return multipleFeatures ? 5 : 1;
046 }
047
048 public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends Character>> data) {
049 startIx = startingIndex;
050
051 model = modelInfo;
052 Interval13Tools.verify(modelInfo);
053
054 pseudoCounts = 1.0;
055 lookback = 3;
056
057
058 // Construct the space for the lookup tables.
059 exonic = new LogProbLookup[3];
060 for (int j=0; j<3; j++) {
061 exonic[j] = new LogProbLookup(lookback,pseudoCounts);
062 }
063 intron = new LogProbLookup(lookback,pseudoCounts);
064 intergenic = new LogProbLookup(lookback,pseudoCounts);
065
066
067 for(TrainingSequence<? extends Character> seq : data) {
068 for (int pos=0; pos<seq.length(); pos++) {
069
070 int state = seq.getY(pos);
071 switch(state) {
072 case(0):
073 intergenic.increment(seq,pos,true);
074 intergenic.increment(seq,pos,false);
075 break;
076 case(1):
077 case(2):
078 case(3):
079 exonic[((pos-state+1)%3+3)%3].increment(seq,pos,true);
080 break;
081 case(4):
082 case(5):
083 case(6):
084 intron.increment(seq,pos,true);
085 break;
086 case(7):
087 case(8):
088 case(9):
089 exonic[((-pos+state+1)%3+3)%3].increment(seq,pos,false);
090 break;
091 case(10):
092 case(11):
093 case(12):
094 intron.increment(seq,pos,false);
095 break;
096 default:
097 Assert.a(false);
098 }
099 }
100 }
101
102 for (int j=0; j<3; j++) {
103 exonic[j].finalize();
104 }
105 intron.finalize();
106 intergenic.finalize();
107 }
108
109
110 public void evaluateNode(InputSequence<? extends Character> seq, int pos, int state, FeatureList result) {
111 double evaluation=0;
112
113 int indexOffset = Integer.MIN_VALUE;
114 int phase;
115 switch(state) {
116 case(0):
117 evaluation = intergenic.lookup(seq,pos,true);
118 indexOffset = 0;
119 break;
120 case(1):
121 case(2):
122 case(3):
123 phase = ((pos-state+1)%3+3)%3;
124 evaluation = exonic[phase].lookup(seq,pos,true);
125 indexOffset = 1;// + phase;
126 break;
127 case(4):
128 case(5):
129 case(6):
130 evaluation = intron.lookup(seq,pos,true);
131 indexOffset = 2;
132 break;
133 case(7):
134 case(8):
135 case(9):
136 phase = ((-pos+state+1)%3+3)%3;
137 evaluation = exonic[phase].lookup(seq,pos,false);
138 indexOffset = 3;// + phase;
139 break;
140 case(10):
141 case(11):
142 case(12):
143 evaluation = intron.lookup(seq,pos,false);
144 indexOffset = 4;
145 break;
146 default:
147 Assert.a(false);
148 }
149
150 result.addFeature(startIx + (multipleFeatures ? indexOffset : 0), evaluation);
151 }
152
153 /** if true, a separate feature index is used for each state, creating 13 weights instead of 1.
154 * @return returns true if a separate feature index is used for each state
155 */
156 public boolean isMultipleFeatures() {
157 return multipleFeatures;
158 }
159
160 /**
161 * @param multipleFeatures The multipleFeatures to set.
162 */
163 public void setMultipleFeatures(boolean weightPerState) {
164 this.multipleFeatures = weightPerState;
165 }
166
167
168 }