001 package calhoun.analysis.crf.features.interval13;
002
003 import java.util.List;
004
005 import org.apache.commons.logging.Log;
006 import org.apache.commons.logging.LogFactory;
007
008 import calhoun.analysis.crf.AbstractFeatureManager;
009 import calhoun.analysis.crf.CacheStrategySpec;
010 import calhoun.analysis.crf.FeatureList;
011 import calhoun.analysis.crf.FeatureManagerEdge;
012 import calhoun.analysis.crf.ModelManager;
013 import calhoun.analysis.crf.CacheStrategySpec.CacheStrategy;
014 import calhoun.analysis.crf.features.supporting.PWMLookup;
015 import calhoun.analysis.crf.io.InputSequence;
016 import calhoun.analysis.crf.io.TrainingSequence;
017 import calhoun.util.Assert;
018
019 public class PWMInterval13 extends AbstractFeatureManager<Character> implements FeatureManagerEdge<Character> {
020 private static final long serialVersionUID = -7659288739348604129L;
021 private static final Log log = LogFactory.getLog(PWMInterval13.class);
022 boolean debug = log.isDebugEnabled();
023
024 boolean multipleFeatures = false;
025
026 int startIx; // The index of the first feature managed by this FeatureManager
027 ModelManager model;
028
029
030 PWMLookup start,stop;
031 PWMLookup[] donor,acceptor;
032 double pseudoCounts;
033
034
035 public PWMInterval13() {
036 }
037
038 public int getNumFeatures() {
039 return multipleFeatures ? 8 : 1;
040 }
041
042 public String getFeatureName(int featureIndex) {
043 if(multipleFeatures) {
044 String[] vals = new String[] { "Start", "Stop", "Donor 0", "Donor 1", "Donor 2", "Acceptor 0", "Acceptor 1", "Acceptor 2"};
045 String feature = vals[featureIndex - startIx];
046 return feature + " PWM";
047 }
048 else {
049 Assert.a(featureIndex == startIx);
050 return "PwmFeatureInterval13";
051 }
052 }
053
054 public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends Character>> data) {
055 startIx = startingIndex;
056 model = modelInfo;
057
058 Interval13Tools.verify(modelInfo);
059
060
061 // Construct the space for the lookup tables.
062 pseudoCounts = 1.0;
063 donor = new PWMLookup[3];
064 acceptor = new PWMLookup[3];
065 for (int j=0; j<3; j++) {
066 donor[j] = new PWMLookup(Interval13Model.getPadExon3prime(),Interval13Model.getPadIntron5prime(),pseudoCounts); // donor signal xxx|GTxxxx
067 acceptor[j] = new PWMLookup(Interval13Model.getPadIntron3prime(),Interval13Model.getPadExon5prime(),pseudoCounts); // acceptor signal xxxxxxxAG|xxxxxx
068 }
069 // Note: start PWM and stop PWM must extend equally far into the intergenic space, so that can set pads
070 // stop and donor must also extend same amount into exon
071 // start and acceptor must extend same amount into exon
072 start = new PWMLookup(Interval13Model.getPadIntergenic(),Interval13Model.getPadExon5prime(),pseudoCounts); // start signal xxxxxxxxx|ATGxxx
073 stop = new PWMLookup(Interval13Model.getPadExon3prime(),Interval13Model.getPadIntergenic(),pseudoCounts); // stop signal xxx|TAGxxxxxx
074
075
076 // Increment the lookup tables below
077 for(TrainingSequence<? extends Character> seq : data) {
078 for (int pos=1; pos<seq.length(); pos++) { // note start at one not zero, so can look back at prevState
079
080 int state = seq.getY(pos);
081 int prevState = seq.getY(pos-1);
082 int iind;
083 switch(Interval13Tools.edgeConstraints[prevState*Interval13Tools.numStates + state]) {
084 case NONE:
085 case PCODE:
086 case MCODE:
087 break;
088 case NEVER:
089 Assert.a(false,"pos = "+pos+" prevState = " + modelInfo.getStateName(prevState) + " State = " + modelInfo.getStateName(state)); // A nice side effect of making sure the input sequence is legal, can omit this if you want to.
090 break;
091 case PSTART:
092 start.increment(seq, pos,true);
093 break;
094 case PDON:
095 iind = Interval13Tools.check012(state-4);
096 donor[iind].increment(seq,pos,true);
097 break;
098 case PACC:
099 iind = Interval13Tools.check012(prevState-4);
100 acceptor[iind].increment(seq,pos,true);
101 break;
102 case PSTOP:
103 stop.increment(seq,pos,true);
104 break;
105 case MSTART:
106 start.increment(seq,pos,false);
107 break;
108 case MDON:
109 iind = Interval13Tools.check012(prevState-10);
110 donor[iind].increment(seq,pos,false);
111 break;
112 case MACC:
113 iind = Interval13Tools.check012(state-10);
114 acceptor[iind].increment(seq,pos,false);
115 break;
116 case MSTOP:
117 stop.increment(seq,pos,false);
118 break;
119 default:
120 Assert.a(false); // We should have a complete enumeration of possibilities above.
121 }
122 }
123 }
124
125 for (int j=0; j<3; j++) {
126 donor[j].completeCounts();
127 acceptor[j].completeCounts();
128 }
129 start.completeCounts();
130 stop.completeCounts();
131 }
132
133
134 public void evaluateEdge(InputSequence<? extends Character> seq, int pos, int previousState, int state, FeatureList result) {
135 double val = 0.0;
136
137 int featureIndex = Integer.MIN_VALUE;
138 int iind;
139 switch(Interval13Tools.edgeConstraints[previousState*Interval13Tools.numStates + state]) {
140 case NONE:
141 case PCODE:
142 case MCODE:
143 break;
144 case NEVER:
145 Assert.a(false); // A nice side effect of making sure the input sequence is legal, can omit this if you want to.
146 break;
147 case PSTART:
148 featureIndex = 0;
149 val = start.lookup(seq, pos,true);
150 break;
151 case PDON:
152 iind = Interval13Tools.check012(state-4);
153 featureIndex = 2+iind;;
154 val = donor[iind].lookup(seq,pos,true);
155 break;
156 case PACC:
157 iind = Interval13Tools.check012(previousState-4);
158 featureIndex = 5+iind;;
159 val = acceptor[iind].lookup(seq,pos,true);
160 break;
161 case PSTOP:
162 featureIndex = 1;
163 val = stop.lookup(seq,pos,true);
164 break;
165 case MSTART:
166 featureIndex = 0;
167 val = start.lookup(seq,pos,false);
168 break;
169 case MDON:
170 iind = Interval13Tools.check012(previousState-10);
171 featureIndex = 2+iind;;
172 val = donor[iind].lookup(seq,pos,false);
173 break;
174 case MACC:
175 iind = Interval13Tools.check012(state-10);
176 featureIndex = 5+iind;;
177 val = acceptor[iind].lookup(seq,pos,false);
178 break;
179 case MSTOP:
180 featureIndex = 1;
181 val = stop.lookup(seq,pos,false);
182 break;
183 default:
184 Assert.a(false); // We should have a complete enumeration of possibilities above.
185
186 }
187 Assert.a(val<=0);
188 result.addFeature(startIx + (multipleFeatures ? featureIndex : 0),val);
189 }
190
191 @Override
192 public CacheStrategySpec getCacheStrategy() {
193 return new CacheStrategySpec(CacheStrategy.UNSPECIFIED);
194 }
195
196 /**
197 * @return Returns the multipleFeatures.
198 */
199 public boolean isMultipleFeatures() {
200 return multipleFeatures;
201 }
202
203 /**
204 * @param multipleFeatures The multipleFeatures to set.
205 */
206 public void setMultipleFeatures(boolean multipleFeatures) {
207 this.multipleFeatures = multipleFeatures;
208 }
209 }
210