001 package calhoun.analysis.crf.features.interval29;
002
003 import java.util.List;
004
005 import calhoun.analysis.crf.AbstractFeatureManager;
006 import calhoun.analysis.crf.CacheStrategySpec;
007 import calhoun.analysis.crf.FeatureList;
008 import calhoun.analysis.crf.FeatureManagerEdge;
009 import calhoun.analysis.crf.FeatureManagerNode;
010 import calhoun.analysis.crf.ModelManager;
011 import calhoun.analysis.crf.CacheStrategySpec.CacheStrategy;
012 import calhoun.analysis.crf.io.InputSequence;
013 import calhoun.analysis.crf.io.TrainingSequence;
014 import calhoun.util.Assert;
015
016 /** Implements basic constraints on gene calls.
017 * 1) Intergenic - start must occur at ATG
018 * 2) Splice sites must be canonical GT/AG or GC/AG
019 * 3) Exon-stop must be followed by a start codon
020 */
021 public class GeneConstraintsInterval29 extends AbstractFeatureManager<Character> implements FeatureManagerEdge<Character>, FeatureManagerNode<Character> {
022 private static final long serialVersionUID = 3041359216265032511L;
023
024 public String getFeatureName(int featureIndex) {
025 return "Gene constraints for the model Interval29";
026 }
027
028 /** This is a constraint class, so we don't return features */
029 public int getNumFeatures() {
030 return 0;
031 }
032
033 public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends Character>> data) {
034 Interval29Tools.verify(modelInfo);
035 }
036
037 public void evaluateEdge(InputSequence<? extends Character> seq, int pos, int prevState, int state, FeatureList result) {
038 boolean valid = true;
039
040 int eind,iind;
041 int eind1, eind2, iind1, iind2;
042 switch(Interval29Tools.edgeConstraints[prevState*Interval29Tools.numStates + state]) {
043 case NONE:
044 break;
045 case NEVER:
046 Assert.a(false);
047 break;
048 case PSTART:
049 // ig-e to exonE
050 eind = Interval29Tools.check012(state-1);
051 if ((pos-eind)%3 != 0) { valid = false; break; }
052 valid = startConstraintPlus(seq, pos);
053 break;
054 case PDON:
055 // exonE to e-iI
056 // k = e-i
057 iind = Interval29Tools.check012(state-15);
058 eind = Interval29Tools.check012(prevState-1);
059 if ((pos-eind+iind)%3 != 0) { valid = false; break; }
060 valid = pos < 0 || donorConstraintPlus(seq, pos);
061 break;
062 case PACC:
063 // intronI to i-eE
064 // k = e-i
065 eind = Interval29Tools.check012(state-18);
066 iind = Interval29Tools.check012(prevState-4);
067 if ((pos+2-eind+iind)%3 != 0) { valid = false; break; }
068 valid = (pos+2 >= seq.length()) || acceptorConstraintPlus(seq, pos+2);
069 break;
070 case PSTOP:
071 // exonE to e-ig
072 eind = Interval29Tools.check012(prevState-1);
073 if ((pos-eind)%3 != 0) { valid = false; break; }
074 valid = stopEdgeConstraintPlus(seq, pos);
075 break; //done to here
076 case MSTART:
077 // exonEm to em-ig
078 eind = Interval29Tools.check012(prevState-7);
079 if ((pos-eind)%3 != 0) { valid = false; break; }
080 valid = startConstraintMinus(seq, pos);
081 break;
082 case MDON:
083 // intronIm to im-eEm
084 // k = e+i
085 eind = Interval29Tools.check012(state-26);
086 iind = Interval29Tools.check012(prevState-10);
087 if ((pos+2-eind-iind)%3 != 0) { valid = false; break; }
088 valid = (pos+2 >= seq.length()) || donorConstraintMinus(seq, pos+2);
089 break;
090 case MACC:
091 // exonEm to e-iIm
092 // k = e+i
093 iind = Interval29Tools.check012(state-23);
094 eind = Interval29Tools.check012(prevState-7);
095 if ((pos-eind-iind)%3 != 0) { valid = false; break; }
096 valid = pos < 0 || acceptorConstraintMinus(seq, pos);
097 break;
098 case MSTOP:
099 // ig-em to exonEm
100 eind = Interval29Tools.check012(state - 7);
101 if ((pos-eind)%3 != 0) { valid = false; break; }
102 valid = stopEdgeConstraintMinus(seq, pos);
103 break;
104 case PCODE: // redundant with node invalidation below
105 eind = Interval29Tools.check012(state-1);
106 if ( (pos-eind)%3 == 2) {
107 valid = !stopNodeConstraintPlus(seq, pos);
108 }
109 break;
110 case MCODE: // redundant with node evaluation below
111 eind = Interval29Tools.check012(state-7);
112 if ( (pos-eind)%3==0) {
113 valid = !stopNodeConstraintMinus(seq, pos);
114 }
115 break;
116 case PKEEPE:
117 // i-e_j to e_j
118 eind1 = Interval29Tools.check012(prevState - 18);
119 eind2 = Interval29Tools.check012(state - 1);
120 if (eind1 != eind2) {
121 valid = false;
122 } else {
123 valid = acceptorConstraintPlus(seq, pos);
124 }
125 //valid = false;
126 break;
127 case PKEEPI:
128 // e-i_j to i_j
129 iind1 = Interval29Tools.check012(prevState - 15);
130 iind2 = Interval29Tools.check012(state - 4);
131 if (iind1 != iind2) {
132 valid = false;
133 } else {
134 valid = pos-2 < 0 || donorConstraintPlus(seq, pos-2);
135 }
136 //valid = false;
137 break;
138 case MKEEPE:
139 // im-e_jm to e_jm
140 eind1 = Interval29Tools.check012(prevState - 26);
141 eind2 = Interval29Tools.check012(state - 7);
142 if (eind1 != eind2) {
143 valid = false;
144 } else {
145 valid = donorConstraintMinus(seq, pos);
146 }
147 //valid = false;
148 break;
149 case MKEEPI:
150 // em-i_jm to i_jm
151 iind1 = Interval29Tools.check012(prevState - 23);
152 iind2 = Interval29Tools.check012(state - 10);
153 if (iind1 != iind2) {
154 valid = false;
155 } else {
156 valid = pos-2 < 0 || acceptorConstraintMinus(seq, pos-2);
157 }
158 //valid = false;
159 break;
160 case PSTOPPED:
161 valid = pos-2 < 0 || stopEdgeConstraintPlus(seq, pos-2);
162 break;
163 case MSTARTED:
164 valid = pos-2 < 0 || startConstraintMinus(seq, pos-2);
165 break;
166 case PWILLSTART:
167 valid = pos+2 >= seq.length() || startConstraintPlus(seq, pos+2);
168 break;
169 case MWILLSTOP:
170 valid = pos+2 >= seq.length() || stopEdgeConstraintMinus(seq, pos+2);
171 break;
172 default:
173 Assert.a(false);
174 }
175
176 if(valid == false)
177 result.invalidate();
178 }
179
180 public void evaluateNode(InputSequence<? extends Character> seq, int pos, int state, FeatureList result) {
181 boolean valid = true;
182
183 int eind;
184
185 switch(Interval29Tools.nodeConstraints[state]) {
186 case NONE:
187 break;
188 case NEVER:
189 Assert.a(false);
190 break;
191 case PCODE:
192 eind = Interval29Tools.check012(state-1);
193 if ( (pos-eind)%3 == 2) {
194 valid = !stopNodeConstraintPlus(seq, pos);
195 }
196 break;
197 case MCODE:
198 eind = Interval29Tools.check012(state-7);
199 if ( (pos-eind)%3==0) {
200 valid = !stopNodeConstraintMinus(seq, pos);
201 }
202 break;
203 default:
204 Assert.a(false);
205 }
206 if(valid == false)
207 result.invalidate();
208 }
209
210
211 private boolean startConstraintPlus(InputSequence<? extends Character> seq, int pos) {
212 return (seq.length() > pos + 2) && seq.getX(pos) == 'A' && seq.getX(pos+1) == 'T' && seq.getX(pos+2) == 'G';
213 }
214
215 private boolean startConstraintMinus(InputSequence<? extends Character> seq, int pos) {
216 return (pos >= 3) && seq.getX(pos-3) == 'C' && seq.getX(pos-2) == 'A' && seq.getX(pos-1) == 'T';
217 }
218
219 private boolean donorConstraintPlus(InputSequence<? extends Character> seq, int pos) {
220 return (seq.length() > pos + 1) && seq.getX(pos) == 'G' && (seq.getX(pos+1) == 'T' || seq.getX(pos+1) == 'C');
221 }
222
223 private boolean donorConstraintMinus(InputSequence<? extends Character> seq, int pos) {
224 return (pos >= 2) && (seq.getX(pos-2) == 'A' || seq.getX(pos-2) == 'G') && seq.getX(pos-1) == 'C';
225 }
226
227 private boolean acceptorConstraintPlus(InputSequence<? extends Character> seq, int pos) {
228 return (pos > 1) && seq.getX(pos-2) == 'A' && seq.getX(pos-1) == 'G';
229 }
230
231 private boolean acceptorConstraintMinus(InputSequence<? extends Character> seq, int pos) {
232 return (seq.length() > pos + 1) && seq.getX(pos) == 'C' && seq.getX(pos+1) == 'T';
233 }
234
235 //////////////////////////////////
236
237
238 private boolean stopEdgeConstraintPlus(InputSequence<? extends Character> seq, int pos) {
239 if(pos < (seq.length()-2) && seq.getX(pos) == 'T') {
240 return (seq.getX(pos+1) == 'A' && (seq.getX(pos+2) == 'G' || seq.getX(pos+2) == 'A'))
241 || (seq.getX(pos+1) == 'G' && seq.getX(pos+2) == 'A');
242 }
243 return false;
244 }
245
246 private boolean stopEdgeConstraintMinus(InputSequence<? extends Character> seq, int pos) {
247 if(pos>=3 && seq.getX(pos-1) == 'A') {
248 return (seq.getX(pos-2) == 'T' && (seq.getX(pos-3) == 'C' || seq.getX(pos-3) == 'T'))
249 || (seq.getX(pos-2) == 'C' && seq.getX(pos-3) == 'T');
250 }
251 return false;
252 }
253
254 /////////////////////////////////////////
255
256 private boolean stopNodeConstraintPlus(InputSequence<? extends Character> seq, int pos) {
257 if(pos >= 2 && seq.getX(pos-2) == 'T') {
258 return (seq.getX(pos-1) == 'A' && (seq.getX(pos) == 'G' || seq.getX(pos) == 'A'))
259 || (seq.getX(pos-1) == 'G' && seq.getX(pos) == 'A');
260 }
261 return false;
262 }
263
264 private boolean stopNodeConstraintMinus(InputSequence<? extends Character> seq, int pos) {
265 if(pos<(seq.length()-2) && seq.getX(pos+2) == 'A') {
266 return (seq.getX(pos+1) == 'T' && (seq.getX(pos) == 'C' || seq.getX(pos) == 'T'))
267 || (seq.getX(pos+1) == 'C' && seq.getX(pos) == 'T');
268 }
269 return false;
270 }
271
272 @Override
273 public CacheStrategySpec getCacheStrategy() {
274 return new CacheStrategySpec(CacheStrategy.UNSPECIFIED);
275 }
276
277 }