001    package calhoun.analysis.crf.features.interval29;
002    
003    import java.util.List;
004    
005    import calhoun.analysis.crf.AbstractFeatureManager;
006    import calhoun.analysis.crf.CacheStrategySpec;
007    import calhoun.analysis.crf.FeatureList;
008    import calhoun.analysis.crf.FeatureManagerEdge;
009    import calhoun.analysis.crf.FeatureManagerNode;
010    import calhoun.analysis.crf.ModelManager;
011    import calhoun.analysis.crf.CacheStrategySpec.CacheStrategy;
012    import calhoun.analysis.crf.io.InputSequence;
013    import calhoun.analysis.crf.io.TrainingSequence;
014    import calhoun.util.Assert;
015    
016    /** Implements basic constraints on gene calls.
017     * 1) Intergenic - start must occur at ATG
018     * 2) Splice sites must be canonical GT/AG or GC/AG
019     * 3) Exon-stop must be followed by a start codon
020     */
021    public class GeneConstraintsInterval29  extends AbstractFeatureManager<Character> implements FeatureManagerEdge<Character>, FeatureManagerNode<Character> {
022            private static final long serialVersionUID = 3041359216265032511L;
023            
024            public String getFeatureName(int featureIndex) {
025                    return "Gene constraints for the model Interval29";
026            }
027    
028            /** This is a constraint class, so we don't return features */
029            public int getNumFeatures() {
030                    return 0;
031            }
032    
033            public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends Character>> data) {
034                    Interval29Tools.verify(modelInfo);
035            }
036            
037            public void evaluateEdge(InputSequence<? extends Character> seq, int pos, int prevState, int state, FeatureList result) {
038                    boolean valid = true;
039                    
040                    int eind,iind;
041                    int eind1, eind2, iind1, iind2;
042                    switch(Interval29Tools.edgeConstraints[prevState*Interval29Tools.numStates + state]) {
043                    case NONE:
044                            break;
045                    case NEVER:
046                            Assert.a(false);
047                            break;
048                    case PSTART:
049                            // ig-e to exonE
050                            eind = Interval29Tools.check012(state-1);
051                            if ((pos-eind)%3 != 0) { valid = false; break; } 
052                            valid = startConstraintPlus(seq, pos);
053                            break;
054                    case PDON:
055                            // exonE to e-iI 
056                            // k = e-i
057                            iind = Interval29Tools.check012(state-15);
058                            eind = Interval29Tools.check012(prevState-1);
059                            if ((pos-eind+iind)%3 != 0) { valid = false; break; } 
060                            valid = pos < 0 || donorConstraintPlus(seq, pos);
061                            break;
062                    case PACC:
063                            // intronI to i-eE
064                            // k = e-i
065                            eind = Interval29Tools.check012(state-18);
066                            iind = Interval29Tools.check012(prevState-4);
067                            if ((pos+2-eind+iind)%3 != 0) { valid = false; break; } 
068                            valid = (pos+2 >= seq.length()) || acceptorConstraintPlus(seq, pos+2);
069                            break;
070                    case PSTOP:
071                            // exonE to e-ig
072                            eind = Interval29Tools.check012(prevState-1);
073                            if ((pos-eind)%3 != 0) { valid = false; break; } 
074                            valid = stopEdgeConstraintPlus(seq, pos);
075                            break; //done to here
076                    case MSTART:
077                            // exonEm to em-ig
078                            eind = Interval29Tools.check012(prevState-7);
079                            if ((pos-eind)%3 != 0) { valid = false; break; } 
080                            valid = startConstraintMinus(seq, pos);
081                            break;
082                    case MDON:
083                            // intronIm to im-eEm
084                            // k = e+i
085                            eind = Interval29Tools.check012(state-26);
086                            iind = Interval29Tools.check012(prevState-10);
087                            if ((pos+2-eind-iind)%3 != 0) { valid = false; break; } 
088                            valid = (pos+2 >= seq.length()) || donorConstraintMinus(seq, pos+2);
089                            break;
090                    case MACC:
091                            // exonEm to e-iIm
092                            // k = e+i
093                            iind = Interval29Tools.check012(state-23);
094                            eind = Interval29Tools.check012(prevState-7);           
095                            if ((pos-eind-iind)%3 != 0) { valid = false; break; }                   
096                            valid = pos < 0 || acceptorConstraintMinus(seq, pos);
097                            break;
098                    case MSTOP:
099                            // ig-em to exonEm
100                            eind = Interval29Tools.check012(state - 7);
101                            if ((pos-eind)%3 != 0) { valid = false; break; } 
102                            valid = stopEdgeConstraintMinus(seq, pos);
103                            break;
104                    case PCODE: // redundant with node invalidation below
105                            eind = Interval29Tools.check012(state-1);
106                            if ( (pos-eind)%3 == 2) {
107                                    valid = !stopNodeConstraintPlus(seq, pos);
108                            }
109                            break;
110                    case MCODE: // redundant with node evaluation below
111                            eind = Interval29Tools.check012(state-7);
112                            if ( (pos-eind)%3==0) {
113                                    valid = !stopNodeConstraintMinus(seq, pos);
114                            }
115                            break;
116                    case PKEEPE:
117                            // i-e_j to e_j
118                            eind1 = Interval29Tools.check012(prevState - 18);
119                            eind2 = Interval29Tools.check012(state - 1);
120                            if (eind1 != eind2) {
121                                    valid = false;
122                            } else {
123                                    valid = acceptorConstraintPlus(seq, pos);
124                            }
125                            //valid = false;
126                            break;
127                    case PKEEPI:
128                            // e-i_j to i_j
129                            iind1 = Interval29Tools.check012(prevState - 15);
130                            iind2 = Interval29Tools.check012(state - 4);
131                            if (iind1 != iind2) {
132                                    valid = false;
133                            } else {
134                                    valid = pos-2 < 0 || donorConstraintPlus(seq, pos-2);
135                            }               
136                            //valid = false;
137                            break;
138                    case MKEEPE:
139                            // im-e_jm to e_jm
140                            eind1 = Interval29Tools.check012(prevState - 26);
141                            eind2 = Interval29Tools.check012(state - 7);
142                            if (eind1 != eind2) {
143                                    valid = false;
144                            } else {
145                                    valid = donorConstraintMinus(seq, pos);
146                            }
147                            //valid = false;
148                            break;
149                    case MKEEPI:
150                            // em-i_jm to i_jm
151                            iind1 = Interval29Tools.check012(prevState - 23);
152                            iind2 = Interval29Tools.check012(state - 10);
153                            if (iind1 != iind2) {
154                                    valid = false;
155                            } else {
156                                    valid = pos-2 < 0 || acceptorConstraintMinus(seq, pos-2);
157                            }       
158                            //valid = false;
159                            break;
160                    case PSTOPPED:
161                            valid = pos-2 < 0 || stopEdgeConstraintPlus(seq, pos-2);
162                            break;
163                    case MSTARTED:
164                            valid = pos-2 < 0 || startConstraintMinus(seq, pos-2);
165                            break;
166                    case PWILLSTART:
167                            valid = pos+2 >= seq.length() || startConstraintPlus(seq, pos+2);
168                            break;
169                    case MWILLSTOP:
170                            valid = pos+2 >= seq.length() || stopEdgeConstraintMinus(seq, pos+2);
171                            break;
172                    default:
173                            Assert.a(false);
174                    }
175                    
176                    if(valid == false)
177                            result.invalidate();
178            }
179    
180            public void evaluateNode(InputSequence<? extends Character> seq, int pos, int state, FeatureList result) {
181                    boolean valid = true;
182            
183                    int eind;
184                    
185                    switch(Interval29Tools.nodeConstraints[state]) {
186                    case NONE:
187                            break;
188                    case NEVER:
189                            Assert.a(false);
190                            break;
191                    case PCODE:
192                            eind = Interval29Tools.check012(state-1);
193                            if ( (pos-eind)%3 == 2) {
194                                    valid = !stopNodeConstraintPlus(seq, pos);
195                            }
196                            break;
197                    case MCODE:
198                            eind = Interval29Tools.check012(state-7);
199                            if ( (pos-eind)%3==0) {
200                                    valid = !stopNodeConstraintMinus(seq, pos);
201                            }
202                            break;
203                    default:
204                            Assert.a(false);
205                    }       
206                    if(valid == false)
207                            result.invalidate();
208            }
209            
210            
211            private boolean startConstraintPlus(InputSequence<? extends Character> seq, int pos) {
212                    return (seq.length() > pos + 2) && seq.getX(pos) == 'A' && seq.getX(pos+1) == 'T' && seq.getX(pos+2) == 'G';
213            }
214    
215            private boolean startConstraintMinus(InputSequence<? extends Character> seq, int pos) {
216                    return (pos >= 3) && seq.getX(pos-3) == 'C' && seq.getX(pos-2) == 'A' && seq.getX(pos-1) == 'T';
217            }
218    
219            private boolean donorConstraintPlus(InputSequence<? extends Character> seq, int pos) {
220                    return (seq.length() > pos + 1) && seq.getX(pos) == 'G' && (seq.getX(pos+1) == 'T' || seq.getX(pos+1) == 'C');
221            }
222    
223            private boolean donorConstraintMinus(InputSequence<? extends Character> seq, int pos) {
224                    return (pos >= 2) && (seq.getX(pos-2) == 'A' || seq.getX(pos-2) == 'G') && seq.getX(pos-1) == 'C';
225            }
226    
227            private boolean acceptorConstraintPlus(InputSequence<? extends Character> seq, int pos) {
228                    return (pos > 1) && seq.getX(pos-2) == 'A' && seq.getX(pos-1) == 'G';
229            }
230    
231            private boolean acceptorConstraintMinus(InputSequence<? extends Character> seq, int pos) {
232                    return (seq.length() > pos + 1) && seq.getX(pos) == 'C' && seq.getX(pos+1) == 'T';
233            }
234    
235            //////////////////////////////////
236            
237            
238            private boolean stopEdgeConstraintPlus(InputSequence<? extends Character> seq, int pos) {
239                    if(pos < (seq.length()-2) && seq.getX(pos) == 'T') {
240                            return (seq.getX(pos+1) == 'A' && (seq.getX(pos+2) == 'G' || seq.getX(pos+2) == 'A'))
241                                            || (seq.getX(pos+1) == 'G' && seq.getX(pos+2) == 'A');
242                    }
243                    return false;
244            }
245    
246            private boolean stopEdgeConstraintMinus(InputSequence<? extends Character> seq, int pos) {
247                    if(pos>=3 && seq.getX(pos-1) == 'A') {
248                            return (seq.getX(pos-2) == 'T' && (seq.getX(pos-3) == 'C' || seq.getX(pos-3) == 'T'))
249                                            || (seq.getX(pos-2) == 'C' && seq.getX(pos-3) == 'T');
250                    }
251                    return false;
252            }
253            
254            /////////////////////////////////////////
255            
256            private boolean stopNodeConstraintPlus(InputSequence<? extends Character> seq, int pos) {
257                    if(pos >= 2 && seq.getX(pos-2) == 'T') {
258                            return (seq.getX(pos-1) == 'A' && (seq.getX(pos) == 'G' || seq.getX(pos) == 'A'))
259                                            || (seq.getX(pos-1) == 'G' && seq.getX(pos) == 'A');
260                    }
261                    return false;
262            }
263    
264            private boolean stopNodeConstraintMinus(InputSequence<? extends Character> seq, int pos) {
265                    if(pos<(seq.length()-2) && seq.getX(pos+2) == 'A') {
266                            return (seq.getX(pos+1) == 'T' && (seq.getX(pos) == 'C' || seq.getX(pos) == 'T'))
267                                            || (seq.getX(pos+1) == 'C' && seq.getX(pos) == 'T');
268                    }
269                    return false;
270            }
271            
272            @Override
273            public CacheStrategySpec getCacheStrategy() {
274                    return new CacheStrategySpec(CacheStrategy.UNSPECIFIED);
275            }
276    
277    }