001    package calhoun.analysis.crf.features.interval13;
002    
003    import java.util.List;
004    
005    import org.apache.commons.logging.Log;
006    import org.apache.commons.logging.LogFactory;
007    
008    import calhoun.analysis.crf.AbstractFeatureManager;
009    import calhoun.analysis.crf.CacheStrategySpec;
010    import calhoun.analysis.crf.FeatureList;
011    import calhoun.analysis.crf.FeatureManagerEdge;
012    import calhoun.analysis.crf.FeatureManagerNode;
013    import calhoun.analysis.crf.ModelManager;
014    import calhoun.analysis.crf.CacheStrategySpec.CacheStrategy;
015    import calhoun.analysis.crf.io.InputSequence;
016    import calhoun.analysis.crf.io.TrainingSequence;
017    import calhoun.util.Assert;
018    
019    /** Implements basic constraints on gene calls.
020     * 1) Intergenic - start must occur at ATG
021     * 2) Splice sites must be canonical GT/AG or GC/AG
022     * 3) Exon-stop must be followed by a start codon
023     */
024    public class GeneConstraintsInterval13  extends AbstractFeatureManager<Character> implements FeatureManagerEdge<Character>, FeatureManagerNode<Character> {
025            @SuppressWarnings("unused")
026            private static final Log log = LogFactory.getLog(GeneConstraintsInterval13.class);
027            private static final long serialVersionUID = 3041359216265032511L;
028            
029            public String getFeatureName(int featureIndex) {
030                    return "Gene constraints for the model Interval13";
031            }
032    
033            /** This is a constraint class, so we don't return features */
034            public int getNumFeatures() {
035                    return 0;
036            }
037    
038            public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends Character>> data) {
039                    Interval13Tools.verify(modelInfo);
040            }
041            
042            public void evaluateEdge(InputSequence<? extends Character> seq, int pos, int prevState, int state, FeatureList result) {
043                    boolean valid = true;
044                    
045                    int eind,iind;
046                    
047                    switch(Interval13Tools.edgeConstraints[prevState*Interval13Tools.numStates + state]) {
048                    case NONE:
049                            break;
050                    case NEVER:
051                            Assert.a(false);
052                            break;
053                    case PSTART:
054                            eind = Interval13Tools.check012(state-1);
055                            if ((pos-eind)%3 != 0) { valid = false; break; } 
056                            valid = startConstraintPlus(seq, pos);
057                            break;
058                    case PDON:
059                            iind = Interval13Tools.check012(state-4);
060                            eind = Interval13Tools.check012(prevState-1);
061                            if ((pos-eind+iind)%3 != 0) { valid = false; break; } 
062                            valid = donorConstraintPlus(seq, pos);
063                            break;
064                    case PACC:
065                            iind = Interval13Tools.check012(prevState-4);
066                            eind = Interval13Tools.check012(state-1);
067                            if ((pos-eind+iind)%3 != 0) { valid = false; break; } 
068                            valid = acceptorConstraintPlus(seq, pos);
069                            break;
070                    case PSTOP:
071                            eind = Interval13Tools.check012(prevState-1);
072                            if ((pos-eind)%3 != 0) { valid = false; break; } 
073                            valid = stopEdgeConstraintPlus(seq, pos);
074                            break;
075                    case MSTART:
076                            eind = Interval13Tools.check012(prevState-7);
077                            if ((pos-eind)%3 != 0) { valid = false; break; } 
078                            valid = startConstraintMinus(seq, pos);
079                            break;
080                    case MDON:
081                            iind = Interval13Tools.check012(prevState-10);
082                            eind = Interval13Tools.check012(state-7);
083                            if ((pos-eind-iind)%3 != 0) { valid = false; break; } 
084                            valid = donorConstraintMinus(seq, pos);
085                            break;
086                    case MACC:
087                            iind = Interval13Tools.check012(state-10);
088                            eind = Interval13Tools.check012(prevState-7);
089                            if ((pos-eind-iind)%3 != 0) { valid = false; break; }                   
090                            valid = acceptorConstraintMinus(seq, pos);
091                            break;
092                    case MSTOP:
093                            eind = Interval13Tools.check012(state-7);
094                            if ((pos-eind)%3 != 0) { valid = false; break; } 
095                            valid = stopEdgeConstraintMinus(seq, pos);
096                            break;
097                    case PCODE: // redundant with node invalidation below
098                            eind = Interval13Tools.check012(state-1);
099                            if ( (pos-eind)%3 == 2) {
100                                    valid = !stopNodeConstraintPlus(seq, pos);
101                            }
102                            break;
103                    case MCODE: // redundant iwth node evaluation below
104                            eind = Interval13Tools.check012(state-7);
105                            if ( (pos-eind)%3==0) {
106                                    valid = !stopNodeConstraintMinus(seq, pos);
107                            }
108                            break;
109                    default:
110                            Assert.a(false);
111                    }
112                    
113                    // This debugging code is pretty clutch
114    //              if(!valid) {
115    //                      String str = "";
116    //                      for (int i = -8; i < 9; i++) {
117    //                              str += seq.getX(pos+i);
118    //                      }
119    //                      System.out.println("        v        ");
120    //                      System.out.println(str);
121    //                      System.out.println(seq.toString());
122    //              }                       
123                    
124                    if(valid == false)
125                            result.invalidate();
126            }
127    
128            public void evaluateNode(InputSequence<? extends Character> seq, int pos, int state, FeatureList result) {
129                    boolean valid = true;
130            
131                    int eind;
132                    
133                    switch(Interval13Tools.nodeConstraints[state]) {
134                    case NONE:
135                            break;
136                    case NEVER:
137                            Assert.a(false);
138                            break;
139                    case PCODE:
140                            eind = Interval13Tools.check012(state-1);
141                            if ( (pos-eind)%3 == 2) {
142                                    valid = !stopNodeConstraintPlus(seq, pos);
143                            }
144                            break;
145                    case MCODE:
146                            eind = Interval13Tools.check012(state-7);
147                            if ( (pos-eind)%3==0) {
148                                    valid = !stopNodeConstraintMinus(seq, pos);
149                            }
150                            break;
151                    default:
152                            Assert.a(false);
153                    }       
154                    if(valid == false)
155                            result.invalidate();
156            }
157            
158            
159            private boolean startConstraintPlus(InputSequence<? extends Character> seq, int pos) {
160                    return (seq.length() > pos + 2) && seq.getX(pos) == 'A' && seq.getX(pos+1) == 'T' && seq.getX(pos+2) == 'G';
161            }
162    
163            private boolean startConstraintMinus(InputSequence<? extends Character> seq, int pos) {
164                    return (pos >= 3) && seq.getX(pos-3) == 'C' && seq.getX(pos-2) == 'A' && seq.getX(pos-1) == 'T';
165            }
166    
167            private boolean donorConstraintPlus(InputSequence<? extends Character> seq, int pos) {
168                    return (seq.length() > pos + 1) && seq.getX(pos) == 'G' && (seq.getX(pos+1) == 'T' || seq.getX(pos+1) == 'C');
169            }
170    
171            private boolean donorConstraintMinus(InputSequence<? extends Character> seq, int pos) {
172                    boolean ret = (pos >= 2) && (seq.getX(pos-2) == 'A' || seq.getX(pos-2) == 'G') && seq.getX(pos-1) == 'C';
173                    //if(!ret) log.warn("Seq wrong at MDON");
174                    return ret;
175            }
176    
177            private boolean acceptorConstraintPlus(InputSequence<? extends Character> seq, int pos) {
178                    return (pos > 1) && seq.getX(pos-2) == 'A' && seq.getX(pos-1) == 'G';
179            }
180    
181            private boolean acceptorConstraintMinus(InputSequence<? extends Character> seq, int pos) {
182                    boolean ret = (seq.length() > pos + 1) && seq.getX(pos) == 'C' && seq.getX(pos+1) == 'T';
183                    //if(!ret) log.warn("Seq wrong at MACC - expected CT but was "+seq.getX(pos)+seq.getX(pos+1));
184                    return ret;
185            }
186    
187            //////////////////////////////////
188            
189            
190            private boolean stopEdgeConstraintPlus(InputSequence<? extends Character> seq, int pos) {
191                    if(pos < (seq.length()-2) && seq.getX(pos) == 'T') {
192                            return (seq.getX(pos+1) == 'A' && (seq.getX(pos+2) == 'G' || seq.getX(pos+2) == 'A'))
193                                            || (seq.getX(pos+1) == 'G' && seq.getX(pos+2) == 'A');
194                    }
195                    return false;
196            }
197    
198            private boolean stopEdgeConstraintMinus(InputSequence<? extends Character> seq, int pos) {
199                    if(pos>=3 && seq.getX(pos-1) == 'A') {
200                            boolean ret = (seq.getX(pos-2) == 'T' && (seq.getX(pos-3) == 'C' || seq.getX(pos-3) == 'T'))
201                                            || (seq.getX(pos-2) == 'C' && seq.getX(pos-3) == 'T');
202                            //if(!ret) log.warn("Seq wrong at MSTOP edge entry");
203                            return ret;
204                    }
205                    //log.warn("Seq wrong at MSTOP edge exit");
206                    return false;
207            }
208            
209            /////////////////////////////////////////
210            
211            private boolean stopNodeConstraintPlus(InputSequence<? extends Character> seq, int pos) {
212                    if(pos >= 2 && seq.getX(pos-2) == 'T') {
213                            return (seq.getX(pos-1) == 'A' && (seq.getX(pos) == 'G' || seq.getX(pos) == 'A'))
214                                            || (seq.getX(pos-1) == 'G' && seq.getX(pos) == 'A');
215                    }
216                    return false;
217            }
218    
219            private boolean stopNodeConstraintMinus(InputSequence<? extends Character> seq, int pos) {
220                    if(pos<(seq.length()-2) && seq.getX(pos+2) == 'A') {
221                            boolean ret = (seq.getX(pos+1) == 'T' && (seq.getX(pos) == 'C' || seq.getX(pos) == 'T'))
222                                            || (seq.getX(pos+1) == 'C' && seq.getX(pos) == 'T');
223                            return ret;
224                    }
225                    return false;
226            }
227            
228            @Override
229            public CacheStrategySpec getCacheStrategy() {
230                    return new CacheStrategySpec(CacheStrategy.UNSPECIFIED);
231            }
232    
233    }