001    package calhoun.analysis.crf.features.interval13;
002    
003    import java.util.List;
004    
005    import org.apache.commons.logging.Log;
006    import org.apache.commons.logging.LogFactory;
007    
008    import calhoun.analysis.crf.AbstractFeatureManager;
009    import calhoun.analysis.crf.CacheStrategySpec;
010    import calhoun.analysis.crf.FeatureList;
011    import calhoun.analysis.crf.FeatureManagerEdge;
012    import calhoun.analysis.crf.FeatureManagerNode;
013    import calhoun.analysis.crf.ModelManager;
014    import calhoun.analysis.crf.CacheStrategySpec.CacheStrategy;
015    import calhoun.analysis.crf.io.CompositeInput;
016    import calhoun.analysis.crf.io.InputSequence;
017    import calhoun.analysis.crf.io.TrainingSequence;
018    
019    public class ESTInterval13 extends AbstractFeatureManager<CompositeInput> implements FeatureManagerNode<CompositeInput> , FeatureManagerEdge<CompositeInput> {
020            private static final long serialVersionUID = -7659288739348604129L;
021            private static final Log log = LogFactory.getLog(ESTInterval13.class);
022            boolean debug = log.isDebugEnabled();
023            
024    
025            
026            int startIx;  // The index of the first feature managed by this FeatureManager
027            ModelManager model;
028            
029            boolean[] intergenicState;
030            boolean[] plusExonState;
031            boolean[] minusExonState;
032            boolean[] plusIntronState;
033            boolean[] minusIntronState;     
034            
035            public ESTInterval13() {
036            }
037    
038            public int getNumFeatures() {
039                    return 9;
040            }       
041            
042            public String getFeatureName(int featureIndex) {
043                    String[] types = new String[] {"exon", "intron", "Intergenic"};
044                    String[] evidence = new String[] {"consistent", "mixed"};
045                    int raw = featureIndex-startIx;
046                    if(raw < 6) {
047                            return "EST "+types[raw/2]+" "+evidence[raw%2];
048                    }
049                    else {
050                            String[] edge = new String[] {"acceptor", "donor", "no edge"};
051                            return "EST "+edge[raw-6];
052                    }
053            }
054    
055            public void evaluateNode(InputSequence<? extends CompositeInput> seq, int pos, int state, FeatureList result) {
056                    if(pos == seq.length()-1) {
057                            return;
058                    }               
059    
060                    InputSequence<Integer>  pest = (InputSequence<Integer>) seq.getComponent("pest");
061                    InputSequence<Integer>  mest = (InputSequence<Integer>) seq.getComponent("mest");
062    
063                    int plusEst  = pest.getX(pos+1);
064                    int minusEst = mest.getX(pos+1);
065                    // 0 - no data
066                    // 1 - exon only
067                    // 2 - intron only
068                    // 3 - mixed
069    
070                    if (plusExonState[state] && (plusEst==1)) { result.addFeature(startIx, 1); }
071                    if (minusExonState[state] && (minusEst==1)) { result.addFeature(startIx, 1); }
072    
073                    if (plusExonState[state] && (plusEst==3)) { result.addFeature(startIx+1, 1); }
074                    if (minusExonState[state] && (minusEst==3)) { result.addFeature(startIx+1, 1); }
075                    
076                    if (plusIntronState[state] && (plusEst==2)) { result.addFeature(startIx+2, 1); }
077                    if (minusIntronState[state] && (minusEst==2)) { result.addFeature(startIx+2, 1); }
078    
079                    if (plusIntronState[state] && (plusEst==3)) { result.addFeature(startIx+3, 1); }
080                    if (minusIntronState[state] && (minusEst==3)) { result.addFeature(startIx+3, 1); }                      
081    
082                    if (intergenicState[state] && (plusEst==1 || minusEst==1 || plusEst==2 || minusEst==2)) { result.addFeature(startIx+4, 1); }                    
083                    if (intergenicState[state] && (plusEst==3 || minusEst==3)) { result.addFeature(startIx+5, 1); }                 
084            }
085    
086            public void evaluateEdge(InputSequence<? extends CompositeInput> seq, int pos, int prevState, int state, FeatureList result) {
087                    if( (pos == seq.length()-1) || (pos == 0) ) {
088                            return;
089                    }               
090                    
091                    InputSequence<Integer>  pest = (InputSequence<Integer>) seq.getComponent("pest");
092                    InputSequence<Integer>  mest = (InputSequence<Integer>) seq.getComponent("mest");
093    
094                    int plusE  = pest.getX(pos+1); int plusEp = pest.getX(pos);
095                    int minusE = mest.getX(pos+1); int minusEp = mest.getX(pos);
096                    
097                    Boolean plusacc = ((plusEp==2) && (plusE==1));
098                    Boolean plusdon = ((plusEp==1) && (plusE==2));
099                    Boolean minusacc = ((minusEp==1) && (minusE==2));
100                    Boolean minusdon = ((minusEp==2) && (minusE==1));
101                    
102                    if (plusacc  && plusExonState[state] && plusIntronState[prevState]) { result.addFeature(startIx+6,1); return; }
103                    if (minusacc && minusExonState[prevState] && minusIntronState[state]) { result.addFeature(startIx+6,1); return;}
104                    
105                    if (plusdon && plusIntronState[state] && plusExonState[prevState]) { result.addFeature(startIx+7,1); return;}
106                    if (minusdon && plusIntronState[prevState] && plusExonState[state]) { result.addFeature(startIx+7,1); return;}
107    
108                    result.addFeature(startIx+8,1);
109            }
110                    
111            public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends CompositeInput>> data) {
112                    startIx = startingIndex;
113                    model = modelInfo;
114    
115                    int nStates = model.getNumStates();
116                    
117                    plusExonState = new boolean[nStates];
118                    plusExonState[model.getStateIndex("exon0")] = true;
119                    plusExonState[model.getStateIndex("exon1")] = true;
120                    plusExonState[model.getStateIndex("exon2")] = true;
121                    
122                    minusExonState = new boolean[nStates]; 
123                    minusExonState[model.getStateIndex("exon0m")] = true;
124                    minusExonState[model.getStateIndex("exon1m")] = true;
125                    minusExonState[model.getStateIndex("exon2m")] = true;           
126                    
127                    plusIntronState = new boolean[nStates];
128                    plusIntronState[model.getStateIndex("intron0")] = true;
129                    plusIntronState[model.getStateIndex("intron1")] = true;
130                    plusIntronState[model.getStateIndex("intron2")] = true;
131                    
132                    minusIntronState = new boolean[nStates]; 
133                    minusIntronState[model.getStateIndex("intron0m")] = true;
134                    minusIntronState[model.getStateIndex("intron1m")] = true;
135                    minusIntronState[model.getStateIndex("intron2m")] = true;       
136    
137                    intergenicState = new boolean[nStates];
138                    intergenicState[model.getStateIndex("intergenic")] = true;
139            }
140    
141            @Override
142            public CacheStrategySpec getCacheStrategy() {
143                    return new CacheStrategySpec(CacheStrategy.DENSE);
144            }
145    
146    }
147