001 package calhoun.analysis.crf.features.interval29;
002
003 import java.util.List;
004
005 import org.apache.commons.logging.Log;
006 import org.apache.commons.logging.LogFactory;
007
008 import calhoun.analysis.crf.AbstractFeatureManager;
009 import calhoun.analysis.crf.CacheStrategySpec;
010 import calhoun.analysis.crf.FeatureList;
011 import calhoun.analysis.crf.FeatureManagerEdge;
012 import calhoun.analysis.crf.FeatureManagerNode;
013 import calhoun.analysis.crf.ModelManager;
014 import calhoun.analysis.crf.CacheStrategySpec.CacheStrategy;
015 import calhoun.analysis.crf.io.CompositeInput;
016 import calhoun.analysis.crf.io.InputSequence;
017 import calhoun.analysis.crf.io.TrainingSequence;
018
019 public class ESTInterval29 extends AbstractFeatureManager<CompositeInput> implements FeatureManagerNode<CompositeInput> , FeatureManagerEdge<CompositeInput> {
020 private static final long serialVersionUID = -7659288739348604129L;
021 private static final Log log = LogFactory.getLog(ESTInterval29.class);
022 boolean debug = log.isDebugEnabled();
023
024
025
026 int startIx; // The index of the first feature managed by this FeatureManager
027 ModelManager model;
028
029 boolean[] plusExonState;
030 boolean[] minusExonState;
031 boolean[] plusIntronState;
032 boolean[] minusIntronState;
033
034 public ESTInterval29() {
035 }
036
037 public int getNumFeatures() {
038 return 6;
039 }
040
041 public String getFeatureName(int featureIndex) {
042 return "ESTEdge";
043 }
044
045 public void evaluateNode(InputSequence<? extends CompositeInput> seq, int pos, int state, FeatureList result) {
046 if(pos == seq.length()-1) {
047 return;
048 }
049
050 InputSequence<Integer> pest = (InputSequence<Integer>) seq.getComponent("pest");
051 InputSequence<Integer> mest = (InputSequence<Integer>) seq.getComponent("mest");
052
053 int plusEst = pest.getX(pos+1);
054 int minusEst = mest.getX(pos+1);
055 // 0 - no data
056 // 1 - exon only
057 // 2 - intron only
058 // 3 - mixed
059
060 if (plusExonState[state] && (plusEst==1)) { result.addFeature(startIx, 1); }
061 if (minusExonState[state] && (minusEst==1)) { result.addFeature(startIx, 1); }
062
063 if (plusExonState[state] && (plusEst==3)) { result.addFeature(startIx+1, 1); }
064 if (minusExonState[state] && (minusEst==3)) { result.addFeature(startIx+1, 1); }
065
066 if (plusIntronState[state] && (plusEst==2)) { result.addFeature(startIx+2, 1); }
067 if (minusIntronState[state] && (minusEst==2)) { result.addFeature(startIx+2, 1); }
068
069 if (plusIntronState[state] && (plusEst==3)) { result.addFeature(startIx+3, 1); }
070 if (minusIntronState[state] && (minusEst==3)) { result.addFeature(startIx+3, 1); }
071 }
072
073 public void evaluateEdge(InputSequence<? extends CompositeInput> seq, int pos, int prevState, int state, FeatureList result) {
074 if( (pos == seq.length()-1) || (pos == 0) ) {
075 return;
076 }
077
078 InputSequence<Integer> pest = (InputSequence<Integer>) seq.getComponent("pest");
079 InputSequence<Integer> mest = (InputSequence<Integer>) seq.getComponent("mest");
080
081 int plusE = pest.getX(pos+1); int plusEp = pest.getX(pos);
082 int minusE = mest.getX(pos+1); int minusEp = mest.getX(pos);
083
084 Boolean plusacc = ((plusEp==2) && (plusE==1));
085 Boolean plusdon = ((plusEp==1) && (plusE==2));
086 Boolean minusacc = ((minusEp==1) && (minusE==2));
087 Boolean minusdon = ((minusEp==2) && (minusE==1));
088
089 if (plusacc && plusExonState[state] && plusIntronState[prevState]) { result.addFeature(startIx+4,1); }
090 if (minusacc && minusExonState[prevState] && minusIntronState[state]) { result.addFeature(startIx+4,1); }
091
092 if (plusdon && plusIntronState[state] && plusExonState[prevState]) { result.addFeature(startIx+5,1); }
093 if (minusdon && plusIntronState[prevState] && plusExonState[state]) { result.addFeature(startIx+5,1); }
094
095 }
096
097 public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends CompositeInput>> data) {
098 startIx = startingIndex;
099 model = modelInfo;
100
101 int nStates = model.getNumStates();
102
103 plusExonState = new boolean[nStates];
104 for (int j=0; j<nStates; j++) { plusExonState[j] = false; }
105 plusExonState[model.getStateIndex("exon0")] = true;
106 plusExonState[model.getStateIndex("exon1")] = true;
107 plusExonState[model.getStateIndex("exon2")] = true;
108
109 minusExonState = new boolean[nStates];
110 for (int j=0; j<nStates; j++) { minusExonState[j] = false; }
111 minusExonState[model.getStateIndex("exon0m")] = true;
112 minusExonState[model.getStateIndex("exon1m")] = true;
113 minusExonState[model.getStateIndex("exon2m")] = true;
114
115 plusIntronState = new boolean[nStates];
116 for (int j=0; j<nStates; j++) { plusIntronState[j] = false; }
117 plusIntronState[model.getStateIndex("e-i0")] = true;
118 plusIntronState[model.getStateIndex("e-i1")] = true;
119 plusIntronState[model.getStateIndex("e-i2")] = true;
120 plusIntronState[model.getStateIndex("intron0")] = true;
121 plusIntronState[model.getStateIndex("intron1")] = true;
122 plusIntronState[model.getStateIndex("intron2")] = true;
123 plusIntronState[model.getStateIndex("i-e0")] = true;
124 plusIntronState[model.getStateIndex("i-e1")] = true;
125 plusIntronState[model.getStateIndex("i-e2")] = true;
126
127 minusIntronState = new boolean[nStates];
128 for (int j=0; j<nStates; j++) { minusIntronState[j] = false; }
129 minusIntronState[model.getStateIndex("em-i0m")] = true;
130 minusIntronState[model.getStateIndex("em-i1m")] = true;
131 minusIntronState[model.getStateIndex("em-i2m")] = true;
132 minusIntronState[model.getStateIndex("intron0m")] = true;
133 minusIntronState[model.getStateIndex("intron1m")] = true;
134 minusIntronState[model.getStateIndex("intron2m")] = true;
135 minusIntronState[model.getStateIndex("im-e0m")] = true;
136 minusIntronState[model.getStateIndex("im-e1m")] = true;
137 minusIntronState[model.getStateIndex("im-e2m")] = true;
138 }
139
140 public CacheStrategySpec getCacheStrategy() {
141 return new CacheStrategySpec(CacheStrategy.DENSE);
142 }
143
144 }
145