001 package calhoun.analysis.crf.features.interval13;
002
003 import java.util.List;
004
005 import org.apache.commons.logging.Log;
006 import org.apache.commons.logging.LogFactory;
007
008 import calhoun.analysis.crf.AbstractFeatureManager;
009 import calhoun.analysis.crf.CacheStrategySpec;
010 import calhoun.analysis.crf.FeatureList;
011 import calhoun.analysis.crf.FeatureManagerEdge;
012 import calhoun.analysis.crf.FeatureManagerNode;
013 import calhoun.analysis.crf.ModelManager;
014 import calhoun.analysis.crf.CacheStrategySpec.CacheStrategy;
015 import calhoun.analysis.crf.io.CompositeInput;
016 import calhoun.analysis.crf.io.InputSequence;
017 import calhoun.analysis.crf.io.TrainingSequence;
018
019 public class ESTInterval13 extends AbstractFeatureManager<CompositeInput> implements FeatureManagerNode<CompositeInput> , FeatureManagerEdge<CompositeInput> {
020 private static final long serialVersionUID = -7659288739348604129L;
021 private static final Log log = LogFactory.getLog(ESTInterval13.class);
022 boolean debug = log.isDebugEnabled();
023
024
025
026 int startIx; // The index of the first feature managed by this FeatureManager
027 ModelManager model;
028
029 boolean[] intergenicState;
030 boolean[] plusExonState;
031 boolean[] minusExonState;
032 boolean[] plusIntronState;
033 boolean[] minusIntronState;
034
035 public ESTInterval13() {
036 }
037
038 public int getNumFeatures() {
039 return 9;
040 }
041
042 public String getFeatureName(int featureIndex) {
043 String[] types = new String[] {"exon", "intron", "Intergenic"};
044 String[] evidence = new String[] {"consistent", "mixed"};
045 int raw = featureIndex-startIx;
046 if(raw < 6) {
047 return "EST "+types[raw/2]+" "+evidence[raw%2];
048 }
049 else {
050 String[] edge = new String[] {"acceptor", "donor", "no edge"};
051 return "EST "+edge[raw-6];
052 }
053 }
054
055 public void evaluateNode(InputSequence<? extends CompositeInput> seq, int pos, int state, FeatureList result) {
056 if(pos == seq.length()-1) {
057 return;
058 }
059
060 InputSequence<Integer> pest = (InputSequence<Integer>) seq.getComponent("pest");
061 InputSequence<Integer> mest = (InputSequence<Integer>) seq.getComponent("mest");
062
063 int plusEst = pest.getX(pos+1);
064 int minusEst = mest.getX(pos+1);
065 // 0 - no data
066 // 1 - exon only
067 // 2 - intron only
068 // 3 - mixed
069
070 if (plusExonState[state] && (plusEst==1)) { result.addFeature(startIx, 1); }
071 if (minusExonState[state] && (minusEst==1)) { result.addFeature(startIx, 1); }
072
073 if (plusExonState[state] && (plusEst==3)) { result.addFeature(startIx+1, 1); }
074 if (minusExonState[state] && (minusEst==3)) { result.addFeature(startIx+1, 1); }
075
076 if (plusIntronState[state] && (plusEst==2)) { result.addFeature(startIx+2, 1); }
077 if (minusIntronState[state] && (minusEst==2)) { result.addFeature(startIx+2, 1); }
078
079 if (plusIntronState[state] && (plusEst==3)) { result.addFeature(startIx+3, 1); }
080 if (minusIntronState[state] && (minusEst==3)) { result.addFeature(startIx+3, 1); }
081
082 if (intergenicState[state] && (plusEst==1 || minusEst==1 || plusEst==2 || minusEst==2)) { result.addFeature(startIx+4, 1); }
083 if (intergenicState[state] && (plusEst==3 || minusEst==3)) { result.addFeature(startIx+5, 1); }
084 }
085
086 public void evaluateEdge(InputSequence<? extends CompositeInput> seq, int pos, int prevState, int state, FeatureList result) {
087 if( (pos == seq.length()-1) || (pos == 0) ) {
088 return;
089 }
090
091 InputSequence<Integer> pest = (InputSequence<Integer>) seq.getComponent("pest");
092 InputSequence<Integer> mest = (InputSequence<Integer>) seq.getComponent("mest");
093
094 int plusE = pest.getX(pos+1); int plusEp = pest.getX(pos);
095 int minusE = mest.getX(pos+1); int minusEp = mest.getX(pos);
096
097 Boolean plusacc = ((plusEp==2) && (plusE==1));
098 Boolean plusdon = ((plusEp==1) && (plusE==2));
099 Boolean minusacc = ((minusEp==1) && (minusE==2));
100 Boolean minusdon = ((minusEp==2) && (minusE==1));
101
102 if (plusacc && plusExonState[state] && plusIntronState[prevState]) { result.addFeature(startIx+6,1); return; }
103 if (minusacc && minusExonState[prevState] && minusIntronState[state]) { result.addFeature(startIx+6,1); return;}
104
105 if (plusdon && plusIntronState[state] && plusExonState[prevState]) { result.addFeature(startIx+7,1); return;}
106 if (minusdon && plusIntronState[prevState] && plusExonState[state]) { result.addFeature(startIx+7,1); return;}
107
108 result.addFeature(startIx+8,1);
109 }
110
111 public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends CompositeInput>> data) {
112 startIx = startingIndex;
113 model = modelInfo;
114
115 int nStates = model.getNumStates();
116
117 plusExonState = new boolean[nStates];
118 plusExonState[model.getStateIndex("exon0")] = true;
119 plusExonState[model.getStateIndex("exon1")] = true;
120 plusExonState[model.getStateIndex("exon2")] = true;
121
122 minusExonState = new boolean[nStates];
123 minusExonState[model.getStateIndex("exon0m")] = true;
124 minusExonState[model.getStateIndex("exon1m")] = true;
125 minusExonState[model.getStateIndex("exon2m")] = true;
126
127 plusIntronState = new boolean[nStates];
128 plusIntronState[model.getStateIndex("intron0")] = true;
129 plusIntronState[model.getStateIndex("intron1")] = true;
130 plusIntronState[model.getStateIndex("intron2")] = true;
131
132 minusIntronState = new boolean[nStates];
133 minusIntronState[model.getStateIndex("intron0m")] = true;
134 minusIntronState[model.getStateIndex("intron1m")] = true;
135 minusIntronState[model.getStateIndex("intron2m")] = true;
136
137 intergenicState = new boolean[nStates];
138 intergenicState[model.getStateIndex("intergenic")] = true;
139 }
140
141 @Override
142 public CacheStrategySpec getCacheStrategy() {
143 return new CacheStrategySpec(CacheStrategy.DENSE);
144 }
145
146 }
147