001 package calhoun.analysis.crf.features.tricycle13;
002
003 import java.util.List;
004
005 import org.apache.commons.logging.Log;
006 import org.apache.commons.logging.LogFactory;
007
008 import calhoun.analysis.crf.AbstractFeatureManager;
009 import calhoun.analysis.crf.CacheStrategySpec;
010 import calhoun.analysis.crf.FeatureList;
011 import calhoun.analysis.crf.FeatureManagerNode;
012 import calhoun.analysis.crf.ModelManager;
013 import calhoun.analysis.crf.CacheStrategySpec.CacheStrategy;
014 import calhoun.analysis.crf.io.CompositeInput;
015 import calhoun.analysis.crf.io.InputSequence;
016 import calhoun.analysis.crf.io.TrainingSequence;
017
018 public class PfamGenic extends AbstractFeatureManager<CompositeInput> implements FeatureManagerNode<CompositeInput> {
019 private static final long serialVersionUID = -7659288739348604129L;
020 private static final Log log = LogFactory.getLog(PfamGenic.class);
021 boolean debug = log.isDebugEnabled();
022
023 /* Contains 1 features:
024 * f returns 1 if either of two conditions below and 0 otherwise:
025 * a) y_i=intron1,intron2,intron3 and pest(i+1) = 2 [intron only]
026 * b) y_i= intron1m,intron2m,intron3m and mest(i+1) = 2 [intron only]
027 */
028
029 int startIx; // The index of the first feature managed by this FeatureManager
030 ModelManager model;
031
032 boolean[] plusGenic;
033 boolean[] minusGenic;
034
035
036 public PfamGenic() {
037 }
038
039 public int getNumFeatures() {
040 return 1;
041 }
042
043 public String getFeatureName(int featureIndex) {
044 return "PfamGenic";
045 }
046
047
048 public void evaluateNode(InputSequence<? extends CompositeInput> seq, int pos, int state, FeatureList result) {
049 if(pos == seq.length()-1) {
050 return;
051 }
052
053 InputSequence<Integer> ppfam = (InputSequence<Integer>) seq.getComponent("ppfam");
054 InputSequence<Integer> mpfam = (InputSequence<Integer>) seq.getComponent("mpfam");
055
056 boolean plusPfam = (ppfam.getX(pos) > 0);
057 boolean minusPfam = (mpfam.getX(pos) > 0);
058
059 if (plusGenic[state] && plusPfam) { result.addFeature(startIx, 1); }
060 if (minusGenic[state] && minusPfam) { result.addFeature(startIx, 1); }
061 }
062
063
064 public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends CompositeInput>> data) {
065 startIx = startingIndex;
066 model = modelInfo;
067
068 int nStates = model.getNumStates();
069
070 plusGenic = new boolean[nStates];
071 for (int j=0; j<nStates; j++) { plusGenic[j] = false; }
072 plusGenic[model.getStateIndex("exon1")] = true;
073 plusGenic[model.getStateIndex("exon2")] = true;
074 plusGenic[model.getStateIndex("exon3")] = true;
075 plusGenic[model.getStateIndex("intron1")] = true;
076 plusGenic[model.getStateIndex("intron2")] = true;
077 plusGenic[model.getStateIndex("intron3")] = true;
078
079 minusGenic = new boolean[nStates];
080 for (int j=0; j<nStates; j++) { minusGenic[j] = false; }
081 minusGenic[model.getStateIndex("exon1m")] = true;
082 minusGenic[model.getStateIndex("exon2m")] = true;
083 minusGenic[model.getStateIndex("exon3m")] = true;
084 minusGenic[model.getStateIndex("intron1m")] = true;
085 minusGenic[model.getStateIndex("intron2m")] = true;
086 minusGenic[model.getStateIndex("intron3m")] = true;
087 }
088 @Override
089 public CacheStrategySpec getCacheStrategy() {
090 return new CacheStrategySpec(CacheStrategy.UNSPECIFIED);
091 }
092 }
093