001    package calhoun.analysis.crf.features.interval29;
002    
003    import java.util.List;
004    
005    import org.apache.commons.logging.Log;
006    import org.apache.commons.logging.LogFactory;
007    
008    import calhoun.analysis.crf.AbstractFeatureManager;
009    import calhoun.analysis.crf.CacheStrategySpec;
010    import calhoun.analysis.crf.FeatureList;
011    import calhoun.analysis.crf.FeatureManagerNode;
012    import calhoun.analysis.crf.ModelManager;
013    import calhoun.analysis.crf.CacheStrategySpec.CacheStrategy;
014    import calhoun.analysis.crf.features.interval13.FootprintsInterval13;
015    import calhoun.analysis.crf.io.InputSequence;
016    import calhoun.analysis.crf.io.TrainingSequence;
017    import calhoun.analysis.crf.io.MultipleAlignmentInputSequence.MultipleAlignmentColumn;
018    import calhoun.seq.KmerHasher;
019    import calhoun.util.Assert;
020    
021    public class FootprintsInterval29 extends AbstractFeatureManager<MultipleAlignmentColumn> implements FeatureManagerNode<MultipleAlignmentColumn> {
022            private static final long serialVersionUID = -885708304411544895L;
023            private static final Log log = LogFactory.getLog(FootprintsInterval29.class);
024            boolean debug = log.isDebugEnabled();
025            
026            /* Features are the conjunction of "species X is present in multiple alignment" with hidden state is "exonic, intronic, intergenic"
027             * Is the number of features allowed to depend on the number of species inmultiple alignment??
028             */
029    
030            List<String> speciesNames;
031            int startIx;  
032            ModelManager model;
033            KmerHasher h = new KmerHasher(KmerHasher.ACGTN,1);
034            
035            int maxSeqLength;
036            
037            int nFeatures = -1;
038            
039            Boolean[] isStateCoding, isStateIntronic, isStateIntergenic;
040    
041            
042            public FootprintsInterval29() { 
043            }
044    
045            public int getNumFeatures() {
046                    return nFeatures;
047            }       
048            
049            public String getFeatureName(int featureIndex) {
050                    String[] type = new String[] { "intergenic", "exonic", "intronic"};
051                    int raw = featureIndex - startIx;
052                    Assert.a(raw<nFeatures);
053                    if(speciesNames == null) {
054                            return "Species "+((raw/3) + 1) + " "+type[raw%3]+" footprint";
055                    }
056                    return speciesNames.get((raw/3) + 1) + " "+type[raw%3]+" footprint";
057            }
058            
059            
060            public void evaluateNode(InputSequence<? extends MultipleAlignmentColumn> seq, int pos, int state, FeatureList result) {
061                    MultipleAlignmentColumn mac = seq.getX(pos);
062                    for (int species = 1; species<mac.numSpecies(); species++) {
063                            if (mac.nucleotide(species) == '-') continue;
064                            
065                            if (isStateIntergenic[state]) { result.addFeature(startIx+((species-1)*3+0), 1.0); }
066                            if (isStateCoding[state])     { result.addFeature(startIx+((species-1)*3+1), 1.0); }
067                            if (isStateIntronic[state])   { result.addFeature(startIx+((species-1)*3+2), 1.0); }
068                    }
069            }
070    
071    
072            public void train(int startingIndex, ModelManager modelInfo, List<? extends TrainingSequence<? extends MultipleAlignmentColumn>> data) {
073                    TrainingSequence<? extends MultipleAlignmentColumn> seq = data.get(0);
074                    speciesNames = seq.getX(0).getMultipleAlignment().getSpeciesNames();
075                    
076                    startIx = startingIndex;
077                    model = modelInfo;
078                    int nStates = model.getNumStates();
079    
080                    nFeatures = 3*(data.get(0).getX(0).numSpecies()-1);  // Assumes this is the same for all alignments
081                                                    
082                    isStateCoding = new Boolean[nStates];       for (int j=0; j<nStates; j++) { isStateCoding[j] = false; }
083                    isStateCoding[1] = true;
084                    isStateCoding[2] = true;
085                    isStateCoding[3] = true;
086                    isStateCoding[7] = true;
087                    isStateCoding[8] = true;
088                    isStateCoding[9] = true;                
089    
090                    isStateIntronic = new Boolean[nStates];     for (int j=0; j<nStates; j++) { isStateIntronic[j] = false; }
091                    isStateIntronic[4] = true;
092                    isStateIntronic[5] = true;
093                    isStateIntronic[6] = true;
094                    isStateIntronic[10] = true;
095                    isStateIntronic[11] = true;
096                    isStateIntronic[12] = true;
097                    isStateIntronic[15] = true;
098                    isStateIntronic[16] = true;
099                    isStateIntronic[17] = true;
100                    isStateIntronic[18] = true;
101                    isStateIntronic[19] = true;
102                    isStateIntronic[20] = true;
103                    isStateIntronic[23] = true;
104                    isStateIntronic[24] = true;
105                    isStateIntronic[25] = true;
106                    isStateIntronic[26] = true;
107                    isStateIntronic[27] = true;
108                    isStateIntronic[28] = true;
109    
110                    isStateIntergenic = new Boolean[nStates];   for (int j=0; j<nStates; j++) { isStateIntergenic[j] = false; }
111                    isStateIntergenic[0] = true;
112                    isStateIntergenic[13] = true;
113                    isStateIntergenic[14] = true;
114                    isStateIntergenic[21] = true;
115                    isStateIntergenic[22] = true;
116                    
117            }
118            @Override
119            public CacheStrategySpec getCacheStrategy() {
120                    return new CacheStrategySpec(CacheStrategy.DENSE);
121            }
122    }