001    package calhoun.analysis.crf.features.interval29;
002    
003    import java.util.ArrayList;
004    import java.util.List;
005    
006    import org.apache.commons.logging.Log;
007    import org.apache.commons.logging.LogFactory;
008    
009    import calhoun.analysis.crf.ModelManager;
010    import calhoun.analysis.crf.io.TrainingSequence;
011    import calhoun.util.Assert;
012    
013    public class Interval29Tools {
014            private static final Log log = LogFactory.getLog(Interval29Tools.class);
015    
016            static protected enum Constraint {NONE, NEVER, PSTART, PDON, PACC, PSTOP, MSTART, MDON, MACC, MSTOP, PCODE, MCODE, PKEEPE, PKEEPI, MKEEPE, MKEEPI, PSTOPPED, MSTARTED, PWILLSTART, MWILLSTOP};
017            static protected Constraint[] edgeConstraints;
018            static protected Constraint[] nodeConstraints;
019            static int numStates;
020    
021            static {
022                    log.debug("Setting up constraints in Interval29Tools");
023                    
024                    numStates = 29;
025                    
026                    // Setup the node constraints
027                    nodeConstraints = new Constraint[numStates];
028                    for (int j=0; j<numStates; j++) {
029                            nodeConstraints[j] = Constraint.NONE;
030                    }
031                    nodeConstraints[1] = Constraint.PCODE;
032                    nodeConstraints[2] = Constraint.PCODE;
033                    nodeConstraints[3] = Constraint.PCODE;
034                    nodeConstraints[7] = Constraint.MCODE;
035                    nodeConstraints[8] = Constraint.MCODE;
036                    nodeConstraints[9] = Constraint.MCODE;
037                    log.debug("The node constraints are as follows:");
038                    for (int i=0; i<numStates; i++) {
039                            log.debug("  " + i + "  --  " + nodeConstraints[i]);
040                    }
041                    
042                    // setup the edge constraints
043                    // edgeConstraint[i] is the constraint on the edge
044                    //   from state floor(i/numStates)
045                    //   to state i%numStates
046                    edgeConstraints = new Constraint[numStates*numStates];
047                    
048                    // The transition is impossible except when explicitly allowed below
049                    for(int i=0; i<numStates; ++i) {
050                            for(int j=0; j<numStates; ++j) {
051                                    edgeConstraints[i*numStates + j] = Constraint.NEVER;
052                            }
053                    }
054                    
055                    // By default, self-transitions are allowed
056                    for(int i=0; i<numStates; i++) {
057                            edgeConstraints[i*numStates + i] = Constraint.NONE;                     
058                    }
059                    // e-ig -> integenic
060                    edgeConstraints[(14)*numStates + (0)] = Constraint.PSTOPPED;
061                    // em-ig -> intergenic
062                    edgeConstraints[(22)*numStates + (0)] = Constraint.MSTARTED;
063                    for(int i=0; i<3; i++) {
064                            // intergenic -> ig-e
065                            edgeConstraints[(0)*numStates + (i+13)] = Constraint.PWILLSTART; //Constraint.PSTART;
066                            // intergenic -> ig-em
067                            edgeConstraints[(0)*numStates + (i+21)] = Constraint.MWILLSTOP; //Constraint.MSTOP;
068                            
069                            // Put constraints on EXON SIDE of intergenic-exon boundaries
070                            // ig-e_-> e_i
071                            edgeConstraints[(13)*numStates + (i+1)] = Constraint.PSTART;
072                            // ig-em -> e_im
073                            edgeConstraints[(21)*numStates + (i+7)] = Constraint.MSTOP;                     
074                            // e_i -> e-ig
075                            edgeConstraints[(i+1)*numStates + (14)] = Constraint.PSTOP;
076                            // e_im -> em-ig
077                            edgeConstraints[(i+7)*numStates + (22)] = Constraint.MSTART;
078                            
079                            // exon-exon
080                            // e_i -> e_i
081                            edgeConstraints[(i+1)*numStates + (i+1)] = Constraint.PCODE;
082                            // e_im -> e_im
083                            edgeConstraints[(i+7)*numStates + (i+7)] = Constraint.MCODE;
084                                            
085                            // Put constraints on BOTH SIDES of intron-exon boundaries
086                            for(int j=0; j<3; j++) {
087                                    // e_i -> e-i_j
088                                    edgeConstraints[(i+1)*numStates + (j+15)] = Constraint.PDON;
089                                    // i_i -> i-e_j
090                                    edgeConstraints[(i+4)*numStates + (j+18)] = Constraint.PACC;
091                                    // e_im -> em-i_jm
092                                    edgeConstraints[(i+7)*numStates + (j+23)] = Constraint.MACC;
093                                    // i_im -> im-e_jm
094                                    edgeConstraints[(i+10)*numStates + (j+26)] = Constraint.MDON;
095                            }
096                            // e-i_i -> i_i (intron_i, abbr.)
097                            edgeConstraints[(i+15)*numStates + (i+4)] = Constraint.PKEEPI;
098                            // i-e_i -> e_i
099                            edgeConstraints[(i+18)*numStates + (i+1)] = Constraint.PKEEPE;
100                            // em-i_im -> i_im 
101                            edgeConstraints[(i+23)*numStates + (i+10)] = Constraint.MKEEPI;
102                            // im-e_im -> e_im
103                            edgeConstraints[(i+26)*numStates + (i+7)] = Constraint.MKEEPE;
104                    }
105                    
106    //              log.warn("The transition constraints are as follows:");
107    //              for (int i=0; i<numStates; i++) {
108    //                      String s = "";
109    //                      for (int j=0; j<numStates; j++) {
110    //                              s += edgeConstraints[i*numStates + j] + "\t";
111    //                      }
112    //                      System.out.println(s);
113    //                      System.out.println("");
114    //              }
115            }
116            
117            static protected int check012(int x) {
118                    Assert.a(x>=0, "x is " + x);
119                    Assert.a(x<=2, "x is " + x);
120                    return x;
121            }
122            
123            static protected void verify(ModelManager modelInfo) {
124                    Assert.a(modelInfo.getNumStates()==29);
125                    
126                    Assert.a(modelInfo.getStateName(0).equals("intergenic"));
127                    Assert.a(modelInfo.getStateName(1).equals("exon0"));
128                    Assert.a(modelInfo.getStateName(2).equals("exon1"));
129                    Assert.a(modelInfo.getStateName(3).equals("exon2"));
130                    Assert.a(modelInfo.getStateName(4).equals("intron0"));
131                    Assert.a(modelInfo.getStateName(5).equals("intron1"));
132                    Assert.a(modelInfo.getStateName(6).equals("intron2"));
133                    Assert.a(modelInfo.getStateName(7).equals("exon0m"));
134                    Assert.a(modelInfo.getStateName(8).equals("exon1m"));
135                    Assert.a(modelInfo.getStateName(9).equals("exon2m"));
136                    Assert.a(modelInfo.getStateName(10).equals("intron0m"));
137                    Assert.a(modelInfo.getStateName(11).equals("intron1m"));
138                    Assert.a(modelInfo.getStateName(12).equals("intron2m"));
139                    // XXX: add Asserts for rest of states
140            }
141            
142            static protected List<TrainingSequence<?>> checkValidTransitions(List<? extends TrainingSequence<?>> data) {
143                    List<TrainingSequence<?>> goodData = new ArrayList<TrainingSequence<?>>();
144                    for(TrainingSequence<?> seq : data) {
145                            boolean validSequence = true;
146                            for (int pos=1; pos<seq.length(); pos++) { // note start at one not zero, so can look back at prevState
147                                    int state = seq.getY(pos);
148                                    int prevState = seq.getY(pos-1);
149                                    if (Interval29Tools.edgeConstraints[prevState*Interval29Tools.numStates + state] == Interval29Tools.Constraint.NEVER) {
150                                            System.out.println("bad: " + prevState + " " + state);
151                                            validSequence = false;
152                                            //Assert.a(false,"pos = "+pos+" prevState = " + modelInfo.getStateName(prevState) + "   State = " + modelInfo.getStateName(state));  // A nice side effect of making sure the input sequence is legal, can omit this if you want to.
153                                            break;
154                                    }
155                            }
156                            if (validSequence) {
157                                    goodData.add(seq);
158                            }
159                    }
160                    return goodData;
161            }
162    }