001 package calhoun.analysis.crf.io;
002
003 import java.util.Collection;
004 import java.util.HashMap;
005
006 import calhoun.util.Assert;
007
008 /** represents an input sequence that also contains a sequence of hidden states.
009 * @param <A> the elements which make up the input sequence which is part of this training sequence
010 */
011 public class TrainingSequence<A> implements InputSequence<A> {
012 private static final long serialVersionUID = -443081006327395045L;
013 InputSequence<? extends A> x;
014 int[] y;
015 HashMap<String,TrainingSequence> componentMap;
016
017 /** constructs a training sequence using just the input sequence. The hidden sequence needs to be added later
018 * using setY.
019 * @param xArg the input sequence the hidden states correspond to
020 */
021 public TrainingSequence(InputSequence<? extends A> xArg) {
022 this.x = xArg;
023 }
024
025 /** constructs a training sequence using these input and hidden sequences
026 * @param xArg the input sequence the hidden states correspond to
027 * @param yArg the state indices of the values of the hidden states
028 */
029 public TrainingSequence(InputSequence<? extends A> xArg, int[] yArg) {
030 this.x = xArg;
031 this.y = yArg;
032 Assert.a(y.length == x.length(), "Lengths differ between input and training sequences. Hidden = " + y.length + " Observed = " + x.length());
033 }
034
035 /** gets the underlying input sequence
036 * @return the underlying input sequence
037 */
038 public InputSequence<? extends A> getInputSequence() {
039 return x;
040 }
041
042 /** gets the vector of hidden state indices
043 * @return an array of ints containing the index of the hidden state at each position in the sequence */
044 public int[] getY() {
045 return y;
046 }
047
048 /** sets the vector of hidden state indices. Must be the same length as the previsouly specified inputs
049 * @param hiddenStates an array of ints containing the index of the hidden state at each position in the sequence */
050 public void setY(int[] hiddenStates) {
051 Assert.a(hiddenStates.length == x.length(), "Lengths differ between input and training sequences. Hidden = " + hiddenStates.length + " Observed = " + x.length());
052 y = hiddenStates;
053 }
054
055 /** gets the hidden state index at a particular position
056 * @param x1 the 0-based position at which to get the hidden state index
057 * @return the hidden state index at this position
058 */
059 public int getY(int x1) {
060 return y[x1];
061 }
062
063 /** sets the hidden state index at a particular position
064 * @param x the 0-based position at which to get the hidden state index
065 * @param z the vlaue of the hidden state index to set for this position
066 */
067 public void setY(int x,int z) {
068 y[x] = z;
069 }
070
071 /** gets the value of the underlying input sequence at a particular position
072 * @param ix a zero-based index into the input sequence
073 * @return the value of the input sequence at the position
074 */
075 public A getX(int ix) {
076 return x.getX(ix);
077 }
078
079 /** gets the length of the input and training sequences
080 * @return the length of the training sequence
081 */
082 public int length() {
083 return x == null ? 0 : x.length();
084 }
085
086 public InputSequence<?> getComponent(String name) {
087 return getTrainingComponent(name);
088 }
089
090 public Collection<String> listComponents() {
091 return x.listComponents();
092 }
093
094 /** returns a new TrainingSequence created by taking a single component of the input sequence and
095 * pairing it with the hidden states for this Training Sequence. The input sequence must be a composite.
096 * @param name the name of the component to extract from the composite input sequence
097 * @return a new TrainingSequence created from teh extracted input sequence component
098 */
099 public TrainingSequence getTrainingComponent(String name) {
100 if(componentMap == null) {
101 componentMap = new HashMap<String, TrainingSequence>();
102 }
103 TrainingSequence ret = componentMap.get(name);
104 if(ret == null) {
105 ret = new TrainingSequence(x.getComponent(name), y);
106 componentMap.put(name, ret);
107 }
108 return ret;
109 }
110
111 public TrainingSequence<A> subSequence(int start, int end) {
112 Assert.a(start >= 1);
113 Assert.a(end <= this.length());
114 Assert.a(start <= end);
115 int[] newdata = new int[end-start+1];
116 for (int j=0; j<(end-start+1); j++) {
117 newdata[j] = y[j+start-1];
118 }
119
120 TrainingSequence<A> TS = new TrainingSequence<A>(this.x.subSequence(start,end),newdata);
121 return TS;
122 }
123
124 @Override
125 public String toString() {
126 return x+ " + training.";
127 }
128 }