001 package calhoun.analysis.crf.io;
002
003 import java.io.File;
004 import java.io.IOException;
005 import java.util.ArrayList;
006 import java.util.Iterator;
007 import java.util.List;
008 import java.util.Map;
009
010 import calhoun.util.Util;
011
012 /** an {@link InputHandler} used when all of the input is in a single file. A single {@link InputComponentIO} is used to read the
013 * file. For training, hidden sequences are stored in a separate file whose name is related to the input file name using a
014 * {@link FilenameMapper}. The same filename mapping is used to determine the training set file name when writing out data
015 * as when reading it in. The training file is read using a {@link TrainingSequenceIO}.<p>
016 * For this {@link InputHandler}, the location passed is the path to the file containing the input data.
017 */
018 public class InputHandlerFile extends InputHandlerBase {
019 private static final long serialVersionUID = -2969140424776995686L;
020
021 InputComponentIO inputReader;
022 FilenameMapper mapper;
023 TrainingSequenceIO hiddenStateReader;
024
025 public Iterator<? extends InputSequence<?>> readInputData(String location) throws IOException {
026 List<Map<String, InputSequence<?>>> inputs = new ArrayList();
027 inputReader.readInputSequences(location, inputs);
028
029 return createCompositeInput(inputs);
030 }
031
032 public List<? extends TrainingSequence<?>> readTrainingData(String location) throws IOException {
033 return readTrainingData(location, false);
034 }
035
036 public List<? extends TrainingSequence<?>> readTrainingData(String location, boolean predict) throws IOException {
037 String trainingLocation = mapper.mapFilename(new File(location)).getPath();
038 return readTrainingData(location, trainingLocation, hiddenStateReader, predict);
039 }
040
041 public void writeInputData(String location, Iterator<? extends InputSequence<?>> data) throws IOException {
042 // Collect all the values from the iterator into a list
043 // Then for each composite, separate it into a map of its component pieces for handing to the IO class
044 List<Map<String, InputSequence<?>>> compList = new ArrayList<Map<String, InputSequence<?>>>();
045 Util.addAll(compList, new IteratorAdapterInputComponent(data));
046
047 inputReader.writeInputSequences(location, compList);
048 }
049
050 public void writeTrainingData(String location, List<? extends TrainingSequence<?>> data) throws IOException {
051 writeInputData(location, data.iterator());
052
053 String trainingLocation = mapper.mapFilename(new File(location)).getPath();
054
055 List<int[]> trainingSeqs = new ArrayList<int[]>();
056 for(TrainingSequence<?> t : data) {
057 trainingSeqs.add(t.getY());
058 }
059
060 hiddenStateReader.writeTrainingSequences(trainingLocation, trainingSeqs.iterator());
061 }
062
063 /** gets the reader used to read in results for training data.
064 * @return the {@link TrainingSequenceIO} used to read in the hidden sequences for training
065 */
066 public TrainingSequenceIO getHiddenStateReader() {
067 return hiddenStateReader;
068 }
069
070 /** sets the reader used to get hidden sequences. Must be set to read in training data.
071 * @param hiddenStateReader the reader that will be used to access hidden states
072 */
073 public void setHiddenStateReader(TrainingSequenceIO hiddenStateReader) {
074 this.hiddenStateReader = hiddenStateReader;
075 }
076
077 /** gets the reader used to read in input sequences. Must be set before any of the <code>read</code> methods are called.
078 * @return the reader used to read in input sequences.
079 */
080 public InputComponentIO getInputReader() {
081 return inputReader;
082 }
083
084 /** gets the reader used to read in input sequences. Must be set before any of the <code>read</code> methods are called.
085 * @param inputReader the reader used to read in input sequences.
086 */
087 public void setInputReader(InputComponentIO inputReader) {
088 this.inputReader = inputReader;
089 }
090
091 /** the mapper used to generate the name of the hidden sequence file from the input sequence file.
092 * Must be set to read in training data.
093 * @return the mapper used to generate the hidden sequence file name.
094 */
095 public FilenameMapper getMapper() {
096 return mapper;
097 }
098
099 /** the mapper used to generate the name of the hidden sequence file from the input sequence file.
100 * @param mapper the mapper used to generate the hidden sequence file name.
101 */
102 public void setMapper(FilenameMapper mapper) {
103 this.mapper = mapper;
104 }
105 }