001 package calhoun.analysis.crf.io;
002
003 import java.io.File;
004 import java.io.IOException;
005 import java.util.ArrayList;
006 import java.util.Iterator;
007 import java.util.List;
008 import java.util.Map;
009
010 import calhoun.util.Util;
011
012 /** an {@link InputHandler} used when the input is in several files within a single directory. A single {@link InputComponentIO} is used for each
013 * file. A map associates each file name with its {@link InputComponentIO}. For training, hidden sequences are stored in a separate file in the directory whose name is set with the hiddenSequenceFile property.
014 * For this {@link InputHandler}, the location passed is the path to the directory containing the input data.
015 */
016 public class InputHandlerDirectory extends InputHandlerBase {
017 private static final long serialVersionUID = -2969140424776995686L;
018
019 Map<String, InputComponentIO> inputReaders;
020 TrainingSequenceIO hiddenStateReader;
021 String hiddenSequenceFile = "hidden.dat";
022
023 public Iterator<? extends InputSequence<?>> readInputData(String location) throws IOException {
024 List<Map<String, InputSequence<?>>> inputs = new ArrayList();
025
026 // Read in all of the inputs
027 for(Map.Entry<String, InputComponentIO> entry : inputReaders.entrySet()) {
028 entry.getValue().readInputSequences(new File(location, entry.getKey()).getPath(), inputs);
029 }
030
031 return createCompositeInput(inputs);
032 }
033
034 public List<? extends TrainingSequence<?>> readTrainingData(String location) throws IOException {
035 return readTrainingData(location, false);
036 }
037
038 public List<? extends TrainingSequence<?>> readTrainingData(String location, boolean predict) throws IOException {
039 String trainingLocation = new File(location, hiddenSequenceFile).getPath();
040
041 return readTrainingData(location, trainingLocation, hiddenStateReader, predict);
042 }
043
044 public void writeInputData(String location, Iterator<? extends InputSequence<?>> data) throws IOException {
045 // Collect all the values from the iterator into a list
046 // Then for each composite, separate it into a map of its component pieces for handing to the IO class
047 List<Map<String, InputSequence<?>>> compList = new ArrayList<Map<String, InputSequence<?>>>();
048 Util.addAll(compList, new IteratorAdapterInputComponent(data));
049
050 for(Map.Entry<String, InputComponentIO> entry : inputReaders.entrySet()) {
051 entry.getValue().writeInputSequences(new File(location, entry.getKey()).getPath(), compList);
052 }
053 }
054
055 public void writeTrainingData(String location, List<? extends TrainingSequence<?>> data) throws IOException {
056 writeInputData(location, new IteratorAdapterTrainingSequenceInput(data.iterator()));
057
058 List<int[]> trainingSeqs = new ArrayList<int[]>();
059 for(TrainingSequence<?> t : data) {
060 trainingSeqs.add(t.getY());
061 }
062
063 hiddenStateReader.writeTrainingSequences(new File(location, hiddenSequenceFile).getPath(), trainingSeqs.iterator());
064 }
065
066 /** gets the reader used to read in results for training data.
067 * @return the {@link TrainingSequenceIO} used to read in the hidden sequences for training
068 */
069 public TrainingSequenceIO getHiddenStateReader() {
070 return hiddenStateReader;
071 }
072
073 /** sets the reader used to get hidden sequences. Must be set to read in training data.
074 * @param hiddenStateReader the reader that will be used to access hidden states
075 */
076 public void setHiddenStateReader(TrainingSequenceIO hiddenStateReader) {
077 this.hiddenStateReader = hiddenStateReader;
078 }
079
080 /** gets the readers used to read in input sequences. Must be set before any of the <code>read</code> methods are called.
081 * @return the reader used to read in input sequences.
082 */
083 public Map<String, InputComponentIO> getInputReaders() {
084 return inputReaders;
085 }
086
087 /** sets the readers used to read in input sequences. Must be set before any of the <code>read</code> methods are called.
088 * the value is a map that associates filenames within the directory to input components.
089 * @param inputReader the reader used to read in input sequences.
090 */
091 public void setInputReaders(Map<String, InputComponentIO> inputReader) {
092 this.inputReaders = inputReader;
093 }
094
095 /** gets the name of the hidden sequence file. This is the name of the file within the directory where training data will be located.
096 * @return the name of the hidden sequence file.
097 */
098 public String getHiddenSequenceFile() {
099 return hiddenSequenceFile;
100 }
101
102 /** sets the name of the hidden sequence file. This is the name of the file within the directory where training data will be located.
103 * @param hiddenSequenceFile the name of the hidden sequence file within the input directory.
104 */
105 public void setHiddenSequenceFile(String hiddenSequenceFile) {
106 this.hiddenSequenceFile = hiddenSequenceFile;
107 }
108 }