001    package calhoun.analysis.crf.io;
002    
003    import java.io.File;
004    import java.io.IOException;
005    import java.util.ArrayList;
006    import java.util.Iterator;
007    import java.util.List;
008    import java.util.Map;
009    
010    import calhoun.util.Util;
011    
012    /** an {@link InputHandler} used when the input is in several files within a single directory.  A single {@link InputComponentIO} is used for each
013     * file.  A map associates each file name with its {@link InputComponentIO}.  For training, hidden sequences are stored in a separate file in the directory whose name is set with the hiddenSequenceFile property.
014     * For this {@link InputHandler}, the location passed is the path to the directory containing the input data.
015      */
016    public class InputHandlerDirectory extends InputHandlerBase {
017            private static final long serialVersionUID = -2969140424776995686L;
018            
019            Map<String, InputComponentIO> inputReaders;
020            TrainingSequenceIO hiddenStateReader;
021            String hiddenSequenceFile = "hidden.dat";
022    
023            public Iterator<? extends InputSequence<?>> readInputData(String location) throws IOException {
024                    List<Map<String, InputSequence<?>>> inputs = new ArrayList();
025    
026                    // Read in all of the inputs
027                    for(Map.Entry<String, InputComponentIO> entry : inputReaders.entrySet()) {
028                            entry.getValue().readInputSequences(new File(location, entry.getKey()).getPath(), inputs);
029                    }
030    
031                    return createCompositeInput(inputs);
032            }
033    
034            public List<? extends TrainingSequence<?>> readTrainingData(String location) throws IOException {
035                    return readTrainingData(location, false);
036            }
037            
038            public List<? extends TrainingSequence<?>> readTrainingData(String location, boolean predict) throws IOException {
039                    String trainingLocation = new File(location, hiddenSequenceFile).getPath();
040    
041                    return readTrainingData(location, trainingLocation, hiddenStateReader, predict);
042            }
043    
044            public void writeInputData(String location, Iterator<? extends InputSequence<?>> data) throws IOException {
045                    // Collect all the values from the iterator into a list
046                    // Then for each composite, separate it into a map of its component pieces for handing to the IO class
047                    List<Map<String, InputSequence<?>>> compList = new ArrayList<Map<String, InputSequence<?>>>();
048                    Util.addAll(compList, new IteratorAdapterInputComponent(data));
049    
050                    for(Map.Entry<String, InputComponentIO> entry : inputReaders.entrySet()) {
051                            entry.getValue().writeInputSequences(new File(location, entry.getKey()).getPath(), compList);
052                    }
053            }
054    
055            public void writeTrainingData(String location, List<? extends TrainingSequence<?>> data) throws IOException {
056                    writeInputData(location, new IteratorAdapterTrainingSequenceInput(data.iterator()));
057    
058                    List<int[]> trainingSeqs = new ArrayList<int[]>();
059                    for(TrainingSequence<?> t : data) {
060                            trainingSeqs.add(t.getY());
061                    }
062                    
063                    hiddenStateReader.writeTrainingSequences(new File(location, hiddenSequenceFile).getPath(), trainingSeqs.iterator());
064            }
065    
066            /** gets the reader used to read in results for training data.
067             * @return the {@link TrainingSequenceIO} used to read in the hidden sequences for training
068             */
069            public TrainingSequenceIO getHiddenStateReader() {
070                    return hiddenStateReader;
071            }
072    
073            /** sets the reader used to get hidden sequences.  Must be set to read in training data.
074             * @param hiddenStateReader the reader that will be used to access hidden states
075             */
076            public void setHiddenStateReader(TrainingSequenceIO hiddenStateReader) {
077                    this.hiddenStateReader = hiddenStateReader;
078            }
079            
080            /** gets the readers used to read in input sequences.  Must be set before any of the <code>read</code> methods are called.
081             * @return the reader used to read in input sequences.
082             */
083            public Map<String, InputComponentIO> getInputReaders() {
084                    return inputReaders;
085            }
086            
087            /** sets the readers used to read in input sequences.  Must be set before any of the <code>read</code> methods are called.
088             * the value is a map that associates filenames within the directory to input components.
089             * @param inputReader the reader used to read in input sequences.
090             */
091            public void setInputReaders(Map<String, InputComponentIO> inputReader) {
092                    this.inputReaders = inputReader;
093            }
094            
095            /** gets the name of the hidden sequence file.  This is the name of the file within the directory where training data will be located.
096             * @return the name of the hidden sequence file.
097             */
098            public String getHiddenSequenceFile() {
099                    return hiddenSequenceFile;
100            }
101    
102            /** sets the name of the hidden sequence file.  This is the name of the file within the directory where training data will be located.
103             * @param hiddenSequenceFile the name of the hidden sequence file within the input directory.
104             */
105            public void setHiddenSequenceFile(String hiddenSequenceFile) {
106                    this.hiddenSequenceFile = hiddenSequenceFile;
107            }
108    }