001    package calhoun.analysis.crf.io;
002    
003    import java.io.File;
004    import java.io.IOException;
005    import java.util.ArrayList;
006    import java.util.Iterator;
007    import java.util.List;
008    import java.util.Map;
009    
010    import calhoun.util.Util;
011    
012    /** an {@link InputHandler} used when all of the input is in a single file.  A single {@link InputComponentIO} is used to read the
013     * file.  For training, hidden sequences are stored in a separate file whose name is related to the input file name using a
014     * {@link FilenameMapper}.  The same filename mapping is used to determine the training set file name when writing out data
015     * as when reading it in.  The training file is read using a {@link TrainingSequenceIO}.<p>
016     * For this {@link InputHandler}, the location passed is the path to the file containing the input data.
017      */
018    public class InputHandlerFile extends InputHandlerBase {
019            private static final long serialVersionUID = -2969140424776995686L;
020            
021            InputComponentIO inputReader;
022            FilenameMapper mapper;
023            TrainingSequenceIO hiddenStateReader;
024            
025            public Iterator<? extends InputSequence<?>> readInputData(String location) throws IOException {
026                    List<Map<String, InputSequence<?>>> inputs = new ArrayList();
027                    inputReader.readInputSequences(location, inputs);
028    
029                    return createCompositeInput(inputs);
030            }
031    
032            public List<? extends TrainingSequence<?>> readTrainingData(String location) throws IOException {
033                    return readTrainingData(location, false);
034            }
035            
036            public List<? extends TrainingSequence<?>> readTrainingData(String location, boolean predict) throws IOException {
037                    String trainingLocation = mapper.mapFilename(new File(location)).getPath();
038                    return readTrainingData(location, trainingLocation, hiddenStateReader, predict);
039            }
040    
041            public void writeInputData(String location, Iterator<? extends InputSequence<?>> data) throws IOException {
042                    // Collect all the values from the iterator into a list
043                    // Then for each composite, separate it into a map of its component pieces for handing to the IO class
044                    List<Map<String, InputSequence<?>>> compList = new ArrayList<Map<String, InputSequence<?>>>();
045                    Util.addAll(compList, new IteratorAdapterInputComponent(data));
046    
047                    inputReader.writeInputSequences(location, compList);
048            }
049    
050            public void writeTrainingData(String location, List<? extends TrainingSequence<?>> data) throws IOException {
051                    writeInputData(location, data.iterator());
052    
053                    String trainingLocation = mapper.mapFilename(new File(location)).getPath();
054                    
055                    List<int[]> trainingSeqs = new ArrayList<int[]>();
056                    for(TrainingSequence<?> t : data) {
057                            trainingSeqs.add(t.getY());
058                    }
059                    
060                    hiddenStateReader.writeTrainingSequences(trainingLocation, trainingSeqs.iterator());
061            }
062    
063            /** gets the reader used to read in results for training data.
064             * @return the {@link TrainingSequenceIO} used to read in the hidden sequences for training
065             */
066            public TrainingSequenceIO getHiddenStateReader() {
067                    return hiddenStateReader;
068            }
069    
070            /** sets the reader used to get hidden sequences.  Must be set to read in training data.
071             * @param hiddenStateReader the reader that will be used to access hidden states
072             */
073            public void setHiddenStateReader(TrainingSequenceIO hiddenStateReader) {
074                    this.hiddenStateReader = hiddenStateReader;
075            }
076            
077            /** gets the reader used to read in input sequences.  Must be set before any of the <code>read</code> methods are called.
078             * @return the reader used to read in input sequences.
079             */
080            public InputComponentIO getInputReader() {
081                    return inputReader;
082            }
083            
084            /** gets the reader used to read in input sequences.  Must be set before any of the <code>read</code> methods are called.
085             * @param inputReader the reader used to read in input sequences.
086             */
087            public void setInputReader(InputComponentIO inputReader) {
088                    this.inputReader = inputReader;
089            }
090            
091            /** the mapper used to generate the name of the hidden sequence file from the input sequence file.  
092             * Must be set to read in training data.
093             * @return the mapper used to generate the hidden sequence file name.
094             */
095            public FilenameMapper getMapper() {
096                    return mapper;
097            }
098    
099            /** the mapper used to generate the name of the hidden sequence file from the input sequence file.
100             * @param mapper the mapper used to generate the hidden sequence file name.
101             */
102            public void setMapper(FilenameMapper mapper) {
103                    this.mapper = mapper;
104            }
105    }