001    package calhoun.analysis.crf.io;
002    
003    import java.io.BufferedReader;
004    import java.io.BufferedWriter;
005    import java.io.File;
006    import java.io.FileReader;
007    import java.io.FileWriter;
008    import java.io.IOException;
009    import java.io.Writer;
010    import java.util.Iterator;
011    import java.util.List;
012    import java.util.Map;
013    
014    import calhoun.util.Assert;
015    import calhoun.util.ErrorException;
016    
017    /** reads in an input consisting of a list of ints that correspond to binary values.  Can be used as a standalone
018     * input component or part of an interleaved input.
019     */
020    public class IntInput extends InterleavedInputComponentBase implements TrainingSequenceIO {
021            private static final long serialVersionUID = 4413724139445660883L;
022            
023            public boolean read(BufferedReader r, Map<String, InputSequence<?>> output) throws IOException {
024                    int[] data = readSequence(r);
025                    if(data == null) {
026                            return false;
027                    }
028                    output.put(name, new InputSequenceInt(data));
029                    return true;
030            }
031            
032            public int[] readSequence(BufferedReader r) throws IOException {
033                    String str = r.readLine();
034                    if(str == null) {
035                            return null;
036                    }
037                    int[] data = new int[str.length()];
038                    try {
039                            for (int i = 0; i < str.length(); ++i) {
040                                    int temp = str.charAt(i) - '0';
041                                    if ( (temp<0) || (temp>9)) {
042                                            temp = str.charAt(i) - 'A' + 10;
043                                            
044                                            if ( (temp<10) || (temp>35)) {
045                                                    temp = str.charAt(i) - 'a' + 36;
046                                                    Assert.a( (temp>=36) && (temp<62), "Offending character was '" + str.charAt(i));
047                                            }
048                                    }
049                                    data[i] = temp;
050                            }
051                    } catch (NumberFormatException ex) {
052                            throw new ErrorException(ex);
053                    }
054                    return data;
055            }
056            
057            public void write(Writer w, Map<String, ? extends InputSequence<?>> data) throws IOException {
058                    writeSequence(w, ((InputSequenceInt) data.get(name)).getData());
059            }
060    
061            public void writeSequence(Writer w, int[] data) throws IOException {
062                    for(int i: data) {
063                            if (i<10) {
064                                    w.write('0'+i);
065                            } else if (i<36) {
066                                    w.write('A'+(i-10));
067                            } else if (i<62) {
068                                    w.write('a'+(i-36));
069                            } else { throw new IOException(); }
070                    }
071                    w.write('\n');
072            }
073    
074            public void readTrainingSequences(Object location, List<TrainingSequence<Map<String, Object>>> seqs) throws IOException {
075                    BufferedReader r = new BufferedReader(new FileReader(new File((String) location)));
076                    Iterator<TrainingSequence<Map<String, Object>>> it = seqs.iterator();
077                    while(r.ready()) {
078                            int[] data = readSequence(r);
079                            if(data == null)
080                                    break;
081                            TrainingSequence<Map<String, Object>> seq = it.next();
082                            seq.setY(data);
083                    }
084            }
085    
086            public void writeTrainingSequences(Object location, Iterator<int[]> data) throws IOException {
087                    BufferedWriter w = new BufferedWriter(new FileWriter(new File((String) location)));
088                    while(data.hasNext()) {
089                            int[] seq = data.next();
090                            writeSequence(w, seq);
091                    }
092                    w.close();
093            }
094    
095            /** Convenience function for creating training sequences in test data. */
096            public static List<? extends TrainingSequence<?>> prepareData(String str) throws Exception {
097                    return new InputHandlerInterleaved(new IntInput(), true).readTrainingData(str) ;
098            }
099    }