001 package calhoun.analysis.crf.io;
002
003 import java.io.BufferedReader;
004 import java.io.BufferedWriter;
005 import java.io.File;
006 import java.io.FileReader;
007 import java.io.FileWriter;
008 import java.io.IOException;
009 import java.io.Writer;
010 import java.util.Iterator;
011 import java.util.List;
012 import java.util.Map;
013
014 import calhoun.util.Assert;
015 import calhoun.util.ErrorException;
016
017 /** reads in an input consisting of a list of ints that correspond to binary values. Can be used as a standalone
018 * input component or part of an interleaved input.
019 */
020 public class IntInput extends InterleavedInputComponentBase implements TrainingSequenceIO {
021 private static final long serialVersionUID = 4413724139445660883L;
022
023 public boolean read(BufferedReader r, Map<String, InputSequence<?>> output) throws IOException {
024 int[] data = readSequence(r);
025 if(data == null) {
026 return false;
027 }
028 output.put(name, new InputSequenceInt(data));
029 return true;
030 }
031
032 public int[] readSequence(BufferedReader r) throws IOException {
033 String str = r.readLine();
034 if(str == null) {
035 return null;
036 }
037 int[] data = new int[str.length()];
038 try {
039 for (int i = 0; i < str.length(); ++i) {
040 int temp = str.charAt(i) - '0';
041 if ( (temp<0) || (temp>9)) {
042 temp = str.charAt(i) - 'A' + 10;
043
044 if ( (temp<10) || (temp>35)) {
045 temp = str.charAt(i) - 'a' + 36;
046 Assert.a( (temp>=36) && (temp<62), "Offending character was '" + str.charAt(i));
047 }
048 }
049 data[i] = temp;
050 }
051 } catch (NumberFormatException ex) {
052 throw new ErrorException(ex);
053 }
054 return data;
055 }
056
057 public void write(Writer w, Map<String, ? extends InputSequence<?>> data) throws IOException {
058 writeSequence(w, ((InputSequenceInt) data.get(name)).getData());
059 }
060
061 public void writeSequence(Writer w, int[] data) throws IOException {
062 for(int i: data) {
063 if (i<10) {
064 w.write('0'+i);
065 } else if (i<36) {
066 w.write('A'+(i-10));
067 } else if (i<62) {
068 w.write('a'+(i-36));
069 } else { throw new IOException(); }
070 }
071 w.write('\n');
072 }
073
074 public void readTrainingSequences(Object location, List<TrainingSequence<Map<String, Object>>> seqs) throws IOException {
075 BufferedReader r = new BufferedReader(new FileReader(new File((String) location)));
076 Iterator<TrainingSequence<Map<String, Object>>> it = seqs.iterator();
077 while(r.ready()) {
078 int[] data = readSequence(r);
079 if(data == null)
080 break;
081 TrainingSequence<Map<String, Object>> seq = it.next();
082 seq.setY(data);
083 }
084 }
085
086 public void writeTrainingSequences(Object location, Iterator<int[]> data) throws IOException {
087 BufferedWriter w = new BufferedWriter(new FileWriter(new File((String) location)));
088 while(data.hasNext()) {
089 int[] seq = data.next();
090 writeSequence(w, seq);
091 }
092 w.close();
093 }
094
095 /** Convenience function for creating training sequences in test data. */
096 public static List<? extends TrainingSequence<?>> prepareData(String str) throws Exception {
097 return new InputHandlerInterleaved(new IntInput(), true).readTrainingData(str) ;
098 }
099 }