001 package calhoun.seq;
002
003 import java.io.BufferedReader;
004 import java.io.ByteArrayOutputStream;
005 import java.io.File;
006 import java.io.FileInputStream;
007 import java.io.IOException;
008 import java.io.InputStream;
009 import java.io.InputStreamReader;
010 import java.util.Iterator;
011 import java.util.zip.GZIPInputStream;
012
013 import org.apache.commons.logging.Log;
014 import org.apache.commons.logging.LogFactory;
015
016 import calhoun.util.DataException;
017
018 public class QualityIterator implements Iterator {
019 private static final Log log = LogFactory.getLog(QualityIterator.class);
020
021 File file;
022 BufferedReader reader;
023
024 String lastReadHeader;
025 byte [] lastReadQualitySequence;
026
027 String lastReadLine;
028 byte [] lastReadQuality;
029 boolean endOfFileReached;
030
031 public QualityIterator(String fileName, boolean isGzipped) throws IOException {
032 this(new File(fileName), isGzipped);
033 }
034
035 public QualityIterator(String fileName) throws IOException {
036 this(new File(fileName), false);
037 }
038
039 public QualityIterator(BufferedReader reader) {
040 commonInit(reader);
041 }
042
043 protected void commonInit(BufferedReader reader) {
044 endOfFileReached = false;
045 this.reader = reader;
046 readNext();
047 }
048
049 public QualityIterator(File file, boolean isGzipped) throws IOException {
050 // save the file for error reporting
051 this.file = file;
052
053 InputStream is = new FileInputStream(file);
054
055 if(isGzipped) {
056 is = new GZIPInputStream(is);
057 }
058
059 // create a BufferedReader to read the file
060 reader = new BufferedReader(new InputStreamReader(is));
061 commonInit(reader);
062 }
063
064 /** Returns the next set of qualities in this file. */
065 public Object next() {
066 return nextQuality();
067 }
068
069 /** Returns the next set of qualities in this file. Provides a detailed return type. */
070 public FastaSequence nextQuality() {
071 SimpleFastaSequence qs = new SimpleFastaSequence();
072 addNextQuality(qs);
073 return qs;
074 }
075
076 /** Adds the next sequence of qualities in the file to the given FastaSequence. If the FastaSequence already has a sequence set, verifies that the quality matches the sequence (header and length). */
077 public void addNextQuality(SimpleFastaSequence input) {
078 if(endOfFileReached)
079 throw new DataException("Attempted to read past end of file "+file.getAbsolutePath());
080
081 // If the header is set on the input sequence, verify that it is the same.
082 if(input.header == null)
083 input.setHeader(lastReadHeader);
084 else if(!input.header.equals(lastReadHeader)) {
085 throw new DataException("Quality file header "+lastReadHeader+" doesn't match sequence header "+input.header);
086 }
087
088 // If the sequence is set on the input sequence, verify that it is the same length.
089 if(input.hasSequence() && (input.getLength() != lastReadQualitySequence.length)) {
090 throw new DataException("Quality file length "+lastReadQualitySequence.length+" doesn't match sequence length "+input.getLength()+" for "+input.header);
091 }
092
093 input.setQuality(lastReadQualitySequence);
094
095 readNext();
096 }
097
098 public boolean hasNext() {
099 return !endOfFileReached;
100 }
101
102 protected void readNext() {
103 String line;
104 try {
105 do {
106 if(lastReadLine == null)
107 line = reader.readLine();
108 else
109 line = lastReadLine;
110
111 if(line == null) {
112 endOfFileReached = true;
113 return;
114 }
115
116 line = line.trim();
117 if(!line.startsWith(">")) {
118 throw new DataException("Expected \">\" when reading next quality sequence");
119 }
120 } while(line.length() == 0);
121
122 lastReadHeader = line.substring(1).trim();
123 ByteArrayOutputStream os = new ByteArrayOutputStream();
124
125 while(true) {
126 line = reader.readLine();
127 if(line == null) {
128 lastReadLine = null;
129 break;
130 }
131
132 line = line.trim();
133 if(line.startsWith(">")) {
134 lastReadLine = line;
135 break;
136 }
137
138 if(line.length() > 0) {
139 String numbers [] = line.split(" ");
140 for(int i=0;i<numbers.length;i++) {
141 int val = Integer.parseInt(numbers[i]);
142 os.write(val);
143 }
144 }
145 }
146
147 lastReadQualitySequence = os.toByteArray();
148 } catch(IOException e) {
149 throw new DataException("Exception reading "+file.getPath(), e);
150 }
151 }
152
153 public void remove() {
154 throw new UnsupportedOperationException();
155 }
156 }