001    package calhoun.seq;
002    
003    import java.io.BufferedReader;
004    import java.io.ByteArrayOutputStream;
005    import java.io.File;
006    import java.io.FileInputStream;
007    import java.io.IOException;
008    import java.io.InputStream;
009    import java.io.InputStreamReader;
010    import java.util.Iterator;
011    import java.util.zip.GZIPInputStream;
012    
013    import org.apache.commons.logging.Log;
014    import org.apache.commons.logging.LogFactory;
015    
016    import calhoun.util.DataException;
017    
018    public class QualityIterator implements Iterator {
019            private static final Log log = LogFactory.getLog(QualityIterator.class);
020    
021            File file;
022            BufferedReader reader;
023            
024            String lastReadHeader;
025            byte [] lastReadQualitySequence;
026            
027            String lastReadLine;
028            byte [] lastReadQuality;
029            boolean endOfFileReached;
030    
031            public QualityIterator(String fileName, boolean isGzipped) throws IOException {
032                    this(new File(fileName), isGzipped);
033            }
034            
035            public QualityIterator(String fileName) throws IOException {
036                    this(new File(fileName), false);
037            }
038            
039            public QualityIterator(BufferedReader reader) {
040                    commonInit(reader);
041            }
042            
043            protected void commonInit(BufferedReader reader) {
044                    endOfFileReached = false;
045                    this.reader = reader;
046                    readNext();
047            }
048            
049            public QualityIterator(File file, boolean isGzipped) throws IOException {
050                    // save the file for error reporting
051                    this.file = file;
052                    
053                    InputStream is = new FileInputStream(file);
054                    
055                    if(isGzipped) {
056                            is = new GZIPInputStream(is);
057                    }
058                    
059                    // create a BufferedReader to read the file
060                    reader = new BufferedReader(new InputStreamReader(is));
061                    commonInit(reader);
062            }
063    
064            /** Returns the next set of qualities in this file. */
065            public Object next() {
066                    return nextQuality();
067            }
068    
069            /** Returns the next set of qualities in this file.  Provides a detailed return type. */
070            public FastaSequence nextQuality() {
071                    SimpleFastaSequence qs = new SimpleFastaSequence();
072                    addNextQuality(qs);
073                    return qs;
074            }
075            
076            /** Adds the next sequence of qualities in the file to the given FastaSequence.  If the FastaSequence already has a sequence set, verifies that the quality matches the sequence (header and length). */
077            public void addNextQuality(SimpleFastaSequence input) {
078                    if(endOfFileReached)
079                            throw new DataException("Attempted to read past end of file "+file.getAbsolutePath());
080    
081                    // If the header is set on the input sequence, verify that it is the same.
082                    if(input.header == null)
083                            input.setHeader(lastReadHeader);
084                    else if(!input.header.equals(lastReadHeader)) {
085                            throw new DataException("Quality file header "+lastReadHeader+" doesn't match sequence header "+input.header);
086                    }
087    
088                    // If the sequence is set on the input sequence, verify that it is the same length.
089                    if(input.hasSequence() && (input.getLength() != lastReadQualitySequence.length)) {
090                            throw new DataException("Quality file length "+lastReadQualitySequence.length+" doesn't match sequence length "+input.getLength()+" for "+input.header);
091                    }
092                            
093                    input.setQuality(lastReadQualitySequence);
094                    
095                    readNext();
096            }
097            
098            public boolean hasNext() {
099                    return !endOfFileReached;
100            }
101            
102            protected void readNext() {
103                    String line;
104                    try {
105                    do {
106                            if(lastReadLine == null)
107                                    line = reader.readLine();
108                            else
109                                    line = lastReadLine;
110                            
111                            if(line == null) {
112                                    endOfFileReached = true;
113                                    return;
114                            }
115                            
116                            line = line.trim();
117                            if(!line.startsWith(">")) {
118                                    throw new DataException("Expected \">\" when reading next quality sequence");
119                            }
120                    } while(line.length() == 0);
121    
122                    lastReadHeader = line.substring(1).trim();
123                    ByteArrayOutputStream os = new ByteArrayOutputStream();
124                    
125                    while(true) {
126                            line = reader.readLine();
127                            if(line == null) {
128                                    lastReadLine = null;
129                                    break;
130                            }
131                            
132                            line = line.trim();
133                            if(line.startsWith(">")) {
134                                    lastReadLine = line;
135                                    break;
136                            }
137                            
138                            if(line.length() > 0) {
139                                    String numbers [] = line.split(" ");
140                                    for(int i=0;i<numbers.length;i++) {
141                                            int val = Integer.parseInt(numbers[i]);
142                                            os.write(val);
143                                    }
144                            }
145                    }
146                    
147                    lastReadQualitySequence = os.toByteArray();
148                    } catch(IOException e) {
149                            throw new DataException("Exception reading "+file.getPath(), e);
150                    }
151            }
152            
153            public void remove() {
154                    throw new UnsupportedOperationException();
155            }
156    }