001    /*
002     * The Broad Institute
003     * 
004     * SOFTWARE COPYRIGHT NOTICE AGREEMENT
005     * 
006     * This software and its documentation are copyright 2004 by the
007     * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
008     * 
009     * This software is supplied without any warranty or guaranteed support whatsoever.
010     * Neither the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
011     * 
012     * Created on Jul 2, 2004
013     */
014    package calhoun.seq;
015    
016    import java.io.BufferedReader;
017    import java.io.File;
018    import java.io.FileReader;
019    import java.io.IOException;
020    import java.util.Iterator;
021    import java.util.NoSuchElementException;
022    
023    import calhoun.util.ConfigException;
024    import calhoun.util.ErrorException;
025    
026    /**
027     * This class implements iterator and allows a fasta file to be 
028     * iterated through without having to read anything.  Can also take a second file containing quality.
029     * 
030     * You can now use new FastaReader().iterator() instead.
031     */
032    public class FastaIterator implements Iterator<FastaSequence> {
033            
034            public static final String HEADER_START = ">";
035            
036            private File file;      
037            private BufferedReader reader;  
038            private String  nextHeader;
039            private QualityIterator qualityIt;
040    
041            public FastaIterator(String file) throws IOException {
042                    this(new File(file), null);
043            }
044            
045            public FastaIterator(File file) throws IOException {
046                    this(file, null);
047            }
048            
049            public FastaIterator(String sequence, String quality) throws IOException {
050                    this(new File(sequence), quality == null ? null : new File(quality));
051            }
052    
053            /** Creates an iterator over a sequence file and also possibly a quality file */
054            public FastaIterator(File sequence, File quality) throws IOException {
055                    
056                    // save the file for error reporting
057                    this.file = sequence;
058                    if(!file.exists())
059                            throw new IOException("The fasta file \""+file+"\" does not exist");
060                    
061                    if(file.length() == 0) {
062                            nextHeader = null;
063                            return;
064                    }
065            
066                    if(quality != null) {
067                            qualityIt = new QualityIterator(quality, false);
068                    }
069                    
070                    // create a BufferedReader to read the file
071                    reader = new BufferedReader(new FileReader(file));
072                    
073                    // read in the first line
074                    String nextHeaderLine;
075                    do {
076                            nextHeaderLine = reader.readLine();
077                    } while(nextHeaderLine.trim().length() == 0);
078                    
079                    // validate the line
080                    if (nextHeaderLine.startsWith(HEADER_START) == false)
081                            throw new ConfigException(file + " is not a valid FASTA file");
082                            
083                    // clean up the header line
084                    nextHeader = nextHeaderLine.substring(1).trim();
085            }
086            
087            /** Throws an UnsupportedOperationException. */
088            public void remove() {
089                    throw new UnsupportedOperationException("can't remove items from a FastaIterator");
090            }
091    
092            public boolean hasNext() {              
093                    return nextHeader != null;
094            }
095            
096            public FastaSequence next() {
097                    
098                    if (nextHeader == null)
099                            throw new NoSuchElementException();
100                    
101                    ReaderSequence seq = new ReaderSequence(file, nextHeader);
102                    
103                    try {
104                            nextHeader = seq.loadSequence(reader);
105                    }
106                    catch (Exception e) {
107                            throw new RuntimeException("error reading " + this.file, e);
108                    }
109                    
110                    try {
111                            if(nextHeader == null)
112                                    reader.close();
113                    } catch (IOException ex) {
114                            throw new ErrorException("Error closing "+file.getAbsolutePath(), ex);
115                    }
116                    
117                    if(qualityIt != null) {
118                            qualityIt.addNextQuality(seq);
119                    }
120                    return seq;
121            }
122            
123            /** A minimal sequence class that slurps its sequence in immediately. */
124    
125            public static class ReaderSequence extends SimpleFastaSequence {
126                    private static final long serialVersionUID = -637754971987954577L;
127                    File file;
128                    
129                    public ReaderSequence (File file, String header) {
130                            this.file = file;
131                            setHeader(header);
132                    }
133                    
134                    public File getFile() {
135                            return file;
136                    }
137                    
138                    public String loadSequence(BufferedReader reader) throws IOException {
139                            
140                            // read the sequence
141                                    
142                                    String line;
143                                    StringBuffer buffer = new StringBuffer();
144                                    
145                                    while (true) {
146                                            
147                                            // get the next line from the reader
148                                                            line = reader.readLine();
149                                                    
150                                            // see if the line starts a new sequence
151                                                    if (line == null || line.startsWith(HEADER_START))
152                                                            break;
153                                                    else
154                                                            buffer.append(line.trim());
155                                    }
156                            
157                            setSequence(buffer.toString());
158                            
159                            // extract the header from the header line
160                                    if (line != null)
161                                            line = line.substring(1).trim();
162                            
163                            return line;
164                    }
165            }
166    }