001 /*
002 * The Broad Institute
003 *
004 * SOFTWARE COPYRIGHT NOTICE AGREEMENT
005 *
006 * This software and its documentation are copyright 2004 by the
007 * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
008 *
009 * This software is supplied without any warranty or guaranteed support whatsoever.
010 * Neither the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
011 *
012 * Created on Jul 2, 2004
013 */
014 package calhoun.seq;
015
016 import java.io.BufferedReader;
017 import java.io.File;
018 import java.io.FileReader;
019 import java.io.IOException;
020 import java.util.Iterator;
021 import java.util.NoSuchElementException;
022
023 import calhoun.util.ConfigException;
024 import calhoun.util.ErrorException;
025
026 /**
027 * This class implements iterator and allows a fasta file to be
028 * iterated through without having to read anything. Can also take a second file containing quality.
029 *
030 * You can now use new FastaReader().iterator() instead.
031 */
032 public class FastaIterator implements Iterator<FastaSequence> {
033
034 public static final String HEADER_START = ">";
035
036 private File file;
037 private BufferedReader reader;
038 private String nextHeader;
039 private QualityIterator qualityIt;
040
041 public FastaIterator(String file) throws IOException {
042 this(new File(file), null);
043 }
044
045 public FastaIterator(File file) throws IOException {
046 this(file, null);
047 }
048
049 public FastaIterator(String sequence, String quality) throws IOException {
050 this(new File(sequence), quality == null ? null : new File(quality));
051 }
052
053 /** Creates an iterator over a sequence file and also possibly a quality file */
054 public FastaIterator(File sequence, File quality) throws IOException {
055
056 // save the file for error reporting
057 this.file = sequence;
058 if(!file.exists())
059 throw new IOException("The fasta file \""+file+"\" does not exist");
060
061 if(file.length() == 0) {
062 nextHeader = null;
063 return;
064 }
065
066 if(quality != null) {
067 qualityIt = new QualityIterator(quality, false);
068 }
069
070 // create a BufferedReader to read the file
071 reader = new BufferedReader(new FileReader(file));
072
073 // read in the first line
074 String nextHeaderLine;
075 do {
076 nextHeaderLine = reader.readLine();
077 } while(nextHeaderLine.trim().length() == 0);
078
079 // validate the line
080 if (nextHeaderLine.startsWith(HEADER_START) == false)
081 throw new ConfigException(file + " is not a valid FASTA file");
082
083 // clean up the header line
084 nextHeader = nextHeaderLine.substring(1).trim();
085 }
086
087 /** Throws an UnsupportedOperationException. */
088 public void remove() {
089 throw new UnsupportedOperationException("can't remove items from a FastaIterator");
090 }
091
092 public boolean hasNext() {
093 return nextHeader != null;
094 }
095
096 public FastaSequence next() {
097
098 if (nextHeader == null)
099 throw new NoSuchElementException();
100
101 ReaderSequence seq = new ReaderSequence(file, nextHeader);
102
103 try {
104 nextHeader = seq.loadSequence(reader);
105 }
106 catch (Exception e) {
107 throw new RuntimeException("error reading " + this.file, e);
108 }
109
110 try {
111 if(nextHeader == null)
112 reader.close();
113 } catch (IOException ex) {
114 throw new ErrorException("Error closing "+file.getAbsolutePath(), ex);
115 }
116
117 if(qualityIt != null) {
118 qualityIt.addNextQuality(seq);
119 }
120 return seq;
121 }
122
123 /** A minimal sequence class that slurps its sequence in immediately. */
124
125 public static class ReaderSequence extends SimpleFastaSequence {
126 private static final long serialVersionUID = -637754971987954577L;
127 File file;
128
129 public ReaderSequence (File file, String header) {
130 this.file = file;
131 setHeader(header);
132 }
133
134 public File getFile() {
135 return file;
136 }
137
138 public String loadSequence(BufferedReader reader) throws IOException {
139
140 // read the sequence
141
142 String line;
143 StringBuffer buffer = new StringBuffer();
144
145 while (true) {
146
147 // get the next line from the reader
148 line = reader.readLine();
149
150 // see if the line starts a new sequence
151 if (line == null || line.startsWith(HEADER_START))
152 break;
153 else
154 buffer.append(line.trim());
155 }
156
157 setSequence(buffer.toString());
158
159 // extract the header from the header line
160 if (line != null)
161 line = line.substring(1).trim();
162
163 return line;
164 }
165 }
166 }