001 package calhoun.seq;
002
003 import java.io.BufferedWriter;
004 import java.io.File;
005 import java.io.FileNotFoundException;
006 import java.io.FileWriter;
007 import java.io.IOException;
008 import java.io.Writer;
009
010 import org.apache.commons.logging.Log;
011 import org.apache.commons.logging.LogFactory;
012
013 import calhoun.util.ConfigException;
014 import calhoun.util.FileUtil;
015
016 /** Used for writing fasta files. Formats sequences to a specific line length.
017 * Allows multiple sequences to be to the same file.
018 */
019 public class FastaWriter {
020 private static final Log log = LogFactory.getLog(FastaWriter.class);
021
022 public static final int DEFAULT_LINE_LENGTH = 60;
023
024 BufferedWriter writer;
025 String filename = "<unknown>";
026 int lineLength;
027
028 // If this instance was instantiated with a writer rather than
029 // a file, this is false and this object is not responsible for
030 // closing the writer
031 boolean ownsWriter = true;
032
033 /** Opens a fasta file for writing. Uses the default line length.
034 * @param file Filename of the file to open
035 * @param append true if sequences should be appended to this file, false if it should be replaced.
036 * */
037 public FastaWriter(String file, boolean append) {
038 this(new File(file), append, DEFAULT_LINE_LENGTH);
039 }
040
041 /** Opens a fasta file for writing.
042 * @param file Filename of the file to open
043 * @param append true if sequences should be appended to this file, false if it should be replaced.
044 * @param lineLength Allows the length of a sequence line to be set.
045 * */
046 public FastaWriter(String file, boolean append, int lineLength) {
047 this(new File(file), append, lineLength);
048 }
049
050 /** Opens a fasta file for writing. Uses the default line length
051 * @param file Filename of the file to open
052 * @param append true if sequences should be appended to this file, false if it should be replaced.
053 * */
054 public FastaWriter(File file, boolean append) {
055 this(file, append, DEFAULT_LINE_LENGTH);
056 }
057
058 /** Opens a fasta file for writing.
059 * @param file Filename of the file to open
060 * @param append true if sequences should be appended to this file, false if it should be replaced.
061 * @param lineLength Allows the length of a sequence line to be set.
062 * */
063 public FastaWriter(File file, boolean append, int lineLength) {
064 try {
065 setupWriter(new FileWriter(file, append), lineLength);
066 // fasta files don't require a terminal newline (are terminal newlines allowed?)
067 // but initial newlines are not allowed
068 if (append && file.length() > 0) {
069 writer.newLine();
070 }
071 } catch (FileNotFoundException ex) {
072 throw new ConfigException("Not able to write fasta file: " + file, ex);
073 } catch (IOException ex) {
074 throw new ConfigException("Error writing fasta file: " + file, ex);
075 }
076 filename = file.getAbsolutePath();
077 }
078
079 public FastaWriter(Writer w, int lineLength) {
080 setupWriter(w, lineLength);
081 ownsWriter = false;
082 }
083
084 protected void setupWriter(Writer w, int lineLength) {
085 this.lineLength = lineLength;
086 writer = new BufferedWriter(w);
087 }
088
089 /** Writes a sequence to the file.
090 * @param header The fasta header for the sequence. Should not include '>'.
091 * @param content The actual sequence. It will be broken up into lines based on linelength as it is written out.
092 * */
093 public void writeSeq(String header, String content) {
094 try {
095 writer.write(">");
096 writer.write(header);
097 writer.newLine();
098 int i = 0;
099 log.debug("Writing sequence of length: " + content.length() + " to " + filename);
100 while (i < content.length()) {
101 CharSequence line = content.subSequence(i, Math.min(lineLength+i, content.length()) );
102 writer.write(line.toString());
103 writer.newLine();
104 i = i + lineLength;
105 }
106 writer.flush();
107 }
108 catch(IOException ex) {
109 throw new ConfigException("Not able to write fasta file: " + filename, ex);
110 }
111 }
112
113 /** Writes a portion of the sequence to the file.
114 * @param header The fasta header for the sequence. Should not include '>'.
115 * @param content The actual sequence. It will be broken up into lines based on linelength as it is written out.
116 * @param start 1-based index of the first base to write. If null, writing will start from the beginning of the sequence.
117 * @param stop 1-based index of the last base to write. If null, writing will continue until the end of the sequence.
118 * */
119 public void writeSeq(String header, String string, Integer start, Integer stop) {
120 CharSequence sequence = string;
121 // trim the sequence to the specified coordinates
122 // note that we don't subtract 1 from the stop coordinate,
123 // because of the way String.substring works
124 if (start != null && stop != null)
125 sequence = sequence.subSequence(start.intValue() - 1, stop.intValue());
126 else if (start != null)
127 sequence = sequence.subSequence(start.intValue() - 1, sequence.length());
128 else if (stop != null)
129 sequence = sequence.subSequence(0, stop.intValue());
130 writeSeq(header, sequence.toString());
131 }
132
133 /** Writes a sequence to the file, which may have been previously
134 * read from a different file.
135 */
136 public void writeSeq(FastaSequence seq) {
137 writeSeq(seq.getHeader(), seq.getSequence());
138 }
139
140 /** Closes the file. Once it is closed, no more sequences may be written.
141 * Open a new FastaWriter to append more sequences into the file.
142 */
143 public void close() {
144 FileUtil.safeClose(writer);
145 }
146
147 public String getFilename() {
148 return filename;
149 }
150
151 @Override
152 protected void finalize() {
153 if (ownsWriter) {
154 close();
155 }
156 }
157 }