001 package calhoun.analysis.crf.io;
002
003 import java.io.BufferedWriter;
004 import java.io.FileWriter;
005 import java.io.IOException;
006 import java.io.Serializable;
007 import java.io.Writer;
008 import java.util.ArrayList;
009 import java.util.Iterator;
010 import java.util.List;
011 import java.util.Set;
012
013 import org.apache.commons.logging.Log;
014 import org.apache.commons.logging.LogFactory;
015
016 import calhoun.analysis.crf.ModelManager;
017 import calhoun.analysis.crf.io.IntervalInputSequence.IntervalRangeMapValue;
018 import calhoun.analysis.crf.statistics.PredictedActualBinaryContingencyTable;
019 import calhoun.util.Assert;
020 import calhoun.util.DenseBooleanMatrix2D;
021 import calhoun.util.FileUtil;
022 import calhoun.util.RangeMap;
023
/** A legacy output handler that computes basic stats and gene calling stats, and then writes out a GTF file.
 */
026 public class OutputHandlerGeneCallPredict implements OutputHandler {
027 private static final long serialVersionUID = 2014487490985409134L;
028
029 private static final Log log = LogFactory.getLog(OutputHandlerGeneCallPredict.class);
030
031 private ModelManager manager;
032 private InputHandler inputHandler;
033 String location;
034 boolean writeTrainingData = false;
035
036 /** default constructor. <code>ModelManager</code> and <code>InputHandler</code> must be configured separately. */
037 public OutputHandlerGeneCallPredict() {
038 }
039
040 /** creates an output handler using this model and input handler
041 * @param manager the model used for gene calling. Used for calculating stats.
042 * @param inputHandler input handler which will be used for writing out the input sequence with the results.
043 */
044 public OutputHandlerGeneCallPredict(ModelManager manager, InputHandler inputHandler) {
045 this.inputHandler = inputHandler;
046 setManager(manager);
047 }
048
049 public void setOutputLocation(String location) {
050 this.location = location;
051 }
052
053 public void writeOutput(InputSequence<?> sequence, int[] hiddenStates) throws IOException {
054 throw new UnsupportedOperationException();
055 }
056
057 public void writeTestOutput(InputSequence<?> sequence, int[] truePath, int[] hiddenStates) throws IOException {
058 calcResultIncrement(new TrainingSequence(sequence, truePath), hiddenStates);
059 }
060
061 public void outputComplete() throws IOException {
062 if(location != null) {
063 if(writeTrainingData) {
064 try {
065 inputHandler.writeTrainingData(location, labeled);
066 }
067 catch(Exception ex) {
068 log.warn("Unable to write training data", ex);
069 }
070 }
071 writeGTF(labeled, location + ".gtf");
072 System.out.print(this);
073 writeResults(location + ".dat");
074 }
075 }
076
077 /** retursn the input handler used to write out the input sequences
078 * @return the inputHandler which will be used to write out the input sequences
079 */
080 public InputHandler getInputHandler() {
081 return inputHandler;
082 }
083
084 /** sets the inputHandler used to write out the input sequences
085 * @param inputHandler the inputHandler used to write out the input sequences
086 */
087 public void setInputHandler(InputHandler inputHandler) {
088 this.inputHandler = inputHandler;
089 }
090
091 /** gets the model used to generate results
092 * @return the model used to generate results
093 */
094 public ModelManager getManager() {
095 return manager;
096 }
097
098 /** sets the model used to generate results
099 * @param manager the model used to generate results
100 */
101 public void setManager(ModelManager manager) {
102 this.manager = manager;
103
104 ctCodingNucleotide = new PredictedActualBinaryContingencyTable();
105
106 ctExons = new PredictedActualBinaryContingencyTable();
107 ctExons.forgetTN();
108
109 nStates = manager.getNumStates();
110 ctStates = new ArrayList<PredictedActualBinaryContingencyTable>();
111 for (int i=0; i<nStates; i++) {
112 ctStates.add(new PredictedActualBinaryContingencyTable());
113 }
114
115 DenseBooleanMatrix2D LT = manager.getLegalTransitions();
116 fromInd = new ArrayList<Integer>();
117 toInd = new ArrayList<Integer>();
118 for (int from=0; from<nStates; from++) {
119 for (int to=0; to<nStates; to++ ) {
120 if (LT.getQuick(from,to)) {
121 fromInd.add(from);
122 toInd.add(to);
123 }
124 }
125 }
126 nTransitions = fromInd.size();
127 ctTransitions = new ArrayList<PredictedActualBinaryContingencyTable>();
128 for (int i=0; i<nTransitions; i++) {
129 ctTransitions.add(new PredictedActualBinaryContingencyTable());
130 }
131 }
132
133 private List<TrainingSequence<?>> labeled = new ArrayList<TrainingSequence<?>>();
134 private int correct = 0;
135 private int incorrect = 0;
136 private int perfect = 0;
137 private int imperfect = 0;
138 private transient double[] viterbiScores;
139 private double lla = 0, llv = 0;
140
141 // Info we'll need to know about the model in order to be clever about gathering stats
142 // (the manager must be provided, other things derived from it)
143 private int nStates;
144 private int nTransitions;
145 private List<Integer> fromInd;
146 private List<Integer> toInd;;
147
148 // The 2x2 contingency tables for which we'll keep track of results:
149 private PredictedActualBinaryContingencyTable ctCodingNucleotide;
150 private PredictedActualBinaryContingencyTable ctExons;
151 private List<PredictedActualBinaryContingencyTable> ctStates;
152 private List<PredictedActualBinaryContingencyTable> ctTransitions;
153
154 /** returns the exact nucleotide accuracy of the result */
155 public float getAccuracy() {
156 return correct / (float)(correct+incorrect);
157 }
158
159 public static class Results implements Serializable {
160 private static final long serialVersionUID = 9082449588200635355L;
161 public PredictedActualBinaryContingencyTable ctCodingNucleotide;
162 public PredictedActualBinaryContingencyTable ctExons;
163 public List<PredictedActualBinaryContingencyTable> ctStates;
164 public List<PredictedActualBinaryContingencyTable> ctTransitions;
165 public int correct;
166 public int incorrect;
167 public int perfect;
168 public int imperfect;
169 }
170
171 void writeResults(String loc) throws IOException {
172 Results results = new Results();
173 results.ctCodingNucleotide = ctCodingNucleotide;
174 results.ctExons = ctExons;
175 results.ctStates = ctStates;
176 results.ctTransitions = ctTransitions;
177 results.correct = correct;
178 results.incorrect = incorrect;
179 results.perfect = perfect;
180 results.imperfect = imperfect;
181 FileUtil.writeObject(loc, results);
182 }
183
184 @Override
185 public String toString() {
186 String ret = "";
187
188 for (int s=0; s<nStates; s++) {
189 ret += "[State=" + manager.getStateName(s) + "] ";
190 ctStates.get(s).freeze();
191 ret += "Predicted: " + ctStates.get(s).pp();
192 ret += "\n";
193 }
194
195 for (int t=0; t<nTransitions; t++) {
196 ret += "[Transition " + manager.getStateName(fromInd.get(t)) + " --> " + manager.getStateName(toInd.get(t)) + " ] ";
197 ctTransitions.get(t).freeze();
198 ret += "Predicted: " + ctTransitions.get(t).pp();
199 ret += "\n";
200 }
201
202 ctCodingNucleotide.freeze();
203 ret += "[Coding nucleotides] Predicted: " + ctCodingNucleotide.pp() + "\n";
204
205 ctExons.freeze();
206 ret += "[Coding exons] Predicted: " + ctExons.pp() + "\n";
207
208 if (lla>0) {
209 ret += "LLA:" + lla + " LLV:" + llv + " " + "\n";
210 }
211
212 //ret += String.format("Perfectly predicted hidden sequences: %d/%d %.2f %%",perfect,perfect+imperfect,perfect*100.0/(float) (perfect+imperfect))+ "\n";
213
214 //ret += String.format("Nucleotide Hidden State Agreement: %d/%d %.2f %%",correct, correct + incorrect, correct * 100.0 / (float) (correct + incorrect)) + "\n";
215
216
217 return ret;
218 }
219
220 /** calculates statstics and output for results on a given test sequence */
221 public void calcResultIncrement(TrainingSequence training, int[] predictedHiddenSequence) {
222 labeled.add(new TrainingSequence(training.getInputSequence(), predictedHiddenSequence)); // This is only place that labelled gets added to???
223 // So I guess the results just get built up incrementally, both the actuall hidden sequences and the stats?
224 Assert.a(training.length() == predictedHiddenSequence.length);
225 int[] actualHiddenSequence = new int[training.length()];
226 for (int i=0; i<training.length(); i++) {
227 actualHiddenSequence[i] = training.getY(i);
228 }
229 boolean thisperfect = true;
230 for (int i = 0; i < predictedHiddenSequence.length; ++i) {
231 int predY = predictedHiddenSequence[i];
232 int realY = actualHiddenSequence[i];
233
234 if (realY == predY) { correct += 1; } else { incorrect += 1; thisperfect = false; }
235
236 ctCodingNucleotide.increment(isCodingPlus(predY),isCodingPlus(realY));
237 ctCodingNucleotide.increment(isCodingMinus(predY),isCodingMinus(realY));
238
239 for (int s=0; s<nStates; s++) {
240 ctStates.get(s).increment((predY==s),(realY==s));
241 }
242 }
243 if (thisperfect) {
244 perfect++;
245 } else {
246 imperfect++;
247 }
248 for (int i = 1; i < predictedHiddenSequence.length; ++i) {
249 int predY = predictedHiddenSequence[i];
250 int realY = actualHiddenSequence[i];
251 int predYp = predictedHiddenSequence[i-1];
252 int realYp = actualHiddenSequence[i-1];
253
254 for (int t=0; t<nTransitions; t++) {
255 boolean bPred = ( (predYp==fromInd.get(t)) && (predY==toInd.get(t)) );
256 boolean bReal = ( (realYp==fromInd.get(t)) && (realY==toInd.get(t)) );
257 ctTransitions.get(t).increment( bPred , bReal );
258 }
259 }
260
261 // Now let's increment the contingency table for exons; note that here not counting TN's
262 RangeMap predExonsPlus = new RangeMap();
263 RangeMap predExonsMinus = new RangeMap();
264 RangeMap realExonsPlus = new RangeMap();
265 RangeMap realExonsMinus = new RangeMap();
266 makeExonRangeMapFrom13SV(predictedHiddenSequence,predExonsPlus,predExonsMinus);
267 makeExonRangeMapFrom13SV(actualHiddenSequence,realExonsPlus,realExonsMinus);
268 incrementCTFromRangeMaps(ctExons,predExonsPlus,realExonsPlus);
269 incrementCTFromRangeMaps(ctExons,predExonsMinus,realExonsMinus);
270
271 }
272
273 private void incrementCTFromRangeMaps(PredictedActualBinaryContingencyTable ct, RangeMap pred, RangeMap real) {
274 // By looping through the predictions, can get at TP and FP
275 Set<IntervalRangeMapValue> pv = pred.values();
276 Iterator<IntervalRangeMapValue> pvi = pv.iterator();
277 while(pvi.hasNext()) {
278 IntervalRangeMapValue irmv = pvi.next();
279 Set<IntervalRangeMapValue> vals = real.find(irmv.start,irmv.end);
280 if(vals.size() == 0) {
281 ct.incrementFP();
282 }
283 else {
284 IntervalRangeMapValue val = vals.iterator().next();
285 if(val.start == irmv.start && val.end == irmv.end) {
286 ct.incrementTP();
287 } else {
288 ct.incrementFP();
289 }
290 }
291 }
292 Set<IntervalRangeMapValue> rv = real.values();
293 Iterator<IntervalRangeMapValue> rvi = rv.iterator();
294 while(rvi.hasNext()) {
295 IntervalRangeMapValue irmv = rvi.next();
296 if (!pred.hasEntry(irmv.start,irmv.end)) {
297 ct.incrementFN();
298 }
299 Set<IntervalRangeMapValue> vals = pred.find(irmv.start,irmv.end);
300 if(vals.size() == 0) {
301 ct.incrementFN();
302 }
303 else {
304 IntervalRangeMapValue val = vals.iterator().next();
305 if(val.start == irmv.start && val.end == irmv.end) {
306 } else {
307 ct.incrementFN();
308 }
309 }
310 }
311 }
312
313
314 private void makeExonRangeMapFrom13SV(int[] hidden, RangeMap exonsPlus, RangeMap exonsMinus) {
315
316 int len = hidden.length;
317
318 for (int i=1; i<len; i++) {
319 if ((!isCodingPlus(hidden[i-1]) && (isCodingPlus(hidden[i])))) {
320 int j=i;
321 while ((isCodingPlus(hidden[j])) &&(j<(len-1))) { j++; }
322 exonsPlus.add(i,j,new IntervalRangeMapValue(i,j,1.0));
323 //log.info("Add + "+i+" "+j);
324 }
325 if ((!isCodingMinus(hidden[i-1]) && (isCodingMinus(hidden[i])))) {
326 int j=i;
327 while ((isCodingMinus(hidden[j])) &&(j<len-1)) { j++; }
328 exonsMinus.add(i,j,new IntervalRangeMapValue(i,j,1.0));
329 //log.info("Add - "+i+" "+j);
330 }
331 }
332 }
333
334 private boolean isCodingPlus(int y) {
335 Assert.a( (y>=0) && (y<13) );
336 if ( (y==1) || (y==2) || (y==3) ) { return true; }
337 return false;
338 }
339
340 private boolean isCodingMinus(int y) {
341 Assert.a( (y>=0) && (y<13) );
342 if ( (y==7) || (y==8) || (y==9) ) { return true; }
343 return false;
344 }
345
346 public void loglikelihoodIncrement(double logLikelihoodActual, double logLikelihoodViterbi) {
347 lla += logLikelihoodActual;
348 llv += logLikelihoodViterbi;
349 }
350
351 public TrainingSequence getLabeled(int i) {
352 return labeled.get(i);
353 }
354
355 String seqName;
356 String genePrefix;
357 long offset;
358
359 // This function converts a 13 state model hidden sequence to a GTF file.
360 public void writeGTF(List<? extends TrainingSequence<?>> refStates, String filename) throws IOException
361 {
362 int ref, geneNum, seqCount, frame=-1;
363 long i, exonStart, exonEnd, end;
364 boolean inPlusExon, inMinusExon, firstExon, startCodonSplit;
365 String strand;
366 Writer fout = new BufferedWriter(new FileWriter(filename));
367 exonStart = exonEnd = 0;
368 geneNum = 1;
369 seqCount = 0;
370
371 // Determine if model is tricycle13 or interval13.
372 boolean interval13 = false;
373 int prevState, state;
374 for (TrainingSequence seq : refStates) {
375 if (seq.length() == 0) continue;
376
377 prevState = seq.getY(0);
378 for (i=1; i<seq.length(); i++) {
379 state = seq.getY((int)i);
380 if (prevState == 0 && (state==2 || state==3 || state==7 || state==8)) {
381 interval13 = true;
382 break;
383 }
384 prevState = state;
385 }
386 if (interval13)
387 break;
388 }
389
390 for (TrainingSequence seq : refStates) {
391
392 if (interval13) {
393 SequenceConverter.convertSeqFromInterval13ToTricycle13(seq);
394 }
395
396 inPlusExon = false;
397 inMinusExon = false;
398 firstExon = true;
399 startCodonSplit = false;
400
401 parseSeqName(seq, seqCount);
402
403 for (i=0; i<seq.length(); i++)
404 {
405 ref = seq.getY((int)i);
406
407 if (ref == 1 || ref == 2 || ref == 3) // in a plus exon
408 {
409 if (!inPlusExon)
410 {
411 exonStart = i+1;
412 inPlusExon = true;
413 frame = setFrame(ref);
414 }
415 }
416 else if (ref == 7 || ref == 8 || ref == 9) // in a minus exon
417 {
418 if (!inMinusExon)
419 {
420 exonStart = i+1;
421 inMinusExon = true;
422 frame = setFrame(ref);
423 if (firstExon) {
424 if (i < 3)
425 System.err.println("Minus strand gene start is within 3 nucleotides of sequence start. No stop codon writen to GTF for gene starting at position " + (exonStart+offset));
426 else
427 writeGFTLine(fout,seqName,"stop_codon",exonStart+offset-3,exonStart+offset-1,"-",frame,genePrefix,geneNum);
428
429 }
430 }
431 }
432 else if ( inPlusExon && (ref == 4 || ref == 5 || ref == 6) ) { // just ended an exon on plus strand, now in a plus intron
433 strand = "+";
434 inPlusExon = false;
435 exonEnd = i;
436 if (firstExon) {
437 if (exonEnd - exonStart + 1 < 3) { end = exonEnd + offset; startCodonSplit = true;}
438 else { end = exonStart+offset+2; }
439 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,end,strand,frame,genePrefix,geneNum);
440 firstExon = false;
441 }
442 else if (startCodonSplit) { // at second exon that contains part of start codon
443 Assert.a(frame==1 || frame==2);
444 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,exonStart+offset+frame-1,strand,frame,genePrefix,geneNum);
445 startCodonSplit = false;
446 }
447 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset,strand,frame,genePrefix,geneNum);
448 }
449 else if (inMinusExon && (ref == 10 || ref == 11 || ref == 12)) { // just ended an exon on minus strand, now in a minus intron
450 strand = "-";
451 inMinusExon = false;
452 firstExon = false;
453 exonEnd = i;
454 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset,strand,frame,genePrefix,geneNum);
455 }
456 else // now in intergenic region
457 {
458 boolean write = true;
459 if (inPlusExon) // was in gene at previous nucleotide
460 {
461 strand = "+";
462 exonEnd = i;
463 if (firstExon) {
464 if (exonEnd - exonStart + 1 < 3) {
465 System.err.println("Single '" + strand + "' strand exon is < 3 bases for sequence '" + seqName + "'. exonStart=" + exonStart + " exonEnd=" + exonEnd);
466 write = false;
467 }
468 else {
469 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,exonStart+offset+2,strand,frame,genePrefix,geneNum);
470 }
471 }
472 else if (startCodonSplit) { // at second exon that contains part of start codon
473 Assert.a(frame==1 || frame==2);
474 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,exonStart+offset+frame-1,strand,frame,genePrefix,geneNum);
475 }
476 if (write) {
477 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset, strand,frame,genePrefix,geneNum);
478 writeGFTLine(fout,seqName,"stop_codon",exonEnd+offset+1,exonEnd+offset+3,strand,0, genePrefix,geneNum);
479 }
480 inPlusExon = false;
481 firstExon = true;
482 startCodonSplit = false;
483 geneNum++;
484 }
485 else if (inMinusExon) {
486 strand = "-";
487 long prevExonEnd = exonEnd;
488 exonEnd = i;
489 if (firstExon && exonEnd - exonStart + 1 < 3) {
490 System.err.println("Single '" + strand + "' strand exon is < 3 bases for sequence '" + seqName + "'. exonStart=" + exonStart + " exonEnd=" + exonEnd);
491 }
492 else if (exonEnd - exonStart + 1 < 3) { // this exon is < 3 bases, need to split start codon
493 if (exonEnd - exonStart + 1 == 2) { // this exon is 2 bases
494 writeGFTLine(fout,seqName,"start_codon",prevExonEnd+offset,prevExonEnd+offset,strand,0,genePrefix,geneNum);
495 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset,strand,frame,genePrefix,geneNum);
496 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,exonEnd+offset,strand,2,genePrefix,geneNum);
497 }
498 else if (exonEnd - exonStart + 1 == 1) { // this exon is 1 base
499 writeGFTLine(fout,seqName,"start_codon",prevExonEnd+offset-1,prevExonEnd+offset,strand,0,genePrefix,geneNum);
500 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset,strand,frame,genePrefix,geneNum);
501 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,exonEnd+offset,strand,1,genePrefix,geneNum);
502 }
503 }
504 else {
505 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset,strand,frame,genePrefix,geneNum);
506 writeGFTLine(fout,seqName,"start_codon",exonEnd+offset-2,exonEnd+offset,strand,0, genePrefix,geneNum);
507 }
508 inMinusExon = false;
509 firstExon = true;
510 startCodonSplit = false;
511 geneNum++;
512 }
513 }
514 }
515 seqCount++;
516 }
517 fout.close();
518 }
519
520 private void parseSeqName(TrainingSequence seq, int seqNum) {
521 NameInputSequence nameInput = null;
522
523 InputSequence<?> inputSeq = seq.getInputSequence();
524 if(inputSeq instanceof InputSequenceComposite) {
525 nameInput = (NameInputSequence) inputSeq.getComponent("name");
526
527 }
528 if(nameInput == null) {
529 log.debug("Sequence name not specified. Setting sequence name to 'SEQ_" + String.valueOf(seqNum) + "'");
530 seqName = "SEQ_" + String.valueOf(seqNum); // Create a name and return.
531 genePrefix = "SEQ_" + String.valueOf(seqNum);
532 offset = 0;
533 return;
534 }
535 String name = nameInput.getName().trim();
536
537 int colon1, colon2, numColons;
538
539 if (name.startsWith("group:") || name.startsWith("seq:") ) {
540 numColons = numOccurrences(name, ':');
541 if (numColons == 1) {
542 colon1 = name.indexOf(":");
543 seqName = name;
544 genePrefix = name.substring(colon1 + 1, name.length());
545 offset = 0;
546 return;
547 }
548 else if (numColons == 2) {
549 colon1 = name.indexOf(":");
550 colon2 = name.lastIndexOf(":");
551 seqName = name.substring(0, colon2);
552 genePrefix = name.substring(colon1 + 1, colon2);
553 int pound = genePrefix.indexOf("#");
554 if (pound > 0) {
555 genePrefix = genePrefix.substring(0, pound);
556 }
557 setOffset(name.substring(colon2+1, name.length()));
558 return;
559 }
560 }
561 log.debug("Sequence name is in unexpected format. Setting offset=0 and sequence name='" + name + "'.");
562 seqName = name;
563 genePrefix = name;
564 offset = 0;
565 }
566
567 // Returns the number of times the character 'c' occurs in 'str'
568 private static int numOccurrences(String str, char c) {
569 int num = 0;
570 int index = str.indexOf(c);
571 while (index != -1) {
572 num++;
573 index = str.indexOf(c, index+1);
574 }
575 return num;
576 }
577
578 private void setOffset(String str) {
579 int numDashes, dash;
580 numDashes = numOccurrences(str, '-');
581
582 if (numDashes == 0) {
583 offset = 0;
584 }
585 else if (numDashes == 1) {
586 try {
587 dash = str.indexOf("-");
588 offset = Long.valueOf(str.substring(0, dash)) - 1;
589 }
590 catch (NumberFormatException e) {
591 System.err.println("Sequence range values in unexpected format. Setting offset=0 for sequence='" + seqName + "'.");
592 offset = 0;
593 }
594 }
595 else {
596 System.err.println("Sequence range values in unexpected format. Setting offset=0 for sequence='" + seqName + "'.");
597 offset = 0;
598 }
599 }
600
601 // Frame is the nmber of bases in this region befor you get in frame.
602 // That is, if frame is 0, the first three bases in this element are a codon.
603 // If frame is 1, the first base is the end of a codon hanging over from the
604 // end of the previous codon and the next three are the first codon in this feature.
605 // If frame is 2, the first two bases are the end of the previous codon and the
606 // next three are the first codon in this feature.
607 private static int setFrame(int ref) {
608 int frame = -1;
609
610 switch (ref) {
611 case 1: frame = 0; break;
612 case 2: frame = 2; break;
613 case 3: frame = 1; break;
614 case 7: frame = 1; break;
615 case 8: frame = 2; break;
616 case 9: frame = 0; break;
617 default: Assert.a(false, "Error setting frame, ref = ", ref);
618 }
619 return frame;
620 }
621
622 // Outputs one line to the GTF file.
623 // NOTE: source is assumed to be 'CONRAD', and score is assumed to be unknown and set to '.'.
624 private static void writeGFTLine(Writer out, String seqName, String feature, long exonStart, long exonEnd,
625 String strand, int frame, String genePrefix, int geneNum) throws IOException {
626
627 Assert.a(frame==0 || frame==1 || frame==2, "Frame value invalid, frame = ", frame);
628
629 String geneId = genePrefix + "G_" + String.valueOf(geneNum);
630 String transId = genePrefix + "T_" + String.valueOf(geneNum) + ".1";
631
632 out.write(seqName + "\t" + "CONRAD" + "\t" + feature + "\t" + exonStart + "\t" + exonEnd + "\t" +
633 "." + "\t" + strand + "\t" + frame + "\t" +
634 "gene_id \"" + geneId + "\"; transcript_id \"" + transId + "\";\n");
635 }
636
637 public double[] getViterbiScores() {
638 return viterbiScores;
639 }
640
641 /**
642 * @return Returns the writeTrainingData.
643 */
644 public boolean isWriteTrainingData() {
645 return writeTrainingData;
646 }
647
648 /**
649 * @param writeTrainingData The writeTrainingData to set.
650 */
651 public void setWriteTrainingData(boolean writeTrainingData) {
652 this.writeTrainingData = writeTrainingData;
653 }
654
655 }