001 package calhoun.analysis.crf.io;
002
003 import java.io.BufferedWriter;
004 import java.io.FileWriter;
005 import java.io.IOException;
006 import java.io.Serializable;
007 import java.io.Writer;
008 import java.util.ArrayList;
009 import java.util.Iterator;
010 import java.util.List;
011 import java.util.Set;
012
013 import org.apache.commons.logging.Log;
014 import org.apache.commons.logging.LogFactory;
015
016 import calhoun.analysis.crf.ModelManager;
017 import calhoun.analysis.crf.io.IntervalInputSequence.IntervalRangeMapValue;
018 import calhoun.analysis.crf.statistics.PredictedActualBinaryContingencyTable;
019 import calhoun.util.Assert;
020 import calhoun.util.DenseBooleanMatrix2D;
021 import calhoun.util.FileUtil;
022 import calhoun.util.RangeMap;
023
024 /** A legacy output handler that computes basic stats, gene calling statsm and then writes out a GTF file.
025 */
026 public class OutputHandlerGeneCallStats implements OutputHandler {
027 private static final long serialVersionUID = -1506791895658464225L;
028 private static final Log log = LogFactory.getLog(OutputHandlerGeneCallStats.class);
029
030 private ModelManager manager;
031 private InputHandler inputHandler;
032 String location;
033 boolean writeTrainingData = false;
034
035 /** default constructor. <code>ModelManager</code> and <code>InputHandler</code> must be configured separately. */
036 public OutputHandlerGeneCallStats() {
037 }
038
039 /** creates an output handler using this model and input handler
040 * @param manager the model used for gene calling. Used for calculating stats.
041 * @param inputHandler input handler which will be used for writing out the input sequence with the results.
042 */
043 public OutputHandlerGeneCallStats(ModelManager manager, InputHandler inputHandler) {
044 this.inputHandler = inputHandler;
045 setManager(manager);
046 }
047
048 public void setOutputLocation(String location) {
049 this.location = location;
050 }
051
052 public void writeOutput(InputSequence<?> sequence, int[] hiddenStates) throws IOException {
053 throw new UnsupportedOperationException();
054 }
055
056 public void writeTestOutput(InputSequence<?> sequence, int[] truePath, int[] hiddenStates) throws IOException {
057 calcResultIncrement(new TrainingSequence(sequence, truePath), hiddenStates);
058 }
059
060 public void outputComplete() throws IOException {
061 if(location != null) {
062 if(writeTrainingData) {
063 try {
064 inputHandler.writeTrainingData(location, labeled);
065 }
066 catch(Exception ex) {
067 log.warn("Unable to write training data", ex);
068 }
069 }
070 writeGTF(labeled, location + ".gtf");
071 System.out.print(this);
072 writeResults(location + ".dat");
073 }
074 }
075
076 /** retursn the input handler used to write out the input sequences
077 * @return the inputHandler which will be used to write out the input sequences
078 */
079 public InputHandler getInputHandler() {
080 return inputHandler;
081 }
082
083 /** sets the inputHandler used to write out the input sequences
084 * @param inputHandler the inputHandler used to write out the input sequences
085 */
086 public void setInputHandler(InputHandler inputHandler) {
087 this.inputHandler = inputHandler;
088 }
089
090 /** gets the model used to generate results
091 * @return the model used to generate results
092 */
093 public ModelManager getManager() {
094 return manager;
095 }
096
097 /** sets the model used to generate results
098 * @param manager the model used to generate results
099 */
100 public void setManager(ModelManager manager) {
101 this.manager = manager;
102
103 ctCodingNucleotide = new PredictedActualBinaryContingencyTable();
104
105 ctExons = new PredictedActualBinaryContingencyTable();
106 ctExons.forgetTN();
107
108 nStates = manager.getNumStates();
109 ctStates = new ArrayList<PredictedActualBinaryContingencyTable>();
110 for (int i=0; i<nStates; i++) {
111 ctStates.add(new PredictedActualBinaryContingencyTable());
112 }
113
114 DenseBooleanMatrix2D LT = manager.getLegalTransitions();
115 fromInd = new ArrayList<Integer>();
116 toInd = new ArrayList<Integer>();
117 for (int from=0; from<nStates; from++) {
118 for (int to=0; to<nStates; to++ ) {
119 if (LT.getQuick(from,to)) {
120 fromInd.add(from);
121 toInd.add(to);
122 }
123 }
124 }
125 nTransitions = fromInd.size();
126 ctTransitions = new ArrayList<PredictedActualBinaryContingencyTable>();
127 for (int i=0; i<nTransitions; i++) {
128 ctTransitions.add(new PredictedActualBinaryContingencyTable());
129 }
130 }
131
132 private List<TrainingSequence<?>> labeled = new ArrayList<TrainingSequence<?>>();
133 private int correct = 0;
134 private int incorrect = 0;
135 private int perfect = 0;
136 private int imperfect = 0;
137 private transient double[] viterbiScores;
138 private double lla = 0, llv = 0;
139
140 // Info we'll need to know about the model in order to be clever about gathering stats
141 // (the manager must be provided, other things derived from it)
142 private int nStates;
143 private int nTransitions;
144 private List<Integer> fromInd;
145 private List<Integer> toInd;;
146
147 // The 2x2 contingency tables for which we'll keep track of results:
148 private PredictedActualBinaryContingencyTable ctCodingNucleotide;
149 private PredictedActualBinaryContingencyTable ctExons;
150 private List<PredictedActualBinaryContingencyTable> ctStates;
151 private List<PredictedActualBinaryContingencyTable> ctTransitions;
152
153 /** returns the exact nucleotide accuracy of the result */
154 public float getAccuracy() {
155 return correct / (float)(correct+incorrect);
156 }
157
158 public static class Results implements Serializable {
159 private static final long serialVersionUID = 9082449588200635355L;
160 public PredictedActualBinaryContingencyTable ctCodingNucleotide;
161 public PredictedActualBinaryContingencyTable ctExons;
162 public List<PredictedActualBinaryContingencyTable> ctStates;
163 public List<PredictedActualBinaryContingencyTable> ctTransitions;
164 public int correct;
165 public int incorrect;
166 public int perfect;
167 public int imperfect;
168 }
169
170 void writeResults(String loc) throws IOException {
171 Results results = new Results();
172 results.ctCodingNucleotide = ctCodingNucleotide;
173 results.ctExons = ctExons;
174 results.ctStates = ctStates;
175 results.ctTransitions = ctTransitions;
176 results.correct = correct;
177 results.incorrect = incorrect;
178 results.perfect = perfect;
179 results.imperfect = imperfect;
180 FileUtil.writeObject(loc, results);
181 }
182
183 @Override
184 public String toString() {
185 String ret = "NOTE: If you're using the CRF for prediction and pass in a dummy (e.g. all zeros) hidden\n";
186 ret += " sequence, then many of the following statistics will not be meaningful\n";
187
188 for (int s=0; s<nStates; s++) {
189 ret += "[State=" + manager.getStateName(s) + "] ";
190 ctStates.get(s).freeze();
191 ret += ctStates.get(s).summarize();
192 ret += "\n";
193 }
194
195 for (int t=0; t<nTransitions; t++) {
196 ret += "[Transition " + manager.getStateName(fromInd.get(t)) + " --> " + manager.getStateName(toInd.get(t)) + " ] ";
197 ctTransitions.get(t).freeze();
198 ret += ctTransitions.get(t).summarize();
199 ret += "\n";
200 }
201
202 ctCodingNucleotide.freeze();
203 ret += "[Coding nucleotides] " + ctCodingNucleotide.summarize() + "\n";
204
205 ctExons.freeze();
206 ret += "[Coding exons] " + ctExons.summarize() + "\n";
207
208 if (lla>0) {
209 ret += "LLA:" + lla + " LLV:" + llv + " " + "\n";
210 }
211
212 ret += String.format("Perfectly predicted hidden sequences: %d/%d %.2f %%",perfect,perfect+imperfect,perfect*100.0/(float) (perfect+imperfect))+ "\n";
213
214 ret += String.format("Nucleotide Hidden State Agreement: %d/%d %.2f %%",correct, correct + incorrect, correct * 100.0 / (float) (correct + incorrect)) + "\n";
215
216
217 return ret;
218 }
219
220 /** calculates statstics and output for results on a given test sequence */
221 public void calcResultIncrement(TrainingSequence training, int[] predictedHiddenSequence) {
222 labeled.add(new TrainingSequence(training.getInputSequence(), predictedHiddenSequence)); // This is only place that labelled gets added to???
223 // So I guess the results just get built up incrementally, both the actuall hidden sequences and the stats?
224 Assert.a(training.length() == predictedHiddenSequence.length);
225 int[] actualHiddenSequence = new int[training.length()];
226 for (int i=0; i<training.length(); i++) {
227 actualHiddenSequence[i] = training.getY(i);
228 }
229 boolean thisperfect = true;
230 for (int i = 0; i < predictedHiddenSequence.length; ++i) {
231 int predY = predictedHiddenSequence[i];
232 int realY = actualHiddenSequence[i];
233
234 if (realY == predY) { correct += 1; } else { incorrect += 1; thisperfect = false; }
235
236 ctCodingNucleotide.increment(isCodingPlus(predY),isCodingPlus(realY));
237 ctCodingNucleotide.increment(isCodingMinus(predY),isCodingMinus(realY));
238
239 for (int s=0; s<nStates; s++) {
240 ctStates.get(s).increment((predY==s),(realY==s));
241 }
242 }
243 if (thisperfect) {
244 perfect++;
245 } else {
246 imperfect++;
247 }
248 for (int i = 1; i < predictedHiddenSequence.length; ++i) {
249 int predY = predictedHiddenSequence[i];
250 int realY = actualHiddenSequence[i];
251 int predYp = predictedHiddenSequence[i-1];
252 int realYp = actualHiddenSequence[i-1];
253
254 for (int t=0; t<nTransitions; t++) {
255 boolean bPred = ( (predYp==fromInd.get(t)) && (predY==toInd.get(t)) );
256 boolean bReal = ( (realYp==fromInd.get(t)) && (realY==toInd.get(t)) );
257 ctTransitions.get(t).increment( bPred , bReal );
258 }
259 }
260
261 // Now let's increment the contingency table for exons; note that here not counting TN's
262 RangeMap predExonsPlus = new RangeMap();
263 RangeMap predExonsMinus = new RangeMap();
264 RangeMap realExonsPlus = new RangeMap();
265 RangeMap realExonsMinus = new RangeMap();
266 makeExonRangeMapFrom13SV(predictedHiddenSequence,predExonsPlus,predExonsMinus);
267 makeExonRangeMapFrom13SV(actualHiddenSequence,realExonsPlus,realExonsMinus);
268 incrementCTFromRangeMaps(ctExons,predExonsPlus,realExonsPlus);
269 incrementCTFromRangeMaps(ctExons,predExonsMinus,realExonsMinus);
270
271 }
272
273 private void incrementCTFromRangeMaps(PredictedActualBinaryContingencyTable ct, RangeMap pred, RangeMap real) {
274 // By looping through the predictions, can get at TP and FP
275 Set<IntervalRangeMapValue> pv = pred.values();
276 Iterator<IntervalRangeMapValue> pvi = pv.iterator();
277 while(pvi.hasNext()) {
278 IntervalRangeMapValue irmv = pvi.next();
279 Set<IntervalRangeMapValue> vals = real.find(irmv.start,irmv.end);
280 if(vals.size() == 0) {
281 ct.incrementFP();
282 }
283 else {
284 IntervalRangeMapValue val = vals.iterator().next();
285 if(val.start == irmv.start && val.end == irmv.end) {
286 ct.incrementTP();
287 } else {
288 ct.incrementFP();
289 }
290 }
291 }
292 Set<IntervalRangeMapValue> rv = real.values();
293 Iterator<IntervalRangeMapValue> rvi = rv.iterator();
294 while(rvi.hasNext()) {
295 IntervalRangeMapValue irmv = rvi.next();
296 if (!pred.hasEntry(irmv.start,irmv.end)) {
297 ct.incrementFN();
298 }
299 Set<IntervalRangeMapValue> vals = pred.find(irmv.start,irmv.end);
300 if(vals.size() == 0) {
301 ct.incrementFN();
302 }
303 else {
304 IntervalRangeMapValue val = vals.iterator().next();
305 if(val.start == irmv.start && val.end == irmv.end) {
306 } else {
307 ct.incrementFN();
308 }
309 }
310 }
311 }
312
313
314 private void makeExonRangeMapFrom13SV(int[] hidden, RangeMap exonsPlus, RangeMap exonsMinus) {
315
316 int len = hidden.length;
317
318 for (int i=1; i<len; i++) {
319 if ((!isCodingPlus(hidden[i-1]) && (isCodingPlus(hidden[i])))) {
320 int j=i;
321 while ((isCodingPlus(hidden[j])) &&(j<(len-1))) { j++; }
322 exonsPlus.add(i,j,new IntervalRangeMapValue(i,j,1.0));
323 //log.info("Add + "+i+" "+j);
324 }
325 if ((!isCodingMinus(hidden[i-1]) && (isCodingMinus(hidden[i])))) {
326 int j=i;
327 while ((isCodingMinus(hidden[j])) &&(j<len-1)) { j++; }
328 exonsMinus.add(i,j,new IntervalRangeMapValue(i,j,1.0));
329 //log.info("Add - "+i+" "+j);
330 }
331 }
332 }
333
334 private boolean isCodingPlus(int y) {
335 Assert.a( (y>=0) && (y<13) );
336 if ( (y==1) || (y==2) || (y==3) ) { return true; }
337 return false;
338 }
339
340 private boolean isCodingMinus(int y) {
341 Assert.a( (y>=0) && (y<13) );
342 if ( (y==7) || (y==8) || (y==9) ) { return true; }
343 return false;
344 }
345
346 public void loglikelihoodIncrement(double logLikelihoodActual, double logLikelihoodViterbi) {
347 lla += logLikelihoodActual;
348 llv += logLikelihoodViterbi;
349 }
350
351 public TrainingSequence getLabeled(int i) {
352 return labeled.get(i);
353 }
354
355 String seqName;
356 String genePrefix;
357 long offset;
358
359 // This function converts a 13 state model hidden sequence to a GTF file.
360 public void writeGTF(List<? extends TrainingSequence<?>> refStates, String filename) throws IOException
361 {
362 int ref, geneNum, seqCount, frame=-1;
363 long i, exonStart, exonEnd, end;
364 boolean inPlusExon, inMinusExon, firstExon, startCodonSplit;
365 String strand;
366 Writer fout = new BufferedWriter(new FileWriter(filename));
367 exonStart = exonEnd = 0;
368 geneNum = 1;
369 seqCount = 0;
370
371 // Determine if model is tricycle13 or interval13.
372 boolean interval13 = false;
373 int prevState, state;
374 for (TrainingSequence seq : refStates) {
375 if (seq.length() == 0) continue;
376
377 prevState = seq.getY(0);
378 for (i=1; i<seq.length(); i++) {
379 state = seq.getY((int)i);
380 if (prevState == 0 && (state==2 || state==3 || state==7 || state==8)) {
381 interval13 = true;
382 break;
383 }
384 prevState = state;
385 }
386 if (interval13)
387 break;
388 }
389
390 for (TrainingSequence seq : refStates) {
391
392 if (interval13) {
393 SequenceConverter.convertSeqFromInterval13ToTricycle13(seq);
394 }
395
396 inPlusExon = false;
397 inMinusExon = false;
398 firstExon = true;
399 startCodonSplit = false;
400
401 parseSeqName(seq, seqCount);
402
403 for (i=0; i<seq.length(); i++)
404 {
405 ref = seq.getY((int)i);
406
407 if (ref == 1 || ref == 2 || ref == 3) // in a plus exon
408 {
409 if (!inPlusExon)
410 {
411 exonStart = i+1;
412 inPlusExon = true;
413 frame = setFrame(ref);
414 }
415 }
416 else if (ref == 7 || ref == 8 || ref == 9) // in a minus exon
417 {
418 if (!inMinusExon)
419 {
420 exonStart = i+1;
421 inMinusExon = true;
422 frame = setFrame(ref);
423 if (firstExon) {
424 if (i < 3)
425 System.err.println("Minus strand gene start is within 3 nucleotides of sequence start. No stop codon writen to GTF for gene starting at position " + (exonStart+offset));
426 else
427 writeGFTLine(fout,seqName,"stop_codon",exonStart+offset-3,exonStart+offset-1,"-",frame,genePrefix,geneNum);
428
429 }
430 }
431 }
432 else if ( inPlusExon && (ref == 4 || ref == 5 || ref == 6) ) { // just ended an exon on plus strand, now in a plus intron
433 strand = "+";
434 inPlusExon = false;
435 exonEnd = i;
436 if (firstExon) {
437 if (exonEnd - exonStart + 1 < 3) { end = exonEnd + offset; startCodonSplit = true;}
438 else { end = exonStart+offset+2; }
439 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,end,strand,frame,genePrefix,geneNum);
440 firstExon = false;
441 }
442 else if (startCodonSplit) { // at second exon that contains part of start codon
443 Assert.a(frame==1 || frame==2);
444 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,exonStart+offset+frame-1,strand,frame,genePrefix,geneNum);
445 startCodonSplit = false;
446 }
447 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset,strand,frame,genePrefix,geneNum);
448 }
449 else if (inMinusExon && (ref == 10 || ref == 11 || ref == 12)) { // just ended an exon on minus strand, now in a minus intron
450 strand = "-";
451 inMinusExon = false;
452 firstExon = false;
453 exonEnd = i;
454 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset,strand,frame,genePrefix,geneNum);
455 }
456 else // now in intergenic region
457 {
458 boolean write = true;
459 if (inPlusExon) // was in gene at previous nucleotide
460 {
461 strand = "+";
462 exonEnd = i;
463 if (firstExon) {
464 if (exonEnd - exonStart + 1 < 3) {
465 System.err.println("Single '" + strand + "' strand exon is < 3 bases for sequence '" + seqName + "'. exonStart=" + exonStart + " exonEnd=" + exonEnd);
466 write = false;
467 }
468 else {
469 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,exonStart+offset+2,strand,frame,genePrefix,geneNum);
470 }
471 }
472 else if (startCodonSplit) { // at second exon that contains part of start codon
473 Assert.a(frame==1 || frame==2);
474 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,exonStart+offset+frame-1,strand,frame,genePrefix,geneNum);
475 }
476 if (write) {
477 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset, strand,frame,genePrefix,geneNum);
478 writeGFTLine(fout,seqName,"stop_codon",exonEnd+offset+1,exonEnd+offset+3,strand,0, genePrefix,geneNum);
479 }
480 inPlusExon = false;
481 firstExon = true;
482 startCodonSplit = false;
483 geneNum++;
484 }
485 else if (inMinusExon) {
486 strand = "-";
487 long prevExonEnd = exonEnd;
488 exonEnd = i;
489 if (firstExon && exonEnd - exonStart + 1 < 3) {
490 System.err.println("Single '" + strand + "' strand exon is < 3 bases for sequence '" + seqName + "'. exonStart=" + exonStart + " exonEnd=" + exonEnd);
491 }
492 else if (exonEnd - exonStart + 1 < 3) { // this exon is < 3 bases, need to split start codon
493 if (exonEnd - exonStart + 1 == 2) { // this exon is 2 bases
494 writeGFTLine(fout,seqName,"start_codon",prevExonEnd+offset,prevExonEnd+offset,strand,0,genePrefix,geneNum);
495 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset,strand,frame,genePrefix,geneNum);
496 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,exonEnd+offset,strand,2,genePrefix,geneNum);
497 }
498 else if (exonEnd - exonStart + 1 == 1) { // this exon is 1 base
499 writeGFTLine(fout,seqName,"start_codon",prevExonEnd+offset-1,prevExonEnd+offset,strand,0,genePrefix,geneNum);
500 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset,strand,frame,genePrefix,geneNum);
501 writeGFTLine(fout,seqName,"start_codon",exonStart+offset,exonEnd+offset,strand,1,genePrefix,geneNum);
502 }
503 }
504 else {
505 writeGFTLine(fout,seqName,"CDS",exonStart+offset,exonEnd+offset,strand,frame,genePrefix,geneNum);
506 writeGFTLine(fout,seqName,"start_codon",exonEnd+offset-2,exonEnd+offset,strand,0, genePrefix,geneNum);
507 }
508 inMinusExon = false;
509 firstExon = true;
510 startCodonSplit = false;
511 geneNum++;
512 }
513 }
514 }
515 seqCount++;
516 }
517 fout.close();
518 }
519
520 private void parseSeqName(TrainingSequence seq, int seqNum) {
521 NameInputSequence nameInput = null;
522
523 InputSequence<?> inputSeq = seq.getInputSequence();
524 if(inputSeq instanceof InputSequenceComposite) {
525 nameInput = (NameInputSequence) inputSeq.getComponent("name");
526
527 }
528 if(nameInput == null) {
529 log.debug("Sequence name not specified. Setting sequence name to 'SEQ_" + String.valueOf(seqNum) + "'");
530 seqName = "SEQ_" + String.valueOf(seqNum); // Create a name and return.
531 genePrefix = "SEQ_" + String.valueOf(seqNum);
532 offset = 0;
533 return;
534 }
535 String name = nameInput.getName().trim();
536
537 int colon1, colon2, numColons;
538
539 if (name.startsWith("group:") || name.startsWith("seq:") ) {
540 numColons = numOccurrences(name, ':');
541 if (numColons == 1) {
542 colon1 = name.indexOf(":");
543 seqName = name;
544 genePrefix = name.substring(colon1 + 1, name.length());
545 offset = 0;
546 return;
547 }
548 else if (numColons == 2) {
549 colon1 = name.indexOf(":");
550 colon2 = name.lastIndexOf(":");
551 seqName = name.substring(0, colon2);
552 genePrefix = name.substring(colon1 + 1, colon2);
553 int pound = genePrefix.indexOf("#");
554 if (pound > 0) {
555 genePrefix = genePrefix.substring(0, pound);
556 }
557 setOffset(name.substring(colon2+1, name.length()));
558 return;
559 }
560 }
561 log.debug("Sequence name is in unexpected format. Setting offset=0 and sequence name='" + name + "'.");
562 seqName = name;
563 genePrefix = name;
564 offset = 0;
565 }
566
567 // Returns the number of times the character 'c' occurs in 'str'
568 private static int numOccurrences(String str, char c) {
569 int num = 0;
570 int index = str.indexOf(c);
571 while (index != -1) {
572 num++;
573 index = str.indexOf(c, index+1);
574 }
575 return num;
576 }
577
578 private void setOffset(String str) {
579 int numDashes, dash;
580 numDashes = numOccurrences(str, '-');
581
582 if (numDashes == 0) {
583 offset = 0;
584 }
585 else if (numDashes == 1) {
586 try {
587 dash = str.indexOf("-");
588 offset = Long.valueOf(str.substring(0, dash)) - 1;
589 }
590 catch (NumberFormatException e) {
591 System.err.println("Sequence range values in unexpected format. Setting offset=0 for sequence='" + seqName + "'.");
592 offset = 0;
593 }
594 }
595 else {
596 System.err.println("Sequence range values in unexpected format. Setting offset=0 for sequence='" + seqName + "'.");
597 offset = 0;
598 }
599 }
600
601 // Frame is the nmber of bases in this region befor you get in frame.
602 // That is, if frame is 0, the first three bases in this element are a codon.
603 // If frame is 1, the first base is the end of a codon hanging over from the
604 // end of the previous codon and the next three are the first codon in this feature.
605 // If frame is 2, the first two bases are the end of the previous codon and the
606 // next three are the first codon in this feature.
607 private static int setFrame(int ref) {
608 int frame = -1;
609
610 switch (ref) {
611 case 1: frame = 0; break;
612 case 2: frame = 2; break;
613 case 3: frame = 1; break;
614 case 7: frame = 1; break;
615 case 8: frame = 2; break;
616 case 9: frame = 0; break;
617 default: Assert.a(false, "Error setting frame, ref = ", ref);
618 }
619 return frame;
620 }
621
622 // Outputs one line to the GTF file.
623 // NOTE: source is assumed to be 'CONRAD', and score is assumed to be unknown and set to '.'.
624 private static void writeGFTLine(Writer out, String seqName, String feature, long exonStart, long exonEnd,
625 String strand, int frame, String genePrefix, int geneNum) throws IOException {
626
627 Assert.a(frame==0 || frame==1 || frame==2, "Frame value invalid, frame = ", frame);
628
629 String geneId = genePrefix + "G_" + String.valueOf(geneNum);
630 String transId = genePrefix + "T_" + String.valueOf(geneNum) + ".1";
631
632 out.write(seqName + "\t" + "CONRAD" + "\t" + feature + "\t" + exonStart + "\t" + exonEnd + "\t" +
633 "." + "\t" + strand + "\t" + frame + "\t" +
634 "gene_id \"" + geneId + "\"; transcript_id \"" + transId + "\";\n");
635 }
636
637 public double[] getViterbiScores() {
638 return viterbiScores;
639 }
640
641 /**
642 * @return Returns the writeTrainingData.
643 */
644 public boolean isWriteTrainingData() {
645 return writeTrainingData;
646 }
647
648 /**
649 * @param writeTrainingData The writeTrainingData to set.
650 */
651 public void setWriteTrainingData(boolean writeTrainingData) {
652 this.writeTrainingData = writeTrainingData;
653 }
654
655 }