001 package calhoun.analysis.crf.io;
002
003 import java.io.IOException;
004 import java.util.ArrayList;
005 import java.util.Arrays;
006 import java.util.Collections;
007 import java.util.List;
008 import java.util.Map;
009
010 import calhoun.seq.FastaIterator;
011 import calhoun.seq.FastaSequence;
012 import calhoun.seq.FastaWriter;
013 import calhoun.util.Assert;
014
015 public class FastaAlignmentInput implements InputComponentIO {
016 private static final long serialVersionUID = 760405914814389112L;
017
018 String component;
019
020 public List<String> getComponentNames() {
021 return Collections.singletonList(component);
022 }
023
024 public void readInputSequences(String location, List<Map<String, InputSequence<?>>> inputs) throws IOException {
025 FastaIterator it = new FastaIterator(location);
026 int seqNum = 0;
027 String[] current = parseSeq(it.next());
028
029 List<String> species = null;
030
031 for(Map<String, InputSequence<?>> input : inputs) {
032 MultipleAlignmentInputSequence alignment = (MultipleAlignmentInputSequence) input.get(component);
033 Assert.a(alignment != null, "AlignmentTree must be input before the alignment Fasta");
034
035 if(species == null) {
036 species = new ArrayList();
037 species.addAll(alignment.getTree().getSpeciesSet());
038
039 }
040 List<String> consensuses = new ArrayList<String>();
041 for(int i=0; i<alignment.getNumSpecies(); ++i) {
042 consensuses.add(null);
043 }
044
045 // Loads all species for this sequence
046 int len = -1;
047 String thisSeq = current[0];
048 while(thisSeq.equals(current[0])) {
049 int ix = species.indexOf(current[1]);
050 Assert.a(ix != -1, "Seq: "+current[0]+". Species is missing: "+current[1]);
051 consensuses.set(ix, current[2]);
052 len = current[2].length();
053 if(it.hasNext())
054 current = parseSeq(it.next());
055 else
056 break;
057 }
058
059 // Pad missing species with gaps
060 Assert.a(len != -1, "No alignments available for this input.");
061 char[] gapChars = new char[len];
062 Arrays.fill(gapChars, '-');
063 String gaps = new String(gapChars);
064 for(int i = 0; i < consensuses.size(); ++i) {
065 if(consensuses.get(i) == null)
066 consensuses.set(i, gaps);
067 }
068
069 alignment.setSpeciesAndConsensuses(species, consensuses);
070 seqNum += 1;
071 }
072 }
073
074 String[] parseSeq(FastaSequence seq) {
075 String[] ret = new String[3];
076 String[] header = seq.getHeader().split(" ");
077 ret[0] = header[0];
078 ret[1] = header[1];
079 ret[2] = seq.getSequence();
080 return ret;
081 }
082
083 public void writeInputSequences(String location, List<? extends Map<String, ? extends InputSequence<?>>> inputComponents) throws IOException {
084 FastaWriter w = new FastaWriter(location, false);
085 for(Map<String, ? extends InputSequence<?>> input : inputComponents) {
086 MultipleAlignmentInputSequence alignment = (MultipleAlignmentInputSequence) input.get(component);
087 List<String> names = alignment.getSpeciesNames();
088 List<String> consensuses = alignment.getConsensusSeqs();
089 for(int i=0; i<names.size(); ++i) {
090 w.writeSeq(names.get(i), consensuses.get(i));
091 }
092 }
093
094 w.close();
095 }
096
097 /**
098 * @return Returns the header.
099 */
100 public String getComponent() {
101 return component;
102 }
103
104 /**
105 * @param header The header to set.
106 */
107 public void setComponent(String header) {
108 this.component = header;
109 }
110 }