001    package calhoun.analysis.crf.io;
002    
003    import java.io.IOException;
004    import java.util.ArrayList;
005    import java.util.Arrays;
006    import java.util.Collections;
007    import java.util.List;
008    import java.util.Map;
009    
010    import calhoun.seq.FastaIterator;
011    import calhoun.seq.FastaSequence;
012    import calhoun.seq.FastaWriter;
013    import calhoun.util.Assert;
014    
015    public class FastaAlignmentInput implements InputComponentIO {
016            private static final long serialVersionUID = 760405914814389112L;
017    
018            String component;
019    
020            public List<String> getComponentNames() {
021                    return Collections.singletonList(component);
022            }
023    
024            public void readInputSequences(String location, List<Map<String, InputSequence<?>>> inputs) throws IOException {
025                    FastaIterator it = new FastaIterator(location);
026                    int seqNum = 0;
027                    String[] current = parseSeq(it.next());
028                    
029                    List<String> species = null;
030                    
031                    for(Map<String, InputSequence<?>> input : inputs) {
032                            MultipleAlignmentInputSequence alignment = (MultipleAlignmentInputSequence) input.get(component);
033                            Assert.a(alignment != null, "AlignmentTree must be input before the alignment Fasta");
034    
035                            if(species == null) {
036                                    species = new ArrayList();
037                                    species.addAll(alignment.getTree().getSpeciesSet());
038                                    
039                            }
040                            List<String> consensuses = new ArrayList<String>();
041                            for(int i=0; i<alignment.getNumSpecies(); ++i) {
042                                    consensuses.add(null);
043                            }
044                            
045                            // Loads all species for this sequence
046                            int len = -1;
047                            String thisSeq = current[0];
048                            while(thisSeq.equals(current[0])) {
049                                    int ix = species.indexOf(current[1]);
050                                    Assert.a(ix != -1, "Seq: "+current[0]+". Species is missing: "+current[1]);
051                                    consensuses.set(ix, current[2]);
052                                    len = current[2].length();
053                                    if(it.hasNext())
054                                            current = parseSeq(it.next());
055                                    else
056                                            break;
057                            }
058                            
059                            // Pad missing species with gaps
060                            Assert.a(len != -1, "No alignments available for this input.");
061                            char[] gapChars = new char[len];
062                            Arrays.fill(gapChars, '-');
063                            String gaps = new String(gapChars);
064                            for(int i = 0; i < consensuses.size(); ++i) {
065                                    if(consensuses.get(i) == null)
066                                            consensuses.set(i, gaps);
067                            }
068    
069                            alignment.setSpeciesAndConsensuses(species, consensuses);
070                            seqNum += 1;
071                    }
072            }
073    
074            String[] parseSeq(FastaSequence seq) {
075                    String[] ret = new String[3];
076                    String[] header = seq.getHeader().split(" ");
077                    ret[0] = header[0];
078                    ret[1] = header[1];
079                    ret[2] = seq.getSequence();
080                    return ret;
081            }
082            
083            public void writeInputSequences(String location, List<? extends Map<String, ? extends InputSequence<?>>> inputComponents) throws IOException {
084                    FastaWriter w = new FastaWriter(location, false);
085                    for(Map<String, ? extends InputSequence<?>> input : inputComponents) {
086                            MultipleAlignmentInputSequence alignment = (MultipleAlignmentInputSequence) input.get(component);
087                            List<String> names = alignment.getSpeciesNames();
088                            List<String> consensuses = alignment.getConsensusSeqs();
089                            for(int i=0; i<names.size(); ++i) {
090                                    w.writeSeq(names.get(i), consensuses.get(i));
091                            }
092                    }
093                    
094                    w.close();
095            }
096    
097            /**
098             * @return Returns the header.
099             */
100            public String getComponent() {
101                    return component;
102            }
103    
104            /**
105             * @param header The header to set.
106             */
107            public void setComponent(String header) {
108                    this.component = header;
109            }
110    }