001    /* The Broad Institute SOFTWARE COPYRIGHT NOTICE AGREEMENT
002    This software and its documentation are copyright 2003 by the Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
003    This software is supplied without any warranty or guaranteed support whatsoever. Neither the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. 
004    */
005    package calhoun.util;
006    
007    import java.util.ArrayList;
008    import java.util.HashSet;
009    import java.util.Iterator;
010    import java.util.List;
011    import java.util.Set;
012    import java.util.regex.Pattern;
013    
014    import org.apache.commons.lang.StringUtils;
015    import org.apache.commons.logging.Log;
016    import org.apache.commons.logging.LogFactory;
017    import org.dom4j.Attribute;
018    import org.dom4j.Document;
019    import org.dom4j.Element;
020    
021    /** Utility class for diffing XML
022     */
023    public class XmlDiff {
024            private static final Log log = LogFactory.getLog(XmlDiff.class);
025    
026            /** Should not be constructed.  All static methods.
027             * 
028             */
029            private XmlDiff() {
030                    super();
031            }
032    
033            /** Compares two XML files.  the first is the template, which can contain '*' as a wildcard attribute or element value.  Returns a String describing the differences or null if the files match. 
034             * The comparison is an XML aware comparison that ignores whitespace differences and attribute order and also allows wildcard values.  
035                    Just put '*' as the value for any element content or attribute value and any plugin output will match.  It also ignores ordering of 
036                    duplicated child elements.  Therefore if <BlastRun> has 50 <BlastAlignment> child objects, the order does not have to match in the 
037                    2 documents.*/
038            public static String compareFiles(String templateName, String docName) {
039                    log.debug("Comparing files: "+templateName+" and "+docName);
040                    Document template = XmlUtil.parseFile(templateName);
041                    Document doc = XmlUtil.parseFile(docName);
042                    List result = new ArrayList();
043    
044                    if(template.getRootElement().getName() != doc.getRootElement().getName())
045                            result.add("Root nodes don't match.  Expected: '"+template.getRootElement().getName()+"', Received: "+doc.getRootElement().getName());
046                    else
047                            result = compareXmlElements(template.getRootElement(), doc.getRootElement());
048    
049                    if(result.size() == 0)
050                            return null;
051                    else
052                            return "Files differ: "+docName+" doesn't match "+templateName+"\n"+StringUtils.join(result.iterator(), '\n');
053            }
054    
055            static List compareXmlElements(Element template, Element doc) {
056                    List result = new ArrayList();
057                    
058                    log.debug("Comparing elements: "+template.getUniquePath()+" and "+doc.getUniquePath());
059    
060                    Set attributes = new HashSet(doc.attributes());
061    
062                    // Compare attributes
063                    Iterator it = template.attributes().iterator();
064                    while(it.hasNext()) {
065                            Attribute attribute = (Attribute) it.next(); 
066                            Attribute docAttribute = doc.attribute(attribute.getQName());
067                            if(docAttribute == null)
068                                    result.add(template.getUniquePath()+": Expected: "+attribute.getQualifiedName()+"='"+attribute.getValue()+"', Received: No attribute");
069                            else {
070                                    String comp = compareValues(attribute.getValue(), docAttribute.getValue());
071                                    if(comp != null)
072                                            result.add(attribute.getUniquePath()+": "+comp);
073                                    attributes.remove(docAttribute);
074                            }
075                    }
076                    it = attributes.iterator();
077                    while(it.hasNext()) {
078                            Attribute attribute = (Attribute) it.next(); 
079                            result.add(template.getUniquePath()+": Expected: No Attribute, Received: "+attribute.getQualifiedName()+"='"+attribute.getValue()+"'");
080                    }
081    
082                    // Compare content
083                    if(template.isTextOnly()) {
084                            if(!doc.isTextOnly())
085                                    result.add(template.getUniquePath()+": Expected: '"+template.getText()+"', Received: Non-text content");
086                            else {
087                                    String comp = compareValues(template.getText(), doc.getText());
088                                    if(comp != null)
089                                            result.add(template.getUniquePath()+": "+comp);
090                            }
091                    }
092    
093    
094                    Set matchedElements = new HashSet();
095                    // Compare children
096                    it = template.elements().iterator();
097                    while(it.hasNext()) {
098                            Element child = (Element) it.next(); 
099                            List docElements = new ArrayList(doc.elements(child.getQName()));
100                            docElements.removeAll(matchedElements);
101                            if(docElements.size() == 0) {
102                                    result.add(template.getUniquePath()+": Expected: <"+child.getQualifiedName()+">, Received: No child element");
103                            }
104                            else {
105                                    // Loop through children, saving the best matching element.  Stop if we get a perfect match.
106                                    int fewestProbs=999999999;
107                                    Iterator docIit = docElements.iterator();
108                                    Element bestElement = null;
109                                    List bestResults = null;
110                                    while(docIit.hasNext()) {
111                                            Element docElement = (Element) docIit.next(); 
112                                            List childResult = compareXmlElements(child, docElement);
113                                            if(childResult.size() < fewestProbs) {
114                                                    fewestProbs = childResult.size();
115                                                    bestElement = docElement;
116                                                    bestResults = childResult;
117                                            }
118                                            if(fewestProbs == 0)
119                                                    break;
120                                    }
121                                    // Now take the best element as the match
122                                    matchedElements.add(bestElement);
123                                    result.addAll(bestResults);
124                                    if(docElements.size() > 1)
125                                            log.debug("Matched: "+child.getUniquePath()+" with "+bestElement.getUniquePath());
126                            }
127                    }                               
128                    Set unmatchedElements = new HashSet(doc.elements());
129                    unmatchedElements.removeAll(matchedElements);
130                    it = unmatchedElements.iterator();
131                    while(it.hasNext()) {
132                            Element element = (Element) it.next(); 
133                            result.add(template.getUniquePath()+": Expected: No Child Element, Received: <"+element.getQualifiedName()+">");
134                    }
135            
136                    return result;
137            }
138    
139            static String compareValues(String templateValue, String docValue) {
140                    if (templateValue.equals("*")) {
141                            return null;
142                    }
143                    else if (templateValue.startsWith("REGEX:")) {
144                            String regex = templateValue.substring("REGEX:".length());
145                            if (!Pattern.compile(regex).matcher(docValue).matches()) {
146                                    return "Expected: '"+templateValue+"', Received: '"+docValue+"'";                               
147                            }
148                    }
149                    else if(!templateValue.equals(docValue)) {
150                            return "Expected: '"+templateValue+"', Received: '"+docValue+"'";
151                    }
152                    return null;
153            }
154    }