001 /* The Broad Institute SOFTWARE COPYRIGHT NOTICE AGREEMENT
002 This software and its documentation are copyright 2003 by the Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
003 This software is supplied without any warranty or guaranteed support whatsoever. Neither the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
004 */
005 package calhoun.util;
006
007 import java.util.ArrayList;
008 import java.util.HashSet;
009 import java.util.Iterator;
010 import java.util.List;
011 import java.util.Set;
012 import java.util.regex.Pattern;
013
014 import org.apache.commons.lang.StringUtils;
015 import org.apache.commons.logging.Log;
016 import org.apache.commons.logging.LogFactory;
017 import org.dom4j.Attribute;
018 import org.dom4j.Document;
019 import org.dom4j.Element;
020
021 /** Utility class for diffing XML
022 */
023 public class XmlDiff {
024 private static final Log log = LogFactory.getLog(XmlDiff.class);
025
026 /** Should not be constructed. All static methods.
027 *
028 */
029 private XmlDiff() {
030 super();
031 }
032
033 /** Compares two XML files. the first is the template, which can contain '*' as a wildcard attribute or element value. Returns a String describing the differences or null if the files match.
034 * The comparison is an XML aware comparison that ignores whitespace differences and attribute order and also allows wildcard values.
035 Just put '*' as the value for any element content or attribute value and any plugin output will match. It also ignores ordering of
036 duplicated child elements. Therefore if <BlastRun> has 50 <BlastAlignment> child objects, the order does not have to match in the
037 2 documents.*/
038 public static String compareFiles(String templateName, String docName) {
039 log.debug("Comparing files: "+templateName+" and "+docName);
040 Document template = XmlUtil.parseFile(templateName);
041 Document doc = XmlUtil.parseFile(docName);
042 List result = new ArrayList();
043
044 if(template.getRootElement().getName() != doc.getRootElement().getName())
045 result.add("Root nodes don't match. Expected: '"+template.getRootElement().getName()+"', Received: "+doc.getRootElement().getName());
046 else
047 result = compareXmlElements(template.getRootElement(), doc.getRootElement());
048
049 if(result.size() == 0)
050 return null;
051 else
052 return "Files differ: "+docName+" doesn't match "+templateName+"\n"+StringUtils.join(result.iterator(), '\n');
053 }
054
055 static List compareXmlElements(Element template, Element doc) {
056 List result = new ArrayList();
057
058 log.debug("Comparing elements: "+template.getUniquePath()+" and "+doc.getUniquePath());
059
060 Set attributes = new HashSet(doc.attributes());
061
062 // Compare attributes
063 Iterator it = template.attributes().iterator();
064 while(it.hasNext()) {
065 Attribute attribute = (Attribute) it.next();
066 Attribute docAttribute = doc.attribute(attribute.getQName());
067 if(docAttribute == null)
068 result.add(template.getUniquePath()+": Expected: "+attribute.getQualifiedName()+"='"+attribute.getValue()+"', Received: No attribute");
069 else {
070 String comp = compareValues(attribute.getValue(), docAttribute.getValue());
071 if(comp != null)
072 result.add(attribute.getUniquePath()+": "+comp);
073 attributes.remove(docAttribute);
074 }
075 }
076 it = attributes.iterator();
077 while(it.hasNext()) {
078 Attribute attribute = (Attribute) it.next();
079 result.add(template.getUniquePath()+": Expected: No Attribute, Received: "+attribute.getQualifiedName()+"='"+attribute.getValue()+"'");
080 }
081
082 // Compare content
083 if(template.isTextOnly()) {
084 if(!doc.isTextOnly())
085 result.add(template.getUniquePath()+": Expected: '"+template.getText()+"', Received: Non-text content");
086 else {
087 String comp = compareValues(template.getText(), doc.getText());
088 if(comp != null)
089 result.add(template.getUniquePath()+": "+comp);
090 }
091 }
092
093
094 Set matchedElements = new HashSet();
095 // Compare children
096 it = template.elements().iterator();
097 while(it.hasNext()) {
098 Element child = (Element) it.next();
099 List docElements = new ArrayList(doc.elements(child.getQName()));
100 docElements.removeAll(matchedElements);
101 if(docElements.size() == 0) {
102 result.add(template.getUniquePath()+": Expected: <"+child.getQualifiedName()+">, Received: No child element");
103 }
104 else {
105 // Loop through children, saving the best matching element. Stop if we get a perfect match.
106 int fewestProbs=999999999;
107 Iterator docIit = docElements.iterator();
108 Element bestElement = null;
109 List bestResults = null;
110 while(docIit.hasNext()) {
111 Element docElement = (Element) docIit.next();
112 List childResult = compareXmlElements(child, docElement);
113 if(childResult.size() < fewestProbs) {
114 fewestProbs = childResult.size();
115 bestElement = docElement;
116 bestResults = childResult;
117 }
118 if(fewestProbs == 0)
119 break;
120 }
121 // Now take the best element as the match
122 matchedElements.add(bestElement);
123 result.addAll(bestResults);
124 if(docElements.size() > 1)
125 log.debug("Matched: "+child.getUniquePath()+" with "+bestElement.getUniquePath());
126 }
127 }
128 Set unmatchedElements = new HashSet(doc.elements());
129 unmatchedElements.removeAll(matchedElements);
130 it = unmatchedElements.iterator();
131 while(it.hasNext()) {
132 Element element = (Element) it.next();
133 result.add(template.getUniquePath()+": Expected: No Child Element, Received: <"+element.getQualifiedName()+">");
134 }
135
136 return result;
137 }
138
139 static String compareValues(String templateValue, String docValue) {
140 if (templateValue.equals("*")) {
141 return null;
142 }
143 else if (templateValue.startsWith("REGEX:")) {
144 String regex = templateValue.substring("REGEX:".length());
145 if (!Pattern.compile(regex).matcher(docValue).matches()) {
146 return "Expected: '"+templateValue+"', Received: '"+docValue+"'";
147 }
148 }
149 else if(!templateValue.equals(docValue)) {
150 return "Expected: '"+templateValue+"', Received: '"+docValue+"'";
151 }
152 return null;
153 }
154 }