001 /**
002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003 * Licensed under the Apache License, Version 2.0 (the "License");
004 * you may not use this file except in compliance with the License.
005 * You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software
010 * distributed under the License is distributed on an "AS IS" BASIS,
011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 * See the License for the specific language governing permissions and
013 * limitations under the License. See accompanying LICENSE file.
014 */
015 package org.apache.oozie.util;
016
017 import java.io.ByteArrayInputStream;
018 import java.io.IOException;
019 import java.io.InputStream;
020 import java.io.StringReader;
021 import java.io.StringWriter;
022 import java.util.Enumeration;
023 import java.util.Iterator;
024 import java.util.List;
025 import java.util.Map;
026 import java.util.Properties;
027
028 import javax.xml.XMLConstants;
029 import javax.xml.parsers.DocumentBuilderFactory;
030 import javax.xml.transform.Result;
031 import javax.xml.transform.Source;
032 import javax.xml.transform.Transformer;
033 import javax.xml.transform.TransformerFactory;
034 import javax.xml.transform.dom.DOMSource;
035 import javax.xml.transform.stream.StreamResult;
036 import javax.xml.transform.stream.StreamSource;
037 import javax.xml.validation.Schema;
038 import javax.xml.validation.SchemaFactory;
039 import javax.xml.validation.Validator;
040
041 import org.apache.hadoop.conf.Configuration;
042 import org.apache.oozie.service.SchemaService;
043 import org.apache.oozie.service.Services;
044 import org.apache.oozie.service.SchemaService.SchemaName;
045 import org.jdom.Comment;
046 import org.jdom.Document;
047 import org.jdom.Element;
048 import org.jdom.JDOMException;
049 import org.jdom.input.SAXBuilder;
050 import org.jdom.output.Format;
051 import org.jdom.output.XMLOutputter;
052 import org.xml.sax.EntityResolver;
053 import org.xml.sax.InputSource;
054 import org.xml.sax.SAXException;
055
056 /**
057 * XML utility methods.
058 */
059 public class XmlUtils {
060 public static final String SLA_NAME_SPACE_URI = "uri:oozie:sla:0.1";
061
062 private static class NoExternalEntityEntityResolver implements EntityResolver {
063
064 public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
065 return new InputSource(new ByteArrayInputStream(new byte[0]));
066 }
067
068 }
069
070 private static SAXBuilder createSAXBuilder() {
071 SAXBuilder saxBuilder = new SAXBuilder();
072
073 //THIS IS NOT WORKING
074 //saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
075
076 //INSTEAD WE ARE JUST SETTING AN EntityResolver that does not resolve entities
077 saxBuilder.setEntityResolver(new NoExternalEntityEntityResolver());
078 return saxBuilder;
079 }
080
081 /**
082 * Remove comments from any Xml String.
083 *
084 * @param xmlStr XML string to remove comments.
085 * @return String after removing comments.
086 * @throws JDOMException thrown if an error happend while XML parsing.
087 */
088 public static String removeComments(String xmlStr) throws JDOMException {
089 if (xmlStr == null) {
090 return null;
091 }
092 try {
093 SAXBuilder saxBuilder = createSAXBuilder();
094 Document document = saxBuilder.build(new StringReader(xmlStr));
095 removeComments(document);
096 return prettyPrint(document.getRootElement()).toString();
097 }
098 catch (IOException ex) {
099 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
100 }
101 }
102
103 private static void removeComments(List l) {
104 for (Iterator i = l.iterator(); i.hasNext();) {
105 Object node = i.next();
106 if (node instanceof Comment) {
107 i.remove();
108 }
109 else {
110 if (node instanceof Element) {
111 removeComments(((Element) node).getContent());
112 }
113 }
114 }
115 }
116
117 private static void removeComments(Document doc) {
118 removeComments(doc.getContent());
119 }
120
121 /**
122 * Parse a string assuming it is a valid XML document and return an JDOM Element for it.
123 *
124 * @param xmlStr XML string to parse.
125 * @return JDOM element for the parsed XML string.
126 * @throws JDOMException thrown if an error happend while XML parsing.
127 */
128 public static Element parseXml(String xmlStr) throws JDOMException {
129 ParamChecker.notNull(xmlStr, "xmlStr");
130 try {
131 SAXBuilder saxBuilder = createSAXBuilder();
132 Document document = saxBuilder.build(new StringReader(xmlStr));
133 return document.getRootElement();
134 }
135 catch (IOException ex) {
136 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
137 }
138 }
139
140 /**
141 * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it.
142 *
143 * @param is inputstream to parse.
144 * @return JDOM element for the parsed XML string.
145 * @throws JDOMException thrown if an error happend while XML parsing.
146 * @throws IOException thrown if an IO error occurred.
147 */
148 public static Element parseXml(InputStream is) throws JDOMException, IOException {
149 ParamChecker.notNull(is, "is");
150 SAXBuilder saxBuilder = createSAXBuilder();
151 Document document = saxBuilder.build(is);
152 return document.getRootElement();
153 }
154
155 /**
156 * //TODO move this to action registry method Return the value of an attribute from the root element of an XML
157 * document.
158 *
159 * @param filePath path of the XML document.
160 * @param attributeName attribute to retrieve value for.
161 * @return value of the specified attribute.
162 */
163 public static String getRootAttribute(String filePath, String attributeName) {
164 ParamChecker.notNull(filePath, "filePath");
165 ParamChecker.notNull(attributeName, "attributeName");
166 SAXBuilder saxBuilder = createSAXBuilder();
167 try {
168 Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath));
169 return doc.getRootElement().getAttributeValue(attributeName);
170 }
171 catch (JDOMException e) {
172 throw new RuntimeException();
173 }
174 catch (IOException e) {
175 throw new RuntimeException();
176 }
177 }
178
179 /**
180 * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the
181 * {@link #toString} method is invoked.
182 */
183 public static class PrettyPrint {
184 private String str;
185 private Element element;
186
187 private PrettyPrint(String str) {
188 this.str = str;
189 }
190
191 private PrettyPrint(Element element) {
192 this.element = ParamChecker.notNull(element, "element");
193 }
194
195 /**
196 * Return the pretty print representation of an XML document.
197 *
198 * @return the pretty print representation of an XML document.
199 */
200 @Override
201 public String toString() {
202 if (str != null) {
203 return str;
204 }
205 else {
206 XMLOutputter outputter = new XMLOutputter();
207 StringWriter stringWriter = new StringWriter();
208 outputter.setFormat(Format.getPrettyFormat());
209 try {
210 outputter.output(element, stringWriter);
211 }
212 catch (Exception ex) {
213 throw new RuntimeException(ex);
214 }
215 return stringWriter.toString();
216 }
217 }
218 }
219
220 /**
221 * Return a pretty print string for a JDOM Element.
222 *
223 * @param element JDOM element.
224 * @return pretty print of the given JDOM Element.
225 */
226 public static PrettyPrint prettyPrint(Element element) {
227 return new PrettyPrint(element);
228
229 }
230
231 /**
232 * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original
233 * string.
234 *
235 * @param xmlStr XML string.
236 * @return prettyprint of the given XML string or the original string if the given string is not valid XML.
237 */
238 public static PrettyPrint prettyPrint(String xmlStr) {
239 try {
240 return new PrettyPrint(parseXml(xmlStr));
241 }
242 catch (Exception e) {
243 return new PrettyPrint(xmlStr);
244 }
245 }
246
247 /**
248 * Return a pretty print string for a Configuration object.
249 *
250 * @param conf Configuration object.
251 * @return prettyprint of the given Configuration object.
252 */
253 public static PrettyPrint prettyPrint(Configuration conf) {
254 Element root = new Element("configuration");
255 for (Map.Entry<String, String> entry : conf) {
256 Element property = new Element("property");
257 Element name = new Element("name");
258 name.setText(entry.getKey());
259 Element value = new Element("value");
260 value.setText(entry.getValue());
261 property.addContent(name);
262 property.addContent(value);
263 root.addContent(property);
264 }
265 return new PrettyPrint(root);
266 }
267
268 /**
269 * Schema validation for a given xml. <p/>
270 *
271 * @param schema for validation
272 * @param xml to be validated
273 */
274 public static void validateXml(Schema schema, String xml) throws SAXException, IOException {
275
276 Validator validator = schema.newValidator();
277 validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes())));
278 }
279
280 /**
281 * Create schema object for the given xsd
282 *
283 * @param is inputstream to schema.
284 * @return the schema object.
285 */
286 public static Schema createSchema(InputStream is) {
287 SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
288 StreamSource src = new StreamSource(is);
289 try {
290 return factory.newSchema(src);
291 }
292 catch (SAXException e) {
293 throw new RuntimeException(e.getMessage(), e);
294 }
295 }
296
297 public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException {
298 if (xmlData == null || xmlData.length() == 0) {
299 return;
300 }
301 javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile);
302 validateXml(schema, xmlData);
303 }
304
305 /**
306 * Convert Properties to string
307 *
308 * @param props
309 * @return xml string
310 * @throws IOException
311 */
312 public static String writePropToString(Properties props) throws IOException {
313 try {
314 org.w3c.dom.Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
315 org.w3c.dom.Element conf = doc.createElement("configuration");
316 doc.appendChild(conf);
317 conf.appendChild(doc.createTextNode("\n"));
318 for (Enumeration e = props.keys(); e.hasMoreElements();) {
319 String name = (String) e.nextElement();
320 Object object = props.get(name);
321 String value;
322 if (object instanceof String) {
323 value = (String) object;
324 }
325 else {
326 continue;
327 }
328 org.w3c.dom.Element propNode = doc.createElement("property");
329 conf.appendChild(propNode);
330
331 org.w3c.dom.Element nameNode = doc.createElement("name");
332 nameNode.appendChild(doc.createTextNode(name.trim()));
333 propNode.appendChild(nameNode);
334
335 org.w3c.dom.Element valueNode = doc.createElement("value");
336 valueNode.appendChild(doc.createTextNode(value.trim()));
337 propNode.appendChild(valueNode);
338
339 conf.appendChild(doc.createTextNode("\n"));
340 }
341
342 Source source = new DOMSource(doc);
343 StringWriter stringWriter = new StringWriter();
344 Result result = new StreamResult(stringWriter);
345 TransformerFactory factory = TransformerFactory.newInstance();
346 Transformer transformer = factory.newTransformer();
347 transformer.transform(source, result);
348
349 return stringWriter.getBuffer().toString();
350 }
351 catch (Exception e) {
352 throw new IOException(e);
353 }
354 }
355
356 }