001/*
002 * Configurate
003 * Copyright (C) zml and Configurate contributors
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *    http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.spongepowered.configurate.xml;
018
019import org.checkerframework.checker.nullness.qual.NonNull;
020import org.checkerframework.checker.nullness.qual.Nullable;
021import org.spongepowered.configurate.AttributedConfigurationNode;
022import org.spongepowered.configurate.CommentedConfigurationNodeIntermediary;
023import org.spongepowered.configurate.ConfigurateException;
024import org.spongepowered.configurate.ConfigurationNode;
025import org.spongepowered.configurate.ConfigurationOptions;
026import org.spongepowered.configurate.loader.AbstractConfigurationLoader;
027import org.spongepowered.configurate.loader.CommentHandler;
028import org.spongepowered.configurate.loader.CommentHandlers;
029import org.spongepowered.configurate.loader.ParsingException;
030import org.spongepowered.configurate.util.UnmodifiableCollections;
031import org.w3c.dom.Document;
032import org.w3c.dom.Element;
033import org.w3c.dom.NamedNodeMap;
034import org.w3c.dom.Node;
035import org.w3c.dom.NodeList;
036import org.xml.sax.InputSource;
037import org.xml.sax.SAXException;
038import org.xml.sax.SAXParseException;
039
040import java.io.BufferedReader;
041import java.io.FileNotFoundException;
042import java.io.IOException;
043import java.io.Writer;
044import java.nio.file.NoSuchFileException;
045import java.util.ArrayList;
046import java.util.Collection;
047import java.util.Collections;
048import java.util.LinkedHashMap;
049import java.util.Map;
050import java.util.Objects;
051import java.util.Set;
052import java.util.stream.Collectors;
053import javax.xml.XMLConstants;
054import javax.xml.parsers.DocumentBuilder;
055import javax.xml.parsers.DocumentBuilderFactory;
056import javax.xml.parsers.ParserConfigurationException;
057import javax.xml.transform.OutputKeys;
058import javax.xml.transform.Transformer;
059import javax.xml.transform.TransformerConfigurationException;
060import javax.xml.transform.TransformerException;
061import javax.xml.transform.TransformerFactory;
062import javax.xml.transform.dom.DOMSource;
063import javax.xml.transform.stream.StreamResult;
064import javax.xml.validation.Schema;
065
066/**
067 * A loader for XML (Extensible Markup Language), using the native javax library
068 * for parsing and generation.
069 *
070 * @since 4.0.0
071 */
072public final class XmlConfigurationLoader extends AbstractConfigurationLoader<AttributedConfigurationNode> {
073
074    private static final Set<Class<?>> NATIVE_TYPES = UnmodifiableCollections.toSet(Double.class, Long.class,
075            Integer.class, Boolean.class, String.class, Number.class);
076
077    /**
078     * The prefix of lines within the header.
079     */
080    private static final String HEADER_PREFIX = "~";
081
082    private static final String ATTRIBUTE_TYPE = "configurate-type";
083
084    /**
085     * The user data used to store comments on nodes.
086     */
087    private static final String USER_DATA_COMMENT = "configurate-comment";
088
089    /**
090     * The property used to mark how many spaces should be used to indent.
091     */
092    private static final String INDENT_PROPERTY = "{http://xml.apache.org/xslt}indent-amount";
093
094    private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities";
095
096    private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES = "http://xml.org/sax/features/external-parameter-entities";
097
098    private static final String FEATURE_LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
099
100
101    /**
102     * Creates a new {@link XmlConfigurationLoader} builder.
103     *
104     * @return a new builder
105     * @since 4.0.0
106     */
107    @NonNull
108    public static Builder builder() {
109        return new Builder();
110    }
111
112    /**
113     * Builds a {@link XmlConfigurationLoader}.
114     *
115     * @since 4.0.0
116     */
117    public static final class Builder extends AbstractConfigurationLoader.Builder<Builder, XmlConfigurationLoader> {
118        private @Nullable Schema schema;
119        private String defaultTagName = "element";
120        private int indent = 2;
121        private boolean writeExplicitType = true;
122        private boolean resolvesExternalContent;
123        private boolean includeXmlDeclaration = true;
124
125        Builder() {
126        }
127
128        /**
129         * Sets the level of indentation the resultant loader should use.
130         *
131         * @param indent the indent level
132         * @return this builder (for chaining)
133         * @since 4.0.0
134         */
135        @NonNull
136        public Builder indent(final int indent) {
137            this.indent = indent;
138            return this;
139        }
140
141        /**
142         * Gets the level of indentation to be used by the resultant loader.
143         *
144         * @return the indent level
145         * @since 4.0.0
146         */
147        public int indent() {
148            return this.indent;
149        }
150
151        /**
152         * Sets the {@link Schema} the resultant loader should use.
153         *
154         * @param schema the schema
155         * @return this builder (for chaining)
156         * @since 4.0.0
157         */
158        public Builder schema(final @Nullable Schema schema) {
159            this.schema = schema;
160            return this;
161        }
162
163        /**
164         * Gets the {@link Schema} to be used by the resultant loader.
165         *
166         * @return the schema
167         * @since 4.0.0
168         */
169        public @Nullable Schema schema() {
170            return this.schema;
171        }
172
173        /**
174         * Sets the default tag name the resultant loader should use.
175         *
176         * @param defaultTagName the default tag name
177         * @return this builder (for chaining)
178         * @since 4.0.0
179         */
180        public Builder defaultTagName(final String defaultTagName) {
181            this.defaultTagName = defaultTagName;
182            return this;
183        }
184
185        /**
186         * Gets the default tag name to be used by the resultant loader.
187         *
188         * @return the default tag name
189         * @since 4.0.0
190         */
191        @NonNull
192        public String defaultTagName() {
193            return this.defaultTagName;
194        }
195
196        /**
197         * Sets if the resultant loader should write the explicit type of each
198         * node when saving nodes.
199         *
200         * <p>This is necessary in some cases, as XML has no explicit definition
201         * of an array or list. The loader is able to infer the type in some
202         * cases, but this is inaccurate in some cases, for example lists with
203         * only one element.</p>
204         *
205         * @param writeExplicitType if the loader should write explicit types
206         * @return this builder (for chaining)
207         * @since 4.0.0
208         */
209        public Builder writesExplicitType(final boolean writeExplicitType) {
210            this.writeExplicitType = writeExplicitType;
211            return this;
212        }
213
214        /**
215         * Gets if explicit type attributes should be written by the loader.
216         *
217         * <p>See the method doc at {@link #writesExplicitType(boolean)} for
218         * a more detailed explanation.</p>
219         *
220         * @return the default tag name
221         * @since 4.0.0
222         */
223        public boolean writesExplicitType() {
224            return this.writeExplicitType;
225        }
226
227        /**
228         * Sets if the resultant loader should include the XML declaration
229         * header when saving.
230         *
231         * @param includeXmlDeclaration if the XML declaration should be
232         *                              included
233         * @return this builder (for chaining)
234         * @since 4.0.0
235         */
236        public Builder includesXmlDeclaration(final boolean includeXmlDeclaration) {
237            this.includeXmlDeclaration = includeXmlDeclaration;
238            return this;
239        }
240
241        /**
242         * Gets if the resultant loader should include the XML declaration
243         * header when saving.
244         *
245         * @return if the XML declaration should be included
246         * @since 4.0.0
247         */
248        public boolean includesXmlDeclaration() {
249            return this.includeXmlDeclaration;
250        }
251
252        /**
253         * Sets whether external content should be resolved when loading data.
254         *
255         * <p>Resolving this content could result in network requests being
256         * made, and will allow configuration files to access arbitrary URLs
257         * This setting should only be enabled with caution.
258         *
259         * <p>Additionally, through use of features such as entity expansion and
260         * XInclude, documents can be crafted that will grow exponentially
261         * when parsed, requiring an amount of memory to store that may be
262         * greater than what is available for the JVM.
263         *
264         * <p>By default, this is false.
265         *
266         * @param resolvesExternalContent whether to resolve external entities
267         * @return this builder
268         * @since 4.0.0
269         */
270        public Builder resolvesExternalContent(final boolean resolvesExternalContent) {
271            this.resolvesExternalContent = resolvesExternalContent;
272            return this;
273        }
274
275        /**
276         * Get whether external content should be resolved.
277         *
278         * @return value, defaulting to false
279         * @since 4.0.0
280         */
281        public boolean resolvesExternalContent() {
282            return this.resolvesExternalContent;
283        }
284
285        @Override
286        public XmlConfigurationLoader build() {
287            defaultOptions(o -> o.nativeTypes(NATIVE_TYPES));
288            return new XmlConfigurationLoader(this);
289        }
290    }
291
292    private final @Nullable Schema schema;
293    private final String defaultTagName;
294    private final int indent;
295    private final boolean writeExplicitType;
296    private final boolean includeXmlDeclaration;
297    private final boolean resolvesExternalContent;
298
299    private XmlConfigurationLoader(final Builder builder) {
300        super(builder, new CommentHandler[] {CommentHandlers.XML_STYLE});
301        this.schema = builder.schema();
302        this.defaultTagName = builder.defaultTagName();
303        this.indent = builder.indent();
304        this.writeExplicitType = builder.writesExplicitType();
305        this.includeXmlDeclaration = builder.includesXmlDeclaration();
306        this.resolvesExternalContent = builder.resolvesExternalContent();
307    }
308
309    private DocumentBuilder newDocumentBuilder() throws ConfigurateException {
310        final DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
311        if (this.schema != null) {
312            builderFactory.setSchema(this.schema);
313        }
314        if (!this.resolvesExternalContent) {
315            // Settings based on https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html
316            try {
317                builderFactory.setFeature(FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
318                builderFactory.setFeature(FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
319                builderFactory.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
320            } catch (final ParserConfigurationException e) {
321                throw new ConfigurateException(e);
322            }
323            builderFactory.setXIncludeAware(false);
324            builderFactory.setExpandEntityReferences(false);
325        }
326
327        try {
328            return builderFactory.newDocumentBuilder();
329        } catch (final ParserConfigurationException e) {
330            throw new ConfigurateException(e);
331        }
332    }
333
334    private Transformer newTransformer() throws ConfigurateException {
335        final TransformerFactory transformerFactory = TransformerFactory.newInstance();
336        if (!this.resolvesExternalContent) {
337            transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
338            transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
339        }
340        try {
341            final Transformer transformer = transformerFactory.newTransformer();
342
343            // we write the header ourselves.
344            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
345
346            if (this.indent > 0) {
347                transformer.setOutputProperty(OutputKeys.INDENT, "yes");
348                transformer.setOutputProperty(INDENT_PROPERTY, Integer.toString(this.indent));
349            }
350            return transformer;
351        } catch (final TransformerConfigurationException e) {
352            throw new ConfigurateException(e);
353        }
354    }
355
356    @Override
357    public @NonNull AttributedConfigurationNode load(@NonNull ConfigurationOptions options) throws ParsingException {
358        if (source == null) {
359            throw new ParsingException(-1, -1, "", "No source present to read from!", null);
360        }
361        try (BufferedReader reader = source.call()) {
362            final DocumentBuilder documentBuilder = newDocumentBuilder();
363
364            final Document document;
365            try {
366                document = documentBuilder.parse(new InputSource(reader));
367            } catch (final SAXParseException ex) {
368                throw new ParsingException(ex.getLineNumber(), ex.getColumnNumber(), "", ex.getMessage(), ex.getCause());
369            } catch (final SAXException e) {
370                throw new ParsingException(-1, -1, null, null, e);
371            }
372
373            final NodeList children = document.getChildNodes();
374            for (int i = 0; i < children.getLength(); ++i) {
375                final Node child = children.item(i);
376                if (child.getNodeType() == Node.COMMENT_NODE) {
377                    options = options.header(unwrapHeader(child.getTextContent().trim()));
378                } else if (child.getNodeType() == Node.ELEMENT_NODE) {
379                    final AttributedConfigurationNode node = createNode(options);
380                    readElement(child, node);
381                    return node;
382                }
383            }
384            // empty document, fall through
385        } catch (final FileNotFoundException | NoSuchFileException e) {
386            // Squash -- there's nothing to read
387        } catch (final ParsingException ex) {
388            throw ex;
389        } catch (final Exception e) {
390            throw new ParsingException(-1, -1, "", null, e);
391        }
392        return createNode(options);
393    }
394
395    /**
396     * Given a single comment node's comment, clear any prefix lines.
397     *
398     * @param headerContent the content of a header
399     * @return a formatted header, with lines separated by {@link #CONFIGURATE_LINE_SEPARATOR}
400     */
401    private String unwrapHeader(final String headerContent) {
402        if (headerContent.isEmpty()) {
403            return headerContent;
404        }
405        // TODO: 4.0 may have changed behaviour here when moving away from Guava
406        return CONFIGURATE_LINE_PATTERN.splitAsStream(headerContent)
407                .map(line -> {
408                    final String trimmedLine = line.trim();
409                    if (trimmedLine.startsWith(HEADER_PREFIX)) {
410                        line = line.substring(line.indexOf(HEADER_PREFIX) + 1);
411                    }
412
413                    if (line.length() > 0 && line.charAt(0) == ' ') {
414                        line = line.substring(1);
415                    }
416                    return line;
417                }).filter(line -> !line.isEmpty())
418                .collect(Collectors.joining(CONFIGURATE_LINE_SEPARATOR));
419    }
420
421    @Override
422    protected void loadInternal(final AttributedConfigurationNode node, final BufferedReader reader) {
423        throw new UnsupportedOperationException("XMLConfigurationLoader provides custom loading logic to handle headers");
424    }
425
426    private enum NodeType {
427        MAP, LIST
428    }
429
430    private void readElement(final Node from, final AttributedConfigurationNode to) {
431        @Nullable NodeType type = null;
432
433        // copy the name of the tag
434        to.tagName(from.getNodeName());
435
436        final String potentialComment = (String) from.getUserData(USER_DATA_COMMENT);
437        if (potentialComment != null) {
438            to.comment(potentialComment);
439        }
440
441        // copy attributes
442        if (from.hasAttributes()) {
443            final NamedNodeMap attributes = from.getAttributes();
444            for (int i = 0; i < attributes.getLength(); i++) {
445                final Node attribute = attributes.item(i);
446                final String key = attribute.getNodeName();
447                final String value = attribute.getNodeValue();
448
449                // read the type of the node
450                if (key.equals(ATTRIBUTE_TYPE)) {
451                    if (value.equals("map")) {
452                        type = NodeType.MAP;
453                    } else if (value.equals("list")) {
454                        type = NodeType.LIST;
455                    }
456
457                    // don't add internal configurate attributes to the node
458                    continue;
459                }
460
461                to.addAttribute(key, value);
462            }
463        }
464
465        // read out the child nodes into a multimap
466        final Map<String, Collection<Node>> children = new LinkedHashMap<>();
467        if (from.hasChildNodes()) {
468            final StringBuilder comment = new StringBuilder();
469            final NodeList childNodes = from.getChildNodes();
470            for (int i = 0; i < childNodes.getLength(); i++) {
471                final Node child = childNodes.item(i);
472                if (child.getNodeType() == Node.ELEMENT_NODE) {
473                    children.computeIfAbsent(child.getNodeName(), $ -> new ArrayList<>()).add(child);
474                    if (comment.length() > 0) {
475                        child.setUserData(USER_DATA_COMMENT, comment.toString(), null);
476                        comment.setLength(0);
477                    }
478                } else if (child.getNodeType() == Node.COMMENT_NODE) {
479                    if (comment.length() > 0) {
480                        comment.append('\n');
481                    }
482
483                    comment.append(child.getTextContent().trim());
484                }
485            }
486        }
487
488        // if there are no child nodes present, assume it's a scalar value
489        if (children.isEmpty()) {
490            to.raw(parseValue(from.getTextContent()));
491            return;
492        }
493
494        // if type is null, we need to infer what type the element is
495        if (type == null) {
496            // if there are no duplicate keys, we can infer that it is a map
497            // otherwise, assume it's a list
498            type = NodeType.MAP;
499            for (Collection<Node> child : children.values()) {
500                if (child.size() > 1) {
501                    type = NodeType.LIST;
502                    break;
503                }
504            }
505        }
506
507        if (type == NodeType.MAP) {
508            to.raw(Collections.emptyMap());
509        } else {
510            to.raw(Collections.emptyList());
511        }
512
513        // read out the elements
514        for (Map.Entry<String, Collection<Node>> entry : children.entrySet()) {
515            AttributedConfigurationNode child;
516            if (type == NodeType.MAP) {
517                child = to.node(entry.getKey());
518                readElement(entry.getValue().iterator().next(), child);
519            } else {
520                for (Node element : entry.getValue()) {
521                    child = to.appendListNode();
522                    readElement(element, child);
523                }
524            }
525        }
526    }
527
528    @Override
529    protected void writeHeaderInternal(final Writer writer) throws IOException {
530        if (this.includeXmlDeclaration) {
531            writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
532            writer.write(SYSTEM_LINE_SEPARATOR);
533        }
534    }
535
536    @Override
537    protected void saveInternal(final ConfigurationNode node, final Writer writer) throws ConfigurateException {
538        final DocumentBuilder documentBuilder = newDocumentBuilder();
539        final Document document = documentBuilder.newDocument();
540
541        final @Nullable Node comment = createCommentNode(document, node);
542        if (comment != null) {
543            document.appendChild(comment);
544        }
545
546        document.appendChild(writeNode(document, node, null));
547
548        final Transformer transformer = newTransformer();
549        final DOMSource source = new DOMSource(document);
550        try {
551            transformer.transform(source, new StreamResult(writer));
552        } catch (final TransformerException e) {
553            throw new ConfigurateException(node, e);
554        }
555    }
556
557    private void appendCommentIfNecessary(final Element parent, final ConfigurationNode node) {
558        final @Nullable Node possibleComment = createCommentNode(parent.getOwnerDocument(), node);
559        if (possibleComment != null) {
560            parent.appendChild(possibleComment);
561        }
562    }
563
564    private @Nullable Node createCommentNode(final Document doc, final ConfigurationNode node) {
565        if (node instanceof CommentedConfigurationNodeIntermediary<?>) {
566            final @Nullable String comment = ((CommentedConfigurationNodeIntermediary<?>) node).comment();
567            if (comment != null) {
568                return doc.createComment(" " + comment.trim() + " ");
569            }
570        }
571        return null;
572    }
573
574    private Element writeNode(final Document document, final ConfigurationNode node, final @Nullable String forcedTag) {
575        String tag = this.defaultTagName;
576        Map<String, String> attributes = Collections.emptyMap();
577
578        if (node instanceof AttributedConfigurationNode) {
579            final AttributedConfigurationNode attributedNode = (AttributedConfigurationNode) node;
580            tag = attributedNode.tagName();
581            attributes = attributedNode.attributes();
582        }
583
584        final Element element = document.createElement(forcedTag == null ? tag : forcedTag);
585        for (final Map.Entry<String, String> attribute : attributes.entrySet()) {
586            element.setAttribute(attribute.getKey(), attribute.getValue());
587        }
588
589        if (node.isMap()) {
590            for (final Map.Entry<Object, ? extends ConfigurationNode> child : node.childrenMap().entrySet()) {
591                appendCommentIfNecessary(element, child.getValue());
592                element.appendChild(writeNode(document, child.getValue(), child.getKey().toString()));
593            }
594        } else if (node.isList()) {
595            if (this.writeExplicitType) {
596                element.setAttribute(ATTRIBUTE_TYPE, "list");
597            }
598            for (final ConfigurationNode child : node.childrenList()) {
599                appendCommentIfNecessary(element, child);
600                element.appendChild(writeNode(document, child, null));
601            }
602        } else {
603            element.appendChild(document.createTextNode(Objects.toString(node.rawScalar())));
604        }
605
606        return element;
607    }
608
609    @Override
610    public AttributedConfigurationNode createNode(ConfigurationOptions options) {
611        options = options.nativeTypes(NATIVE_TYPES);
612        return AttributedConfigurationNode.root("root", options);
613    }
614
615    private static Object parseValue(final String value) {
616        if (value.equals("true") || value.equals("false")) {
617            return Boolean.parseBoolean(value);
618        }
619
620        try {
621            final double doubleValue = Double.parseDouble(value);
622            if (isInteger(doubleValue)) {
623                final long longValue = Long.parseLong(value); // prevent losing precision
624                final int intValue = (int) longValue;
625                if (longValue == intValue) {
626                    return intValue;
627                } else {
628                    return longValue;
629                }
630            }
631            return doubleValue;
632        } catch (final NumberFormatException e) {
633            return value;
634        }
635    }
636
637    private static boolean isInteger(final double value) {
638        return !Double.isNaN(value) && Double.isFinite(value) && value == Math.rint(value);
639    }
640
641}