Skip to content
Permalink
Browse files Browse the repository at this point in the history
XCOMMONS-2426: Provide a component for filtering safe HTML elements and attributes

* Add an interface and two implementations for an HTML element sanitizer.
* Let the default implementation dispatch to the different
  implementations depending on a configuration.
* Allow overriding the hint from the execution context to allow a
  context to be more permissive than another.
* Add configuration options for allowed elements/attributes
* Add tests.
  • Loading branch information
michitux committed Jun 30, 2022
1 parent 67aeae5 commit 4a185e0
Show file tree
Hide file tree
Showing 14 changed files with 1,484 additions and 1 deletion.
7 changes: 6 additions & 1 deletion xwiki-commons-core/xwiki-commons-xml/pom.xml
Expand Up @@ -32,7 +32,7 @@
<packaging>jar</packaging>
<description>XWiki Commons - XML</description>
<properties>
<xwiki.jacoco.instructionRatio>0.72</xwiki.jacoco.instructionRatio>
<xwiki.jacoco.instructionRatio>0.82</xwiki.jacoco.instructionRatio>
<!-- There's a utility class with lots of features, allow it to have many dependencies;
There's a SAX event listener, which requires complex code -->
<checkstyle.suppressions.location>${basedir}/src/main/checkstyle/checkstyle-suppressions.xml
Expand All @@ -53,6 +53,11 @@
<artifactId>xwiki-commons-context</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.xwiki.commons</groupId>
<artifactId>xwiki-commons-configuration-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
Expand Down
Expand Up @@ -36,4 +36,9 @@
<!-- XWikiDomSerializer copied from DomSerializer -->
<suppress checks="CyclomaticComplexity" files="XWikiDOMSerializer" />
<suppress checks="NPathComplexity" files="XWikiDOMSerializer" />

<!-- These files have lists of strings copied from a source, making them constants would complicate updating from
upstream. -->
<suppress checks="MultipleStringLiterals"
files="SecureHTMLElementSanitizer.java|HTMLDefinitions.java|MathMLDefinitions.java|SVGDefinitions.java"/>
</suppressions>
@@ -0,0 +1,55 @@
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.xml.html;

import org.xwiki.component.annotation.Role;
import org.xwiki.stability.Unstable;

/**
* Provides methods to check if HTML elements and attributes/attribute values are considered safe.
* <p>
* This also includes SVG and MathML elements and attributes.
* <p>
* Both checks only answer a yes/no question, they do not modify anything themselves.
*
* @version $Id$
* @since 14.6RC1
*/
@Role
@Unstable
public interface HTMLElementSanitizer
{
/**
* The execution context key under which the hint of an {@link HTMLElementSanitizer} implementation can be stored.
* When set, the default implementation uses the component with that hint instead of the configured one.
*/
String EXECUTION_CONTEXT_HINT_KEY = "xml.html.htmlElementSanitizerHint";

/**
* Checks if an element may be used at all, independently of the attributes that are set on it.
*
* @param elementName the name of the HTML element
* @return {@code true} if the given element is allowed in principle (given appropriate attributes)
*/
boolean isElementAllowed(String elementName);

/**
* Checks if a single attribute, with the given value, is safe on the given element.
*
* @param elementName the name of the element on which the attribute is set
* @param attributeName the name of the attribute to check
* @param value the value of the attribute
* @return {@code true} if the attribute with this value is considered safe
*/
boolean isAttributeAllowed(String elementName, String attributeName, String value);
}
@@ -0,0 +1,135 @@
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.xml.internal.html;

import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Provider;
import javax.inject.Singleton;

import org.apache.commons.lang3.exception.ExceptionUtils;
import org.slf4j.Logger;
import org.xwiki.component.annotation.Component;
import org.xwiki.component.manager.ComponentLookupException;
import org.xwiki.component.manager.ComponentManager;
import org.xwiki.component.phase.Initializable;
import org.xwiki.component.phase.InitializationException;
import org.xwiki.configuration.ConfigurationSource;
import org.xwiki.context.Execution;
import org.xwiki.context.ExecutionContext;
import org.xwiki.stability.Unstable;
import org.xwiki.xml.html.HTMLElementSanitizer;

/**
 * Default {@link HTMLElementSanitizer} that delegates every check to the implementation chosen by the configuration.
 * <p>
 * The hint of the delegate is read during initialization from the {@code xml.htmlElementSanitizer} property of the
 * "restricted" configuration source, with the secure implementation as default and as fallback. For an individual
 * call, the delegate can be overridden by storing another hint in the current execution context under
 * {@link HTMLElementSanitizer#EXECUTION_CONTEXT_HINT_KEY}.
 *
 * @version $Id$
 * @since 14.6RC1
 */
@Component
@Singleton
@Unstable
public class DefaultHTMLElementSanitizer implements HTMLElementSanitizer, Initializable
{
    /**
     * Name of the configuration property that holds the hint of the implementation to delegate to.
     */
    private static final String CONFIGURATION_KEY = "xml.htmlElementSanitizer";

    /**
     * The implementation selected during initialization, used unless the execution context overrides it.
     */
    private HTMLElementSanitizer implementation;

    @Inject
    @Named("restricted")
    private Provider<ConfigurationSource> configurationSourceProvider;

    @Inject
    private Execution execution;

    @Inject
    private Provider<ComponentManager> componentManagerProvider;

    @Inject
    private Logger logger;

    /**
     * Reads the configured hint and loads the matching implementation, falling back to the secure one.
     *
     * @throws InitializationException if not even the secure fallback implementation can be loaded
     */
    @Override
    public void initialize() throws InitializationException
    {
        ConfigurationSource configurationSource = this.configurationSourceProvider.get();

        String hint;
        if (configurationSource != null) {
            hint = configurationSource.getProperty(CONFIGURATION_KEY, SecureHTMLElementSanitizer.HINT);
        } else {
            // No configuration available: use the secure implementation.
            hint = SecureHTMLElementSanitizer.HINT;
        }

        try {
            this.implementation = loadImplementationWithSecureFallback(hint);
        } catch (ComponentLookupException ex) {
            throw new InitializationException("Couldn't initialize the default secure HTMLElementSanitizer", ex);
        }
    }

    /**
     * Loads the implementation with the given hint, or the secure implementation when that is not possible.
     *
     * @param hint the hint of the implementation to load
     * @return the implementation with the given hint or, when it cannot be used, the secure implementation
     * @throws ComponentLookupException if the secure fallback implementation cannot be loaded either
     */
    private HTMLElementSanitizer loadImplementationWithSecureFallback(String hint) throws ComponentLookupException
    {
        ComponentManager componentManager = this.componentManagerProvider.get();

        try {
            HTMLElementSanitizer configured = componentManager.getInstance(HTMLElementSanitizer.class, hint);

            // Delegating to this very component would make every check recurse forever.
            if (configured != this) {
                return configured;
            }

            this.logger.error("The configured HTMLElementSanitizer with hint [{}] is the default implementation "
                + "itself, falling back to the default secure implementation.", hint);
        } catch (ComponentLookupException e) {
            this.logger.error("Couldn't load the configured HTMLElementSanitizer with hint [{}], falling back to the "
                + "default secure implementation: {}", hint, ExceptionUtils.getRootCauseMessage(e));
        }

        return componentManager.getInstance(HTMLElementSanitizer.class, SecureHTMLElementSanitizer.HINT);
    }

    /**
     * @return the implementation requested by the execution context if there is a usable one, the configured
     *     implementation otherwise
     */
    private HTMLElementSanitizer getImplementation()
    {
        HTMLElementSanitizer result = this.implementation;

        ExecutionContext context = this.execution.getContext();

        if (context != null && context.hasProperty(HTMLElementSanitizer.EXECUTION_CONTEXT_HINT_KEY)) {
            Object hint = context.getProperty(HTMLElementSanitizer.EXECUTION_CONTEXT_HINT_KEY);

            if (hint instanceof String) {
                result = loadContextImplementation((String) hint);
            } else if (hint != null) {
                // A blind cast would throw a ClassCastException on every single check, keep the configured
                // implementation instead.
                this.logger.error("Ignoring the HTMLElementSanitizer hint [{}] from the execution context as it is "
                    + "not a string, falling back to the configured implementation.", hint);
            }
        }

        return result;
    }

    /**
     * Loads the implementation requested by the execution context.
     *
     * @param hint the hint found in the execution context
     * @return the implementation with the given hint, or the configured implementation if it cannot be used
     */
    private HTMLElementSanitizer loadContextImplementation(String hint)
    {
        HTMLElementSanitizer result = this.implementation;

        try {
            HTMLElementSanitizer requested =
                this.componentManagerProvider.get().getInstance(HTMLElementSanitizer.class, hint);

            if (requested != this) {
                result = requested;
            } else {
                // Delegating to this very component would make every check recurse forever.
                this.logger.error("The HTMLElementSanitizer with hint [{}] from the execution context is the default "
                    + "implementation itself, falling back to the configured implementation.", hint);
            }
        } catch (ComponentLookupException e) {
            this.logger.error("Couldn't load the HTMLElementSanitizer with hint [{}] from the execution context, "
                + "falling back to the configured implementation: {}", hint, ExceptionUtils.getRootCauseMessage(e));
        }

        return result;
    }

    @Override
    public boolean isElementAllowed(String elementName)
    {
        return getImplementation().isElementAllowed(elementName);
    }

    @Override
    public boolean isAttributeAllowed(String elementName, String attributeName, String value)
    {
        return getImplementation().isAttributeAllowed(elementName, attributeName, value);
    }
}
@@ -0,0 +1,110 @@
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
/*
* Alternatively, at your choice, the contents of this file may be used under the terms of the Mozilla Public License,
* v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
package org.xwiki.xml.internal.html;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import javax.inject.Singleton;

import org.xwiki.component.annotation.Component;

/**
 * Holds the names of the HTML elements and attributes that are considered safe.
 * <p>
 * Unless otherwise noted, lists of elements and attributes are copied from DOMPurify by Cure53 and other contributors |
 * Released under the Apache license 2.0 and Mozilla Public License 2.0 -
 * <a href="https://github.com/cure53/DOMPurify/blob/main/LICENSE">LICENSE</a>.
 *
 * @version $Id$
 * @since 14.6RC1
 */
@Component(roles = HTMLDefinitions.class)
@Singleton
public class HTMLDefinitions
{
    /**
     * Names of the HTML elements that are allowed.
     */
    private final Set<String> htmlTags;

    /**
     * Names of the attributes that are allowed.
     */
    private final Set<String> htmlAttributes;

    /**
     * Default constructor, fills the sets of allowed element and attribute names.
     */
    public HTMLDefinitions()
    {
        this.htmlTags = setOf(
            "a", "abbr", "acronym", "address", "area", "article", "aside", "audio",
            "b", "bdi", "bdo", "big", "blink", "blockquote", "body", "br", "button",
            "canvas", "caption", "center", "cite", "code", "col", "colgroup", "content",
            "data", "datalist", "dd", "decorator", "del", "details", "dfn", "dialog", "dir", "div", "dl", "dt",
            "element", "em",
            "fieldset", "figcaption", "figure", "font", "footer", "form",
            "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
            "i", "img", "input", "ins",
            "kbd",
            "label", "legend", "li",
            "main", "map", "mark", "marquee", "menu", "menuitem", "meter",
            "nav", "nobr",
            "ol", "optgroup", "option", "output",
            "p", "picture", "pre", "progress",
            "q",
            "rp", "rt", "ruby",
            "s", "samp", "section", "select", "shadow", "small", "source", "spacer", "span", "strike", "strong",
            "style", "sub", "summary", "sup",
            "table", "tbody", "td", "template", "textarea", "tfoot", "th", "thead", "time", "tr", "track", "tt",
            "u", "ul",
            "var", "video",
            "wbr");

        // Generally allowed attributes. "target" is not safe in general, it is only in this list because XWiki
        // already contains code, both in HTMLCleaner and in XHTML rendering, that adds the attributes required to
        // make it safe.
        this.htmlAttributes = setOf(
            "accept", "action", "align", "alt", "autocapitalize", "autocomplete", "autopictureinpicture", "autoplay",
            "background", "bgcolor", "border",
            "capture", "cellpadding", "cellspacing", "checked", "cite", "class", "clear", "color", "cols", "colspan",
            "controls", "controlslist", "coords", "crossorigin",
            "datetime", "decoding", "default", "dir", "disabled", "disablepictureinpicture",
            "disableremoteplayback", "download", "draggable",
            "enctype", "enterkeyhint",
            "face", "for",
            "headers", "height", "hidden", "high", "href", "hreflang",
            "id", "inputmode", "integrity", "ismap",
            "kind",
            "label", "lang", "list", "loading", "loop", "low",
            "max", "maxlength", "media", "method", "min", "minlength", "multiple", "muted",
            "name", "nonce", "noshade", "novalidate", "nowrap",
            "open", "optimum",
            "pattern", "placeholder", "playsinline", "poster", "preload", "pubdate",
            "radiogroup", "readonly", "rel", "required", "rev", "reversed", "role", "rows", "rowspan",
            "spellcheck", "scope", "selected", "shape", "size", "sizes", "span", "srclang", "start", "src", "srcset",
            "step", "style", "summary",
            "tabindex", "title", "translate", "type",
            "usemap",
            "valign", "value",
            "width",
            "xmlns",
            "slot",
            "target");
    }

    /**
     * @param names the names to store
     * @return a new set that contains the given names
     */
    private static Set<String> setOf(String... names)
    {
        return new HashSet<>(Arrays.asList(names));
    }

    /**
     * @param tagName the name of the tag to check
     * @return {@code true} if the tag is in the set of allowed HTML elements
     */
    public boolean isSafeTag(String tagName)
    {
        return this.htmlTags.contains(tagName);
    }

    /**
     * @param attributeName the name of the attribute to check
     * @return {@code true} if the attribute is in the set of allowed attributes
     */
    public boolean isAllowedAttribute(String attributeName)
    {
        return this.htmlAttributes.contains(attributeName);
    }
}

0 comments on commit 4a185e0

Please sign in to comment.