Skip to content

Commit

Permalink
Add HTML.Link config switch, refactor shared rel validation logic
Browse files Browse the repository at this point in the history
  • Loading branch information
xemlock committed Aug 30, 2021
1 parent f457754 commit 4bfaedd
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 93 deletions.
38 changes: 28 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,38 +73,56 @@ Apart from HTML Purifier's built-in [configuration directives](http://htmlpurifi
Type: [Boolean](http://htmlpurifier.org/live/configdoc/plain.html#type-bool)\
Default: `false`

Whether or not to permit `allowfullscreen` attribute on `iframe` tags. It requires either [%HTML.SafeIframe](http://htmlpurifier.org/live/configdoc/plain.html#HTML.SafeIframe) or [%HTML.Trusted](http://htmlpurifier.org/live/configdoc/plain.html#HTML.Trusted) to be `true`.
Whether or not to permit `allowfullscreen` attribute on `iframe` tags. It requires either
[%HTML.SafeIframe](http://htmlpurifier.org/live/configdoc/plain.html#HTML.SafeIframe) or
[%HTML.Trusted](http://htmlpurifier.org/live/configdoc/plain.html#HTML.Trusted) to be `true`.

* __HTML.XHTML__
* __HTML.Link__

Version added: 0.1.12\
Type: [Boolean](http://htmlpurifier.org/live/configdoc/plain.html#type-bool)\
Default: `false`

While deprecated in HTML 4.01 / XHTML 1.0 context, in HTML5 it's used for
enabling support for namespaced attributes and XML self-closing tags.
Permit the `link` tags in the user input, regardless of
[%HTML.Trusted](http://htmlpurifier.org/live/configdoc/plain.html#HTML.Trusted) value.
This effectively allows `link` tags without allowing other untrusted elements.

When enabled it causes `xml:lang` attribute to take precedence over `lang`,
when both attributes are present on the same element.
If enabled, URIs in `link` tags will not be matched against a whitelist specified
in %URI.SafeLinkRegexp (unless %HTML.SafeLink is also enabled).

* __HTML.SafeLink__

Version added: 0.1.12\
Type: [Boolean](http://htmlpurifier.org/live/configdoc/plain.html#type-bool)\
Default: `false`

Permit the link tags in untrusted documents. This directive must
be accompanied by a whitelist of permitted URIs via %URI.SafeLinkRegexp.
Whether to permit `link` tags in untrusted documents. This directive must
be accompanied by a whitelist of permitted URIs via %URI.SafeLinkRegexp,
otherwise no `link` tags will be allowed.

* __HTML.XHTML__

Version added: 0.1.12\
Type: [Boolean](http://htmlpurifier.org/live/configdoc/plain.html#type-bool)\
Default: `false`

While deprecated in HTML 4.01 / XHTML 1.0 context, in HTML5 it's used for
enabling support for namespaced attributes and XML self-closing tags.

When enabled it causes `xml:lang` attribute to take precedence over `lang`,
when both attributes are present on the same element.

* __URI.SafeLinkRegexp__

Version added: 0.1.12\
Type: [String](http://htmlpurifier.org/live/configdoc/plain.html#type-string)\
Default: `null`

A PCRE regular expression that will be matched against a `<link>` URI. Here are some example values:
A PCRE regular expression that will be matched against a `<link>` URI. This directive
only has an effect if %HTML.SafeLink is enabled. Here are some example values:
`%^https?://localhost/%` - Allow localhost URIs
Use `Attr.AllowedRel` to control permitted link types.

Use `Attr.AllowedRel` to control permitted link relationship types.

## Supported HTML5 elements

Expand Down
62 changes: 3 additions & 59 deletions library/HTMLPurifier/AttrDef/HTML5/ARel.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,12 @@
/**
* Validates 'rel' attribute on <a> and <area> elements, as defined by the
* HTML5 spec and the MicroFormats link type extensions tables.
*
* @see https://html.spec.whatwg.org/multipage/links.html#linkTypes
*/
class HTMLPurifier_AttrDef_HTML5_ARel extends HTMLPurifier_AttrDef
class HTMLPurifier_AttrDef_HTML5_ARel extends HTMLPurifier_AttrDef_HTML5_Rel
{
/**
* Lookup table for valid values
* @var array
*/
protected static $values = array(
// https://html.spec.whatwg.org/multipage/links.html#linkTypes
'alternate' => true,
'author' => true,
'bookmark' => true,
Expand Down Expand Up @@ -92,57 +89,4 @@ class HTMLPurifier_AttrDef_HTML5_ARel extends HTMLPurifier_AttrDef
'webmention' => true,
'widget' => true,
);

/**
* Return lookup table for valid 'rel' values
*
* @return array
* @codeCoverageIgnore
*/
public static function values()
{
return self::$values;
}

/**
* @var array
*/
protected $allowed;

/**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
if ($this->allowed === null) {
$allowedRel = (array) $config->get('Attr.AllowedRel');
if (empty($allowedRel)) {
$allowed = array();
} else {
$allowed = array_intersect_key($allowedRel, static::$values);
}
$this->allowed = $allowed;
}

$string = $this->parseCDATA($string);
$parts = explode(' ', $string);

$result = array();
foreach ($parts as $part) {
$part = strtolower(trim($part));
if (!isset($this->allowed[$part])) {
continue;
}
$result[$part] = true;
}

if (empty($result)) {
return false;
}

return implode(' ', array_keys($result));
}
}
25 changes: 13 additions & 12 deletions library/HTMLPurifier/AttrDef/HTML5/LinkRel.php
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
<?php

/*
* Keywords that are body-ok affect whether link elements are allowed in the body.
* The body-ok keywords are dns-prefetch, modulepreload, pingback, preconnect, prefetch, preload, prerender,
* and stylesheet.
*
* https://html.spec.whatwg.org/multipage/links.html#body-ok
/**
* Validates 'rel' attribute on <link> elements, as defined by the HTML5 spec.
*
* @note We cannot use Enum because multiple values are allowed.
* Keywords that are body-ok affect whether link elements are allowed in the body.
* @see https://html.spec.whatwg.org/multipage/links.html#body-ok
* @see https://html.spec.whatwg.org/multipage/links.html#linkTypes
*/
class HTMLPurifier_AttrDef_HTML5_LinkRel extends HTMLPurifier_AttrDef_HTML5_ARel
class HTMLPurifier_AttrDef_HTML5_LinkRel extends HTMLPurifier_AttrDef_HTML5_Rel
{
/**
* Lookup table for valid values
* @var array
*/
protected static $values = array(
'dns-prefetch' => true,
'modulepreload' => true,
'pingback' => true,
'preconnect' => true,
'prefetch' => true,
'preload' => true,
'prerender' => true,
'stylesheet' => true,
);
}
62 changes: 62 additions & 0 deletions library/HTMLPurifier/AttrDef/HTML5/Rel.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?php

/**
* Shared validation logic for 'rel' attribute on <a>, <area>, <form> and <link> elements,
* as defined by the HTML5 spec and the MicroFormats link type extensions tables.
*
* @see https://html.spec.whatwg.org/multipage/links.html#linkTypes
*/
abstract class HTMLPurifier_AttrDef_HTML5_Rel extends HTMLPurifier_AttrDef
{
    /**
     * Lookup table for valid rel values (keyword => true).
     * Resolved through late static binding, so each subclass supplies
     * its own table by redeclaring this property.
     * Stored as a static variable to minimize serialization footprint.
     * @var array
     */
    protected static $values = array();

    /**
     * Lazy loaded lookup for allowed rel values, based on provided config.
     * Stays null until the first validate() call; it is computed once per
     * instance, from the config passed to that first call.
     * @var array|null
     */
    protected $allowed;

    /**
     * Filters a space-separated 'rel' value down to the permitted keywords.
     *
     * A keyword is kept only if it is present both in %Attr.AllowedRel and
     * in the lookup table of the concrete class. Kept keywords are lowercased,
     * de-duplicated and returned in order of first appearance.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return false|string Space-joined permitted keywords, or false if none remain
     */
    public function validate($string, $config, $context)
    {
        if ($this->allowed === null) {
            // Intersecting with an empty %Attr.AllowedRel yields an empty array,
            // so no separate empty check is needed.
            $allowedRel = (array) $config->get('Attr.AllowedRel');
            $this->allowed = array_intersect_key($allowedRel, static::$values);
        }

        $string = $this->parseCDATA($string);
        $parts = explode(' ', $string);

        $result = array();
        foreach ($parts as $part) {
            // Link type keywords are always ASCII case-insensitive, and must be compared as such.
            // https://html.spec.whatwg.org/multipage/links.html#linkTypes
            //
            // strtolower() is locale-dependent prior to PHP 8.2 (e.g. it may not map
            // 'I' to 'i' under a Turkish locale), so only A-Z are folded explicitly.
            $part = strtr(
                trim($part),
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                'abcdefghijklmnopqrstuvwxyz'
            );
            if (isset($this->allowed[$part])) {
                // Keyed by keyword, so repeated keywords collapse into one
                $result[$part] = true;
            }
        }

        if (empty($result)) {
            return false;
        }

        return implode(' ', array_keys($result));
    }
}
2 changes: 1 addition & 1 deletion library/HTMLPurifier/HTML5Config.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

class HTMLPurifier_HTML5Config extends HTMLPurifier_Config
{
const REVISION = 2021083002;
const REVISION = 2021083003;

/**
* @param string|array|HTMLPurifier_Config $config
Expand Down
5 changes: 2 additions & 3 deletions library/HTMLPurifier/HTML5URIDefinition.php
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
<?php

class HTMLPurifier_HTML5URIDefinition
abstract class HTMLPurifier_HTML5URIDefinition
{
public static function setupDefinition(HTMLPurifier_URIDefinition $def, HTMLPurifier_Config $config)
{
$def->registerFilter(new HTMLPurifier_URIFilter_SafeLink);

$def->registerFilter(new HTMLPurifier_URIFilter_HTML5_SafeLink());
return $def;
}
}
13 changes: 9 additions & 4 deletions library/HTMLPurifier/HTMLModule/HTML5/Link.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,31 @@
*/
class HTMLPurifier_HTMLModule_HTML5_Link extends HTMLPurifier_HTMLModule
{
/**
* @type string
*/
public $name = 'HTML5_Link';

/**
* @type bool
*/
public $safe = false;

/**
* @param HTMLPurifier_Config $config
*/
public function setup($config)
{
if ($config->get('HTML.SafeLink')) {
if ($config->get('HTML.Link') || $config->get('HTML.SafeLink')) {
$this->safe = true;
}

// https://html.spec.whatwg.org/dev/semantics.html#the-link-element
$this->addElement('link', 'Flow', 'Empty', null, array(
'rel*' => new HTMLPurifier_AttrDef_HTML5_LinkRel,
'type' => new HTMLPurifier_AttrDef_Enum(array('text/css')),
'rel*' => new HTMLPurifier_AttrDef_HTML5_LinkRel(),
'href*' => new HTMLPurifier_AttrDef_URI(true),
'type' => 'Text',
));

$this->addElementToContentSet('link', 'Inline');
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<?php

class HTMLPurifier_URIFilter_SafeLink extends HTMLPurifier_URIFilter
class HTMLPurifier_URIFilter_HTML5_SafeLink extends HTMLPurifier_URIFilter
{
/**
* @type string
*/
public $name = 'SafeLink';
public $name = 'HTML5_SafeLink';

/**
* @type bool
Expand All @@ -24,7 +24,6 @@ class HTMLPurifier_URIFilter_SafeLink extends HTMLPurifier_URIFilter
public function prepare($config)
{
$this->regexp = $config->get('URI.SafeLinkRegexp');

return true;
}

Expand All @@ -37,7 +36,7 @@ public function prepare($config)
public function filter(&$uri, $config, $context)
{
// check if filter not applicable
if ($config->get('HTML.SafeLink') !== true) {
if (!$config->get('HTML.SafeLink')) {
return true;
}

Expand Down
50 changes: 50 additions & 0 deletions tests/HTMLPurifier/HTMLModule/HTML5/LinkTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,54 @@ public function testMultipleElements()
'<span>Foo</span><link href="https://localhost/foo.css" rel="stylesheet">'
);
}

/**
 * With an empty Attr.AllowedRel list the whole link element is expected
 * to be purified away.
 */
public function testEmptyAllowedRel()
{
    $noRelAllowed = array();
    $this->config->set('Attr.AllowedRel', $noRelAllowed);

    $markup = '<link href="https://localhost/foo.css" rel="stylesheet">';
    $this->assertPurification($markup, '');
}

/**
 * A rel keyword that is listed in Attr.AllowedRel but is not valid for
 * link elements is expected to be dropped, while the valid one is kept.
 */
public function testInvalidRel()
{
    $allowedRel = array('alternate', 'stylesheet');
    $this->config->set('Attr.AllowedRel', $allowedRel);

    // 'alternate' is not allowed in links in body
    // https://html.spec.whatwg.org/multipage/links.html#linkTypes
    $input = '<link href="https://localhost/foo.css" rel="alternate stylesheet">';
    $expected = '<link href="https://localhost/foo.css" rel="stylesheet">';

    $this->assertPurification($input, $expected);
}

/**
 * With URI.DisableResources enabled the link element is expected to be
 * removed from the output.
 */
public function testDisableExternalResources()
{
    $this->config->set('URI.DisableResources', true);

    $markup = '<link href="https://localhost/foo.css" rel="stylesheet">';
    $this->assertPurification($markup, '');
}

/**
 * With HTML.Link and HTML.SafeLink both off, HTML.Trusted alone is expected
 * to let the link element through unchanged.
 */
public function testEnableWithHtmlTrustedConfig()
{
    $this->config->set('HTML.Link', false);
    $this->config->set('HTML.SafeLink', false);
    $this->config->set('HTML.Trusted', true);

    $markup = '<link href="http://google.com/foo.css" rel="stylesheet">';
    $this->assertPurification($markup);
}

/**
 * HTML.Link alone is expected to let the link element through unchanged.
 *
 * HTML.Trusted and HTML.SafeLink are explicitly turned off, so that the
 * element can only be permitted by the HTML.Link switch under test.
 */
public function testEnableWithHtmlLinkConfig()
{
    $this->config->set('HTML.Link', true);
    $this->config->set('HTML.SafeLink', false);
    // Must be false: with HTML.Trusted on, the element is permitted regardless
    // of HTML.Link and this test could not detect a broken HTML.Link switch.
    $this->config->set('HTML.Trusted', false);

    $this->assertPurification('<link href="http://google.com/foo.css" rel="stylesheet">');
}
}

0 comments on commit 4bfaedd

Please sign in to comment.