Skip to content

Commit

Permalink
Make List module HTML5 spec compliant
Browse files Browse the repository at this point in the history
  • Loading branch information
xemlock committed Jun 2, 2020
1 parent ed5dcc0 commit a8b3229
Show file tree
Hide file tree
Showing 9 changed files with 729 additions and 18 deletions.
140 changes: 140 additions & 0 deletions library/HTMLPurifier/ChildDef/HTML5/Div.php
@@ -0,0 +1,140 @@
<?php

/**
 * HTML5 compliant div content model
 *
 * In HTML5 divs can be nested directly under dl elements. When they are,
 * they are expected to have dt and dd children. A div is treated as such
 * a "dl div" when the node currently being validated carries a non-empty
 * 'DlDiv' attribute; otherwise it is validated as an ordinary flow container
 * from which dt and dd elements are stripped (their contents are kept).
 */
class HTMLPurifier_ChildDef_HTML5_Div extends HTMLPurifier_ChildDef_Optional
{
    public $type = 'div';

    public $allow_empty = true;

    /**
     * Allowed child names. Entries that name a content set of the HTML
     * definition (e.g. 'Flow') are replaced by the members of that set
     * the first time {@link init()} runs.
     *
     * @var array
     */
    public $elements = array(
        '#PCDATA' => true,
        'Flow' => true,
        'dt' => true,
        'dd' => true,
    );

    /**
     * Whether {@link $elements} has already been expanded by init().
     *
     * @var bool
     */
    protected $init = false;

    public function __construct()
    {
        parent::__construct($this->elements);
    }

    /**
     * Returns the allowed child elements with content sets expanded.
     *
     * @param HTMLPurifier_Config $config
     * @return array
     */
    public function getAllowedElements($config)
    {
        $this->init($config);
        return $this->elements;
    }

    /**
     * Expands content-set names in $elements into concrete element names.
     * Runs at most once per instance; later calls are no-ops.
     *
     * @param HTMLPurifier_Config $config
     */
    protected function init(HTMLPurifier_Config $config)
    {
        if ($this->init) {
            return;
        }
        $def = $config->getHTMLDefinition();
        $elements = array();
        foreach ($this->elements as $name => $_) {
            if (isset($def->info_content_sets[$name])) {
                // $name is a content set: splice in its members
                $elements = array_merge($elements, $def->info_content_sets[$name]);
            } else {
                $elements[$name] = true;
            }
        }
        $this->elements = $elements;
        $this->init = true;
    }

    /**
     * Validates children either as a regular div or as a div nested in dl,
     * depending on the 'DlDiv' attribute of the node stored in the context
     * under 'CurrentNode'.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool
     */
    public function validateChildren($children, $config, $context)
    {
        $this->init($config);

        // NOTE(review): the 'DlDiv' marker is presumably set by
        // HTMLPurifier_Injector_HTML5_DlDiv - confirm against the injector.
        $currentNode = $context->get('CurrentNode', true);
        if (empty($currentNode->attr['DlDiv'])) {
            return $this->validateDivChildren($children, $config, $context);
        }

        return $this->validateDlDivChildren($children);
    }

    /**
     * Validates children of a div that is not a direct child of dl.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool
     */
    protected function validateDivChildren(array $children, HTMLPurifier_Config $config, HTMLPurifier_Context $context)
    {
        // Filter out dt and dd, but try to retain their contents (if any),
        // just as MakeWellFormed strategy would do to unrecognized elements
        $result = array();
        $unwrapped = false;
        foreach ($children as $child) {
            if (isset($child->name) && ($child->name === 'dt' || $child->name === 'dd')) {
                $unwrapped = true;
                foreach ($child->children as $c) {
                    $result[] = $c;
                }
            } else {
                $result[] = $child;
            }
        }

        $validated = parent::validateChildren($result, $config, $context);

        // A `true` verdict from the parent refers to the filtered list that
        // was passed to it, not to the original $children. If any dt/dd was
        // removed above, return the filtered list explicitly - otherwise the
        // caller would take `true` as "keep the original children" and the
        // stripped dt/dd elements would survive.
        if ($validated === true && $unwrapped) {
            return $result;
        }

        return $validated;
    }

    /**
     * Validates children of a div that is a direct child of dl.
     *
     * Whitespace nodes are kept. dt elements are kept until the first dd
     * has been seen, dd elements are always kept, everything else is
     * dropped. Returns false when neither dt nor dd is present; otherwise
     * an empty dt is prepended and/or an empty dd is appended when the
     * respective element is missing.
     *
     * @param HTMLPurifier_Node[] $children
     * @return array|bool
     */
    protected function validateDlDivChildren(array $children)
    {
        // div in dl is required to have (dt+, dd+) content
        // https://html.spec.whatwg.org/multipage/grouping-content.html#the-dl-element
        // Related discussion: https://github.com/whatwg/html/issues/1937

        $dt = null;
        $dd = null;
        $result = array();

        foreach ($children as $child) {
            if (!empty($child->is_whitespace)) {
                $result[] = $child;
                continue;
            }
            if ($child->name === 'dt' && !$dd) {
                // terms are only accepted before the first description
                $dt = $child;
                $result[] = $child;
            } elseif ($child->name === 'dd') {
                $dd = $child;
                $result[] = $dd;
            }
        }

        if (!$dd && !$dt) {
            return false;
        }

        if (!$dt) {
            array_unshift($result, new HTMLPurifier_Node_Element('dt'));
        }

        if (!$dd) {
            $result[] = new HTMLPurifier_Node_Element('dd');
        }

        return $result;
    }
}
119 changes: 119 additions & 0 deletions library/HTMLPurifier/ChildDef/HTML5/Dl.php
@@ -0,0 +1,119 @@
<?php

/**
 * Content model of the HTML5 dl element.
 *
 * A dl either holds dt/dd elements directly, or holds div elements that
 * wrap them. Which of the two modes applies is decided by the first
 * non-whitespace child.
 *
 * @see https://html.spec.whatwg.org/multipage/grouping-content.html#the-dl-element
 */
class HTMLPurifier_ChildDef_HTML5_Dl extends HTMLPurifier_ChildDef
{
    public $type = 'dl';

    public $elements = array(
        'dt' => true,
        'dd' => true,
        'div' => true,
    );

    /**
     * Validates the children of a dl element.
     *
     * Returns false (with a user warning) when dt or dd is not part of the
     * HTML definition; otherwise returns the filtered list of children.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool
     */
    public function validateChildren($children, $config, $context)
    {
        // A dl is meaningless unless both dt and dd are allowed
        $definition = $config->getHTMLDefinition();
        if (!isset($definition->info['dt'])) {
            trigger_error("Cannot allow dl without allowing dt", E_USER_WARNING);
            return false;
        }
        if (!isset($definition->info['dd'])) {
            trigger_error("Cannot allow dl without allowing dd", E_USER_WARNING);
            return false;
        }

        // Per the spec, a dl consists of zero or more term-description
        // groups: one or more dt followed by one or more dd, each group
        // optionally wrapped in a div child.
        // https://html.spec.whatwg.org/multipage/grouping-content.html#the-dl-element
        // Related discussion: https://github.com/whatwg/html/issues/1937

        // Locate the first child that is not whitespace
        $firstSignificant = null;
        foreach ($children as $candidate) {
            if (empty($candidate->is_whitespace)) {
                $firstSignificant = $candidate;
                break;
            }
        }

        // When the list opens with a div, every child is expected to be a div
        if ($firstSignificant !== null && $firstSignificant->name === 'div') {
            return $this->validateDivChildren($children);
        }

        return $this->validateDtDdChildren($children);
    }

    /**
     * Div mode: keeps whitespace and div children, drops everything else.
     *
     * @param HTMLPurifier_Node[] $children
     * @return HTMLPurifier_Node[]
     */
    protected function validateDivChildren(array $children)
    {
        $kept = array();
        foreach ($children as $child) {
            if (!empty($child->is_whitespace) || $child->name === 'div') {
                $kept[] = $child;
            }
        }
        return $kept;
    }

    /**
     * Plain mode: keeps whitespace, dt and dd children, drops everything
     * else. An empty dt is inserted before a dd that has no preceding dt,
     * and an empty dd is appended when the last dt has no dd after it.
     *
     * @param HTMLPurifier_Node[] $children
     * @return HTMLPurifier_Node[]
     */
    protected function validateDtDdChildren(array $children)
    {
        $output = array();
        $term = null;
        $description = null;

        foreach ($children as $child) {
            if (!empty($child->is_whitespace)) {
                $output[] = $child;
                continue;
            }

            $name = $child->name;
            if ($name !== 'dt' && $name !== 'dd') {
                // neither a term nor a description: discard
                continue;
            }

            if ($name === 'dt') {
                // a new term starts a group that has no description yet
                $term = $child;
                $description = null;
            } else {
                if ($term === null) {
                    // description without any term before it
                    $term = new HTMLPurifier_Node_Element('dt');
                    $output[] = $term;
                }
                $description = $child;
            }

            $output[] = $child;
        }

        // The trailing term must be followed by at least one description
        if ($term !== null && $description === null) {
            $output[] = new HTMLPurifier_Node_Element('dd');
        }

        return $output;
    }
}
57 changes: 57 additions & 0 deletions library/HTMLPurifier/ChildDef/HTML5/List.php
@@ -0,0 +1,57 @@
<?php

/**
 * Content definition for ul and ol elements following the HTML5 spec.
 *
 * Differs from {@link HTMLPurifier_ChildDef_List} only in that an empty
 * list is accepted, as permitted by HTML5.
 */
class HTMLPurifier_ChildDef_HTML5_List extends HTMLPurifier_ChildDef
{
    public $type = 'list';

    public $elements = array(
        'li' => true,
        'ul' => true,
        'ol' => true,
    );

    /**
     * Validates the children of a list element.
     *
     * Whitespace and li children are kept as they are. Any other child is
     * appended to the most recent li; if there is none yet, a new li is
     * created to hold it. Returns false (with a user warning) when li is
     * not part of the HTML definition.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool
     */
    public function validateChildren($children, $config, $context)
    {
        // Without li there is nothing a list could legally contain
        if (!isset($config->getHTMLDefinition()->info['li'])) {
            trigger_error("Cannot allow ul/ol without allowing li", E_USER_WARNING);
            return false;
        }

        $output = array();
        $lastItem = null;

        foreach ($children as $child) {
            if (!empty($child->is_whitespace)) {
                $output[] = $child;
                continue;
            }

            if ($child->name !== 'li') {
                // stray element: move it into the preceding list item
                if ($lastItem === null) {
                    $lastItem = new HTMLPurifier_Node_Element('li');
                    $output[] = $lastItem;
                }
                $lastItem->children[] = $child;
                $lastItem->empty = false;
                continue;
            }

            $lastItem = $child;
            $output[] = $child;
        }

        return $output;
    }
}
2 changes: 1 addition & 1 deletion library/HTMLPurifier/HTML5Config.php
Expand Up @@ -2,7 +2,7 @@

class HTMLPurifier_HTML5Config extends HTMLPurifier_Config
{
const REVISION = 2020053001;
const REVISION = 2020060101;

/**
* @param string|array|HTMLPurifier_Config $config
Expand Down
2 changes: 2 additions & 0 deletions library/HTMLPurifier/HTML5Definition.php
Expand Up @@ -48,6 +48,8 @@ public static function setupHTMLDefinition(HTMLPurifier_HTMLDefinition $def, HTM

$def->manager->attrTypes->set('Datetime', new HTMLPurifier_AttrDef_HTML5_Datetime());

$def->info_injector[] = new HTMLPurifier_Injector_HTML5_DlDiv();

return $def;
}
}

0 comments on commit a8b3229

Please sign in to comment.