Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make List module HTML5 spec compliant
- Loading branch information
Showing
9 changed files
with
729 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
<?php | ||
|
||
/**
 * Content model for the div element under the HTML5 rules
 *
 * HTML5 lets a div sit directly inside a dl element. A div in that
 * position is expected to hold dt and dd elements instead of ordinary
 * flow content, so two different validation paths are needed.
 */
class HTMLPurifier_ChildDef_HTML5_Div extends HTMLPurifier_ChildDef_Optional
{
    public $type = 'div';

    public $allow_empty = true;

    /**
     * Allowed child names. Content set names (such as 'Flow') are replaced
     * by their member elements the first time init() runs.
     */
    public $elements = array(
        '#PCDATA' => true,
        'Flow' => true,
        'dt' => true,
        'dd' => true,
    );

    /**
     * Whether $elements has already been expanded by init()
     */
    protected $init = false;

    public function __construct()
    {
        parent::__construct($this->elements);
    }

    /**
     * Returns the allowed child elements with content sets expanded.
     *
     * @param HTMLPurifier_Config $config
     * @return array
     */
    public function getAllowedElements($config)
    {
        $this->init($config);

        return $this->elements;
    }

    /**
     * Expands content set names in $elements into concrete element names.
     * Runs its work once; later calls return immediately.
     *
     * @param HTMLPurifier_Config $config
     */
    protected function init(HTMLPurifier_Config $config)
    {
        if (!$this->init) {
            $definition = $config->getHTMLDefinition();
            $expanded = array();

            foreach ($this->elements as $key => $unused) {
                if (!isset($definition->info_content_sets[$key])) {
                    // Plain element name (or #PCDATA), keep as is
                    $expanded[$key] = true;
                    continue;
                }
                // Name of a content set, pull in all of its members
                $expanded = array_merge($expanded, $definition->info_content_sets[$key]);
            }

            $this->elements = $expanded;
            $this->init = true;
        }
    }

    /**
     * Picks the validation path based on the 'DlDiv' entry in the attr
     * array of the node stored in the context under 'CurrentNode'.
     * NOTE(review): where that entry gets set is not visible in this
     * file - presumably it marks a div placed directly under a dl.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool
     */
    public function validateChildren($children, $config, $context)
    {
        $this->init($config);

        // Second argument: a missing 'CurrentNode' yields null, not an error
        $node = $context->get('CurrentNode', true);
        $insideDl = !empty($node->attr['DlDiv']);

        return $insideDl
            ? $this->validateDlDivChildren($children)
            : $this->validateDivChildren($children, $config, $context);
    }

    /**
     * Validates children of a regular div (one not flagged as 'DlDiv').
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool
     */
    protected function validateDivChildren(array $children, HTMLPurifier_Config $config, HTMLPurifier_Context $context)
    {
        // dt and dd have no place in a regular div. Drop the wrappers but
        // keep whatever they contained, the same way MakeWellFormed
        // strategy treats unrecognized elements.
        $flattened = array();

        foreach ($children as $node) {
            $tag = isset($node->name) ? $node->name : null;

            if ($tag !== 'dt' && $tag !== 'dd') {
                $flattened[] = $node;
                continue;
            }

            foreach ($node->children as $grandchild) {
                $flattened[] = $grandchild;
            }
        }

        return parent::validateChildren($flattened, $config, $context);
    }

    /**
     * Validates children of a div flagged as 'DlDiv'.
     *
     * Whitespace nodes, dt elements that come before any dd, and all dd
     * elements are kept; everything else is dropped. A missing dt or dd
     * is compensated with an empty one. When neither dt nor dd is
     * present, false is returned.
     *
     * @param HTMLPurifier_Node[] $children
     * @return array|bool
     */
    protected function validateDlDivChildren(array $children)
    {
        // div in dl is required to have (dt+, dd+) content
        // https://html.spec.whatwg.org/multipage/grouping-content.html#the-dl-element
        // Related discussion: https://github.com/whatwg/html/issues/1937

        $kept = array();
        $seenDt = false;
        $seenDd = false;

        foreach ($children as $node) {
            if (empty($node->is_whitespace)) {
                $tag = $node->name;

                if ($tag === 'dd') {
                    $seenDd = true;
                } elseif ($tag === 'dt' && !$seenDd) {
                    $seenDt = true;
                } else {
                    // Neither dt nor dd, or a dt after the first dd
                    continue;
                }
            }

            $kept[] = $node;
        }

        if (!$seenDt && !$seenDd) {
            return false;
        }

        if (!$seenDt) {
            array_unshift($kept, new HTMLPurifier_Node_Element('dt'));
        }

        if (!$seenDd) {
            $kept[] = new HTMLPurifier_Node_Element('dd');
        }

        return $kept;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
<?php | ||
|
||
/**
 * HTML5 compliant dl content model
 *
 * A dl holds either dt/dd elements directly, or div elements wrapping
 * them. The two forms are not mixed: the first dt, dd or div child
 * decides which form is kept, and children of the other form are dropped.
 *
 * @see https://html.spec.whatwg.org/multipage/grouping-content.html#the-dl-element
 */
class HTMLPurifier_ChildDef_HTML5_Dl extends HTMLPurifier_ChildDef
{
    public $type = 'dl';

    public $elements = array(
        'dt' => true,
        'dd' => true,
        'div' => true,
    );

    /**
     * Validates the children of a dl element.
     *
     * Returns false (after raising an E_USER_WARNING) when dt or dd is
     * missing from the HTML definition. Otherwise returns the filtered
     * list of children, which may be empty.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool
     */
    public function validateChildren($children, $config, $context)
    {
        $definition = $config->getHTMLDefinition();

        // if dt or dd is not allowed, delete parent node
        foreach (array('dt', 'dd') as $required) {
            if (!isset($definition->info[$required])) {
                trigger_error("Cannot allow dl without allowing {$required}", E_USER_WARNING);
                return false;
            }
        }

        // Content model:
        // The dl element represents a description list of zero or more term-description
        // groups. Each term-description group consists of one or more terms (represented
        // by dt elements) possibly as children of a div element child, and one or more
        // descriptions (represented by dd elements possibly as children of a div element
        // child), ignoring any nodes other than dt and dd element children, and dt and dd
        // elements that are children of div element children within a single dl element.
        // https://html.spec.whatwg.org/multipage/grouping-content.html#the-dl-element

        // Related discussion: https://github.com/whatwg/html/issues/1937

        // Detect if the first relevant child is a div, if yes, then all
        // children are expected to be divs. Only dt, dd and div take part
        // in this decision: stray text or other elements are dropped by
        // both validation paths anyway, so they must not force dt/dd mode
        // and cause every following div group to be discarded.
        foreach ($children as $node) {
            if (!empty($node->is_whitespace)) {
                continue;
            }
            if ($node->name === 'div') {
                return $this->validateDivChildren($children);
            }
            if ($node->name === 'dt' || $node->name === 'dd') {
                break;
            }
        }

        return $this->validateDtDdChildren($children);
    }

    /**
     * Keeps whitespace nodes and div elements, drops everything else.
     *
     * @param HTMLPurifier_Node[] $children
     * @return HTMLPurifier_Node[]
     */
    protected function validateDivChildren(array $children)
    {
        $result = array();
        foreach ($children as $node) {
            if (!empty($node->is_whitespace) || $node->name === 'div') {
                $result[] = $node;
            }
        }
        return $result;
    }

    /**
     * Keeps whitespace nodes, dt and dd elements, drops everything else.
     *
     * An empty dt is inserted in front of a dd that has no dt anywhere
     * before it, and an empty dd is appended when the last dt is not
     * followed by any dd.
     *
     * @param HTMLPurifier_Node[] $children
     * @return HTMLPurifier_Node[]
     */
    protected function validateDtDdChildren(array $children)
    {
        $result = array();

        // Whether any dt has been emitted so far
        $hasTerm = false;
        // Whether a dd has been seen since the most recent dt
        $hasDescription = false;

        foreach ($children as $node) {
            if (!empty($node->is_whitespace)) {
                $result[] = $node;
                continue;
            }
            if ($node->name === 'dt') {
                $hasTerm = true;
                $hasDescription = false;
                $result[] = $node;
            } elseif ($node->name === 'dd') {
                if (!$hasTerm) {
                    // A description needs a term in front of it
                    $result[] = new HTMLPurifier_Node_Element('dt');
                    $hasTerm = true;
                }
                $hasDescription = true;
                $result[] = $node;
            }
        }

        // There must be at least one dd after dt
        if ($hasTerm && !$hasDescription) {
            $result[] = new HTMLPurifier_Node_Element('dd');
        }

        return $result;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
<?php | ||
|
||
/**
 * Content definition for ul and ol elements following the HTML5 rules.
 *
 * Behaves like {@link HTMLPurifier_ChildDef_List}, except that a list
 * without any items is accepted, as the HTML5 spec allows empty lists.
 */
class HTMLPurifier_ChildDef_HTML5_List extends HTMLPurifier_ChildDef
{
    public $type = 'list';

    public $elements = array(
        'li' => true,
        'ul' => true,
        'ol' => true,
    );

    /**
     * Validates the children of a list element.
     *
     * Whitespace nodes and li elements are kept in place. Any other node
     * is appended to the children of the closest preceding li; when there
     * is none yet, a new li is created to hold it. Returns false (after
     * raising an E_USER_WARNING) when li is missing from the HTML
     * definition.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool
     */
    public function validateChildren($children, $config, $context)
    {
        $definition = $config->getHTMLDefinition();

        // A list is pointless without li, so the parent node gets deleted
        if (!isset($definition->info['li'])) {
            trigger_error("Cannot allow ul/ol without allowing li", E_USER_WARNING);
            return false;
        }

        $items = array();
        $currentItem = null;

        foreach ($children as $child) {
            $isWhitespace = !empty($child->is_whitespace);

            if ($isWhitespace || $child->name === 'li') {
                if (!$isWhitespace) {
                    // Nodes that follow will be tucked into this li
                    $currentItem = $child;
                }
                $items[] = $child;
                continue;
            }

            // Not an li: it has to live inside one
            if ($currentItem === null) {
                $currentItem = new HTMLPurifier_Node_Element('li');
                $items[] = $currentItem;
            }
            $currentItem->children[] = $child;
            $currentItem->empty = false;
        }

        return $items;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.