Skip to content

Commit

Permalink
Adjust code style in HTML5 lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
xemlock committed Dec 31, 2019
1 parent 92cce65 commit 956ad0f
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 16 deletions.
4 changes: 2 additions & 2 deletions composer.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "xemlock/htmlpurifier-html5",
"type": "library",
"description": "HTML5 element definitions for HTML Purifier",
"description": "HTML5 support for HTML Purifier",
"keywords": [
"HtmlPurifier",
"html",
Expand Down Expand Up @@ -36,6 +36,6 @@
"test": "phpunit --coverage-html build/coverage"
},
"suggest": {
"masterminds/html5": "Required to use HTMLPurifier_Lexer_HTML5."
"masterminds/html5": "Required to use HTMLPurifier_Lexer_HTML5"
}
}
27 changes: 18 additions & 9 deletions library/HTMLPurifier/Lexer/HTML5.php
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
<?php

/**
* Class HTMLPurifier_Lexer_HTML5
*
* Experimental HTML5-based parser using masterminds/html5 library.
* Experimental HTML5-compliant parser using masterminds/html5 library.
*/
class HTMLPurifier_Lexer_HTML5 extends HTMLPurifier_Lexer_DOMLex
{
/**
 * Creates the lexer, first verifying that the \Masterminds\HTML5 class can be found.
 *
 * The parser library is only a suggested dependency, so its absence is
 * reported here with an explicit exception before the parent constructor runs.
 *
 * @throws HTMLPurifier_Exception If the \Masterminds\HTML5 class is not available
 * @codeCoverageIgnore
 */
public function __construct()
{
    $parserAvailable = class_exists('\Masterminds\HTML5');

    if ($parserAvailable === false) {
        throw new HTMLPurifier_Exception(
            'Cannot instantiate HTML5 lexer. \Masterminds\HTML5 class is not available'
        );
    }

    parent::__construct();
}

/**
* Transforms an HTML string into tokens.
*
* @param string $html
* @param HTMLPurifier_Config $config
Expand All @@ -20,16 +30,15 @@ public function tokenizeHTML($html, $config, $context)
$html = $this->normalize($html, $config, $context);
$html = $this->armor($html, $config);

// preprocess html. masterminds/html5 requires <html>, <head> and <body> tags.
// <meta charset> is also essential for utf-8
// masterminds/html5 requires <html>, <head> and <body> tags
$html = $this->wrapHTML($html, $config, $context, false);

// Parse the document. $doc is a DOMDocument.
$html5 = new \Masterminds\HTML5(array('disable_html_ns' => true));
$doc = $html5->loadHTML($html);

$body = $doc->getElementsByTagName('html')->item(0) // <html>
->getElementsByTagName('body')->item(0); // <body>
$body = $doc->getElementsByTagName('html')->item(0) // <html>
->getElementsByTagName('body')->item(0); // <body>

$tokens = array();
$this->tokenizeDOM($body, $tokens, $config);
Expand All @@ -45,7 +54,7 @@ public function tokenizeHTML($html, $config, $context)
* @param HTMLPurifier_Config $config
* @return string
*/
protected function armor($html, $config)
protected function armor($html, HTMLPurifier_Config $config)
{
if ($config->get('Core.AggressivelyFixLt')) {
$char = '[^a-z!\/]';
Expand Down
14 changes: 9 additions & 5 deletions tests/HTMLPurifier/Lexer/HTML5Test.php
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
<?php

/** @noinspection PhpDocMissingThrowsInspection */
/** @noinspection PhpUnhandledExceptionInspection */

class HTMLPurifier_Lexer_HTML5Test extends BaseTestCase
{
/**
Expand All @@ -12,11 +15,11 @@ class HTMLPurifier_Lexer_HTML5Test extends BaseTestCase
*
* @return void
*/
public function setUp()
protected function setUp()
{
parent::setUp();

$this->context = new HTMLPurifier_Context;
$this->context = new HTMLPurifier_Context();
}

/**
Expand All @@ -27,7 +30,7 @@ public function setUp()
*/
public function test_create_objectLexerImpl()
{
$this->config->set('Core.LexerImpl', new HTMLPurifier_Lexer_HTML5);
$this->config->set('Core.LexerImpl', new HTMLPurifier_Lexer_HTML5());

$lexer = HTMLPurifier_Lexer::create($this->config);

Expand Down Expand Up @@ -560,7 +563,8 @@ public function test_tokenizeHTML_()
public function test_tokenizeHTML_ignoreIECondComment()
{
$this->assertTokenization(
'<!--[if IE]>foo<a>bar<!-- baz --><![endif]-->',
// typecast disables 'Bad character' inspection in PHPStorm
'<!--[if IE]>foo<a>bar' . (string) '<!-- baz --><![endif]-->',
array()
);
}
Expand Down Expand Up @@ -678,7 +682,7 @@ public function test_tokenizeHTML_prematureDivClose()
*/
protected function assertTokenization($input, $expect)
{
$lexer = new HTMLPurifier_Lexer_HTML5;
$lexer = new HTMLPurifier_Lexer_HTML5();

$result = $lexer->tokenizeHTML($input, $this->config, $this->context);

Expand Down

0 comments on commit 956ad0f

Please sign in to comment.