From 2a5e10cfd2e9eef9be3e6f152fe567c9a6ef9e68 Mon Sep 17 00:00:00 2001 From: Lars Moelleken Date: Sun, 23 Oct 2016 05:54:42 +0200 Subject: [PATCH] [+]: optimize performance --- src/voku/helper/HtmlDomParser.php | 68 ++++++++++--------------------- src/voku/helper/SimpleHtmlDom.php | 7 ---- tests/HtmlDomParserTest.php | 10 +++++ 3 files changed, 31 insertions(+), 54 deletions(-) diff --git a/src/voku/helper/HtmlDomParser.php b/src/voku/helper/HtmlDomParser.php index b5ec981..0e6cf42 100644 --- a/src/voku/helper/HtmlDomParser.php +++ b/src/voku/helper/HtmlDomParser.php @@ -48,10 +48,10 @@ class HtmlDomParser protected static $domLinkReplaceHelper = array( 'orig' => array('[', ']', '{', '}',), 'tmp' => array( - '!!!!HTML_DOM__SQUARE_BRACKET_LEFT!!!!', - '!!!!HTML_DOM__SQUARE_BRACKET_RIGHT!!!!', - '!!!!HTML_DOM__BRACKET_LEFT!!!!', - '!!!!HTML_DOM__BRACKET_RIGHT!!!!', + '!!!!SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_LEFT!!!!', + '!!!!SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_RIGHT!!!!', + '!!!!SIMPLE_HTML_DOM__VOKU__BRACKET_LEFT!!!!', + '!!!!SIMPLE_HTML_DOM__VOKU__BRACKET_RIGHT!!!!', ), ); @@ -61,10 +61,10 @@ class HtmlDomParser protected static $domReplaceHelper = array( 'orig' => array('&', '|', '+', '%'), 'tmp' => array( - '!!!!HTML_DOM__AMP!!!!', - '!!!!HTML_DOM__PIPE!!!!', - '!!!!HTML_DOM__PLUS!!!!', - '!!!!HTML_DOM__PERCENT!!!!', + '!!!!SIMPLE_HTML_DOM__VOKU__AMP!!!!', + '!!!!SIMPLE_HTML_DOM__VOKU__PIPE!!!!', + '!!!!SIMPLE_HTML_DOM__VOKU__PLUS!!!!', + '!!!!SIMPLE_HTML_DOM__VOKU__PERCENT!!!!', ), ); @@ -93,13 +93,6 @@ class HtmlDomParser */ protected $isDOMDocumentCreatedWithoutHtmlWrapper = false; - /** - * An random md5-hash, generated via "random_bytes()". - * - * @var string - */ - protected $randomHash; - /** * Constructor * @@ -107,11 +100,8 @@ class HtmlDomParser */ public function __construct($element = null) { - $this->randomHash = md5(Bootup::get_random_bytes(16)); $this->document = new \DOMDocument('1.0', $this->getEncoding()); - $this->addRandBytesToDomReplaceHelpers(); - // DOMDocument settings $this->document->preserveWhiteSpace = true; $this->document->formatOutput = true; @@ -135,22 +125,6 @@ public function __construct($element = null) } } - /** - * Add rand-bytes to the "Dom-Replace-Helper"-variables. - */ - protected function addRandBytesToDomReplaceHelpers() - { - /** @noinspection AlterInForeachInspection */ - foreach (self::$domLinkReplaceHelper['tmp'] as &$linkHelper) { - $linkHelper .= $this->randomHash; - } - - /** @noinspection AlterInForeachInspection */ - foreach (self::$domReplaceHelper['tmp'] as &$domHelper) { - $domHelper .= $this->randomHash; - } - } - /** * @param $name * @param $arguments @@ -300,19 +274,19 @@ public static function replaceToPreserveHtmlEntities($html) */ public static function putReplacedBackToPreserveHtmlEntities($html) { - return str_replace( - array_merge( - self::$domLinkReplaceHelper['tmp'], - self::$domReplaceHelper['tmp'], - array(' ') - ), - array_merge( - self::$domLinkReplaceHelper['orig'], - self::$domReplaceHelper['orig'], - array('') - ), - $html - ); + static $DOM_REPLACE__HELPER_CACHE = null; + if ($DOM_REPLACE__HELPER_CACHE === null) { + $DOM_REPLACE__HELPER_CACHE['tmp'] = array_merge( + self::$domLinkReplaceHelper['tmp'], + self::$domReplaceHelper['tmp'] + ); + $DOM_REPLACE__HELPER_CACHE['orig'] = array_merge( + self::$domLinkReplaceHelper['orig'], + self::$domReplaceHelper['orig'] + ); + } + + return str_replace($DOM_REPLACE__HELPER_CACHE['tmp'], $DOM_REPLACE__HELPER_CACHE['orig'], $html); } /** diff --git a/src/voku/helper/SimpleHtmlDom.php b/src/voku/helper/SimpleHtmlDom.php index 0737ddb..a22e9f4 100644 --- a/src/voku/helper/SimpleHtmlDom.php +++ b/src/voku/helper/SimpleHtmlDom.php @@ -494,11 +494,8 @@ protected function replaceChild($string) } if (!empty($newDocument)) { - $newDocument = $this->cleanHtmlWrapper($newDocument); - $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true); - $this->node->appendChild($newNode); } @@ -522,10 +519,6 @@ protected function replaceNode($string) $newDocument = new HtmlDomParser($string); - // DEBUG - //echo $this->normalizeStringForComparision($newDocument->outertext) . "\n"; - //echo $this->normalizeStringForComparision($string) . "\n\n"; - if ($this->normalizeStringForComparision($newDocument->outertext) != $this->normalizeStringForComparision($string)) { throw new RuntimeException('Not valid HTML fragment'); } diff --git a/tests/HtmlDomParserTest.php b/tests/HtmlDomParserTest.php index 2da08cb..bd344d0 100644 --- a/tests/HtmlDomParserTest.php +++ b/tests/HtmlDomParserTest.php @@ -924,6 +924,16 @@ public function testGetElementsByClass() ); } + public function testUtf8AndBrokenHtmlEncoding() + { + $dom = new HtmlDomParser(); + $dom->load('hi

سلام

の家庭に、9 ☆<><'); + self::assertSame( + '

hi

سلام

の家庭に、9 ☆
', + $dom->innerHtml + ); + } + public function testEnforceEncoding() { $dom = new HtmlDomParser();