From 080fd14458d3185b879ee2d0fbb6987b90539469 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 28 Aug 2025 21:55:33 +0200 Subject: [PATCH 1/2] Fix GH-19612: Mitigate libxml2 tree dictionary bug This code is very similar to code on PHP 8.4 and higher, but the mitigation is extended to entity references and to attribute children. Closes GH-19620. --- NEWS | 3 ++ ext/dom/document.c | 61 ++++++++++++++++++++++++++++++++++++-- ext/dom/tests/gh19612.phpt | 30 +++++++++++++++++++ 3 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 ext/dom/tests/gh19612.phpt diff --git a/NEWS b/NEWS index a85a48b3e0f54..0435cb4a8c5c0 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,9 @@ PHP NEWS . Fixed date_sunrise() and date_sunset() with partial-hour UTC offset. (ilutov) +- DOM: + . Fixed bug GH-19612 (Mitigate libxml2 tree dictionary bug). (nielsdos) + - FPM: . Fixed failed debug assertion when php_admin_value setting fails. (ilutov) diff --git a/ext/dom/document.c b/ext/dom/document.c index e622a09309b6e..e48cafbabe9cd 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1076,10 +1076,62 @@ static void php_dom_transfer_document_ref(xmlNodePtr node, php_libxml_ref_obj *n } } +/* Workaround for bug that was fixed in https://github.com/GNOME/libxml2/commit/4bc3ebf3eaba352fbbce2ef70ad00a3c7752478a + * and https://github.com/GNOME/libxml2/commit/bc7ab5a2e61e4b36accf6803c5b0e245c11154b1 */ +#if LIBXML_VERSION < 21300 +static xmlChar *libxml_copy_dicted_string(xmlDictPtr src_dict, xmlDictPtr dst_dict, xmlChar *str) +{ + if (str == NULL) { + return NULL; + } + if (xmlDictOwns(src_dict, str) == 1) { + if (dst_dict == NULL) { + return xmlStrdup(str); + } + return BAD_CAST xmlDictLookup(dst_dict, str, -1); + } + return str; +} + +static void libxml_fixup_name_and_content(xmlDocPtr src_doc, xmlDocPtr dst_doc, xmlNodePtr node) +{ + if (node->type == XML_ENTITY_REF_NODE) { + node->children = NULL; /* Break link with original document. */ + } + if (src_doc != NULL && src_doc->dict != NULL) { + ZEND_ASSERT(dst_doc != src_doc); + node->name = libxml_copy_dicted_string(src_doc->dict, dst_doc->dict, BAD_CAST node->name); + node->content = libxml_copy_dicted_string(src_doc->dict, NULL, node->content); + } +} + +static void libxml_fixup_name_and_content_outer(xmlDocPtr src_doc, xmlDocPtr dst_doc, xmlNodePtr node) +{ + libxml_fixup_name_and_content(src_doc, dst_doc, node); + + if (node->type == XML_ELEMENT_NODE) { + for (xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) { + libxml_fixup_name_and_content(src_doc, dst_doc, (xmlNodePtr) attr); + for (xmlNodePtr attr_child = attr->children; attr_child != NULL; attr_child = attr_child->next) { + libxml_fixup_name_and_content(src_doc, dst_doc, attr_child); + } + } + } + + if (node->type == XML_ELEMENT_NODE || node->type == XML_ATTRIBUTE_NODE) { + for (xmlNodePtr child = node->children; child != NULL; child = child->next) { + libxml_fixup_name_and_content_outer(src_doc, dst_doc, child); + } + } +} +#endif + bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, xmlDocPtr new_document) { - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); - if (nodep->doc != new_document) { + xmlDocPtr old_doc = nodep->doc; + + php_libxml_invalidate_node_list_cache_from_doc(old_doc); + if (old_doc != new_document) { php_libxml_invalidate_node_list_cache(dom_object_new_document->document); /* Note for ATTRIBUTE_NODE: specified is always true in ext/dom, @@ -1089,6 +1141,11 @@ bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, x return false; } +#if LIBXML_VERSION < 21300 + /* Must be first before transferring the ref to ensure the old document dictionary stays alive. */ + libxml_fixup_name_and_content_outer(old_doc, new_document, nodep); +#endif + php_dom_transfer_document_ref(nodep, dom_object_new_document->document); } else { xmlUnlinkNode(nodep); diff --git a/ext/dom/tests/gh19612.phpt b/ext/dom/tests/gh19612.phpt new file mode 100644 index 0000000000000..38554f3c83605 --- /dev/null +++ b/ext/dom/tests/gh19612.phpt @@ -0,0 +1,30 @@ +--TEST-- +GH-19612 (Mitigate libxml2 tree dictionary bug) +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< +]> + +XML); +$html = new DOMDocument; +$html->loadHTML('

foo

', LIBXML_NOERROR); +$p = $html->documentElement->firstChild->firstChild; +$p->appendChild($html->adoptNode($xml->documentElement->firstElementChild->cloneNode(true))); + +echo $html->saveXML(); +echo $xml->saveXML(); +?> +--EXPECT-- + + +

foo

+ + +]> + From 9b960713c6f12817286e039092b417e8e096a4cc Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 3 Sep 2025 23:00:09 +0200 Subject: [PATCH 2/2] [ci skip] Fix test title --- ext/standard/tests/streams/bug69521.phpt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/standard/tests/streams/bug69521.phpt b/ext/standard/tests/streams/bug69521.phpt index 9ec244f79963e..d335a68736eb4 100644 --- a/ext/standard/tests/streams/bug69521.phpt +++ b/ext/standard/tests/streams/bug69521.phpt @@ -1,5 +1,5 @@ --TEST-- -Bug #69521 Segfault in gc_collect_cycles() +Bug #69521 (Segfault in gc_collect_cycles()) --FILE--