From 86a67fef482e0b6b54885ffe08696b3c061ec831 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 15 Mar 2025 01:38:27 +0100
Subject: [PATCH 1/4] Fix GH-12231: SimpleXML xpath should warn when returning
other return types than node lists
Closes GH-18073.
---
NEWS | 4 ++++
UPGRADING | 5 +++++
ext/simplexml/simplexml.c | 22 ++++++++++++++++++++++
ext/simplexml/tests/008.phpt | 7 ++++---
ext/simplexml/tests/gh12231.phpt | 26 ++++++++++++++++++++++++++
5 files changed, 61 insertions(+), 3 deletions(-)
create mode 100644 ext/simplexml/tests/gh12231.phpt
diff --git a/NEWS b/NEWS
index 48e71a0a80632..e2c67a4a24d12 100644
--- a/NEWS
+++ b/NEWS
@@ -124,6 +124,10 @@ PHP NEWS
or a TypeError if read_and_close value is not compatible with int.
(David Carlier)
+- SimpleXML:
+ . Fixed bug GH-12231 (SimpleXML xpath should warn when returning other return
+ types than node lists). (nielsdos)
+
- SNMP:
. snmpget, snmpset, snmp_get2, snmp_set2, snmp_get3, snmp_set3 and
SNMP::__construct() throw an exception on invalid hostname, community
diff --git a/UPGRADING b/UPGRADING
index 11cca55bd1735..274f45d8f401b 100644
--- a/UPGRADING
+++ b/UPGRADING
@@ -82,6 +82,11 @@ PHP 8.5 UPGRADE NOTES
. A ValueError is now thrown when trying to set a cursor name that is too
long on a PDOStatement resulting from the Firebird driver.
+- SimpleXML:
+ . Passing an XPath expression that returns something other than a node set
+ to SimpleXMLElement::xpath() will now emit a warning and return false,
+ instead of silently failing and returning an empty array.
+
- SPL:
. ArrayObject no longer accepts enums, as modifying the $name or $value
properties can break engine assumptions.
diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c
index 1de7ccc6e74e8..877280fb378dc 100644
--- a/ext/simplexml/simplexml.c
+++ b/ext/simplexml/simplexml.c
@@ -1215,6 +1215,21 @@ static int sxe_objects_compare(zval *object1, zval *object2) /* {{{ */
}
/* }}} */
+static const char *sxe_get_object_type_name(xmlXPathObjectType type) /* Maps an XPath result type to the human-readable name inserted into the xpath() warning text. */
+{
+ switch (type) {
+ case XPATH_BOOLEAN: return "bool";
+ case XPATH_NUMBER: return "number";
+ case XPATH_STRING: return "string";
+#ifdef LIBXML_XPTR_LOCS_ENABLED /* The three cases below are compiled only when this macro is defined. */
+ case XPATH_POINT: return "point";
+ case XPATH_RANGE: return "range";
+ case XPATH_LOCATIONSET: return "location set";
+#endif
+ default: return "undefined"; /* Catch-all for every type not listed above; all results are string literals, so callers must not free them. */
+ }
+}
+
/* {{{ Runs XPath query on the XML data */
PHP_METHOD(SimpleXMLElement, xpath)
{
@@ -1271,6 +1286,13 @@ PHP_METHOD(SimpleXMLElement, xpath)
RETURN_FALSE;
}
+ if (UNEXPECTED(retval->type != XPATH_NODESET)) {
+ php_error_docref(NULL, E_WARNING, "XPath expression must return a node set, %s returned",
+ sxe_get_object_type_name(retval->type));
+ xmlXPathFreeObject(retval);
+ RETURN_FALSE;
+ }
+
result = retval->nodesetval;
if (result != NULL) {
diff --git a/ext/simplexml/tests/008.phpt b/ext/simplexml/tests/008.phpt
index c946c36dafe63..dea6f98eacfcc 100644
--- a/ext/simplexml/tests/008.phpt
+++ b/ext/simplexml/tests/008.phpt
@@ -39,8 +39,9 @@ array(1) {
}
}
}
-array(0) {
-}
-Warning: SimpleXMLElement::xpath(): Invalid expression in %s on line %d%A
+Warning: SimpleXMLElement::xpath(): XPath expression must return a node set, number returned in %s on line %d
+bool(false)
+
+Warning: SimpleXMLElement::xpath(): Invalid expression in %s on line %d
bool(false)
diff --git a/ext/simplexml/tests/gh12231.phpt b/ext/simplexml/tests/gh12231.phpt
new file mode 100644
index 0000000000000..efacd92b76f95
--- /dev/null
+++ b/ext/simplexml/tests/gh12231.phpt
@@ -0,0 +1,26 @@
+--TEST--
+GH-12231 (SimpleXML xpath should warn when returning other return types than node lists)
+--EXTENSIONS--
+simplexml
+--FILE--
+<?php
+
+$xml = "<container><foo/><foo/></container>";
+$sxe = simplexml_load_string($xml);
+
+var_dump($sxe->xpath("count(//foo)"));
+var_dump($sxe->xpath("string(//foo)"));
+var_dump($sxe->xpath("boolean(//foo)"));
+var_dump(count($sxe->xpath("//foo")));
+
+?>
+--EXPECTF--
+Warning: SimpleXMLElement::xpath(): XPath expression must return a node set, number returned in %s on line %d
+bool(false)
+
+Warning: SimpleXMLElement::xpath(): XPath expression must return a node set, string returned in %s on line %d
+bool(false)
+
+Warning: SimpleXMLElement::xpath(): XPath expression must return a node set, bool returned in %s on line %d
+bool(false)
+int(2)
From 647baec5a4d3525033b05da48c5ed4ad51601d70 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 16 Mar 2025 13:40:54 +0100
Subject: [PATCH 2/4] Fix GH-18090: DOM: Svg attributes and tag names are being
lowercased
Closes GH-18091.
---
NEWS | 2 ++
ext/dom/html5_parser.c | 12 ++++++++++--
ext/dom/tests/modern/html/parser/gh18090.phpt | 18 ++++++++++++++++++
.../html/parser/predefined_namespaces.phpt | 6 +++---
4 files changed, 33 insertions(+), 5 deletions(-)
create mode 100644 ext/dom/tests/modern/html/parser/gh18090.phpt
diff --git a/NEWS b/NEWS
index 286d03d10aff8..e7e917c6273da 100644
--- a/NEWS
+++ b/NEWS
@@ -35,6 +35,8 @@ PHP NEWS
- DOM:
. Fixed bug GH-17991 (Assertion failure dom_attr_value_write). (nielsdos)
. Fix weird unpack behaviour in DOM. (nielsdos)
+ . Fixed bug GH-18090 (DOM: Svg attributes and tag names are being lowercased).
+ (nielsdos)
- Fuzzer:
. Fixed bug GH-18081 (Memory leaks in error paths of fuzzer SAPI).
diff --git a/ext/dom/html5_parser.c b/ext/dom/html5_parser.c
index 0d7d2b9e7249d..f1dc2db53b25b 100644
--- a/ext/dom/html5_parser.c
+++ b/ext/dom/html5_parser.c
@@ -138,7 +138,9 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
* If a prefix:name format is used, then the local name will be "prefix:name" and the prefix will be empty.
* There is however still somewhat of a concept of namespaces. There are three: HTML (the default), SVG, and MATHML. */
lxb_dom_element_t *element = lxb_dom_interface_element(node);
- const lxb_char_t *name = lxb_dom_element_local_name(element, NULL);
+ const lxb_char_t *name = lxb_dom_element_qualified_name(element, NULL);
+ ZEND_ASSERT(!element->node.prefix);
+
xmlNodePtr lxml_element = xmlNewDocNode(lxml_doc, NULL, name, NULL);
if (UNEXPECTED(lxml_element == NULL)) {
retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
@@ -203,7 +205,13 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
for (lxb_dom_attr_t *attr = element->first_attr; attr != NULL; attr = attr->next) {
/* Same namespace remark as for elements */
size_t local_name_length, value_length;
- const lxb_char_t *local_name = lxb_dom_attr_local_name(attr, &local_name_length);
+ const lxb_char_t *local_name = lxb_dom_attr_qualified_name(attr, &local_name_length);
+ if (attr->node.prefix) {
+ const char *pos = strchr((const char *) local_name, ':');
+ if (EXPECTED(pos)) {
+ local_name = (const lxb_char_t *) pos + 1;
+ }
+ }
const lxb_char_t *value = lxb_dom_attr_value(attr, &value_length);
if (UNEXPECTED(local_name_length >= INT_MAX || value_length >= INT_MAX)) {
diff --git a/ext/dom/tests/modern/html/parser/gh18090.phpt b/ext/dom/tests/modern/html/parser/gh18090.phpt
new file mode 100644
index 0000000000000..c32f5ddb51336
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/gh18090.phpt
@@ -0,0 +1,18 @@
+--TEST--
+GH-18090 (Svg attributes and tag names are being lowercased)
+--EXTENSIONS--
+dom
+--FILE--
+
', LIBXML_NOERROR)->saveHTML(), "\n";
+
+echo \Dom\HTMLDocument::createFromString('', LIBXML_NOERROR)->saveHTML(), "\n";
+
+echo \Dom\HTMLDocument::createFromString('', LIBXML_NOERROR)->querySelector('svg')->attributes[0]->name, "\n";
+?>
+--EXPECT--
+
+
+viewBox
diff --git a/ext/dom/tests/modern/html/parser/predefined_namespaces.phpt b/ext/dom/tests/modern/html/parser/predefined_namespaces.phpt
index 7e78460454e60..b4c07c6fb3bb8 100644
--- a/ext/dom/tests/modern/html/parser/predefined_namespaces.phpt
+++ b/ext/dom/tests/modern/html/parser/predefined_namespaces.phpt
@@ -47,7 +47,7 @@ echo $dom->saveXml();
svg http://www.w3.org/2000/svg
Attribute: width (NONE)
Attribute: height (NONE)
- Attribute: viewbox (NONE)
+ Attribute: viewBox (NONE)
rect http://www.w3.org/2000/svg
Attribute: id (NONE)
Attribute: x (NONE)
@@ -65,7 +65,7 @@ svg http://www.w3.org/1998/Math/MathML
Test
-