From 33e842a4f3e2d573689a2848936665bda3026522 Mon Sep 17 00:00:00 2001 From: Toon Verwerft Date: Fri, 24 Apr 2026 09:34:03 +0200 Subject: [PATCH] Add promote_namespaces function Moves prefixed xmlns declarations from descendant nodes to the document element while keeping the original prefix names, for SOAP servers that require eager namespace declarations on the envelope (e.g. Microsoft Business Central). Default namespaces and prefixes that conflict with an existing root declaration are left untouched. Backport of veewee/xml#103 to 3.x. On legacy DOM, libxml's namespace reconciliation may additionally disambiguate prefixes in conflicting subtrees; the result stays semantically identical. Refs php-soap/encoding#48 --- docs/dom.md | 59 +++++ .../Dom/Configurator/promote_namespaces.php | 21 ++ .../Document/promote_namespaces.php | 64 +++++ src/bootstrap.php | 2 + .../Configurator/PromoteNamespacesTest.php | 81 ++++++ .../Document/PromoteNamespacesTest.php | 238 ++++++++++++++++++ 6 files changed, 465 insertions(+) create mode 100644 src/Xml/Dom/Configurator/promote_namespaces.php create mode 100644 src/Xml/Dom/Manipulator/Document/promote_namespaces.php create mode 100644 tests/Xml/Dom/Configurator/PromoteNamespacesTest.php create mode 100644 tests/Xml/Dom/Manipulator/Document/PromoteNamespacesTest.php diff --git a/docs/dom.md b/docs/dom.md index 56bcc628..85e11650 100644 --- a/docs/dom.md +++ b/docs/dom.md @@ -533,6 +533,35 @@ Document::fromUnsafeDocument( ); ``` +#### promote_namespaces + +This configurator moves every prefixed `xmlns:*` declaration found on a +descendant element up to the document element, keeping the original prefix +names intact. It is the counterpart of `optimize_namespaces` for cases where +you need eager namespace declarations without renaming prefixes (e.g. SOAP +servers that validate namespace placement on the envelope). 
Default +namespaces (`xmlns="..."`) and prefixes that conflict with a declaration +already on the document element are left untouched. + +⚠️ Legacy DOM quirk: writing an xmlns declaration to the document element +triggers libxml's namespace reconciliation. When two descendants declare the +same prefix for different URIs, libxml rewrites the second subtree's +element prefixes (e.g. `a` -> `a1`) and pulls the extra xmlns up to the +root. Prefix references inside attribute *values* (e.g. `xsi:type="a:Thing"`) +are opaque strings and are not rewritten, so they may end up resolving +against the shadowing declaration on the original subtree. Upgrade to +`veewee/xml` 4.x if exact prefix preservation matters for your consumers. + +```php +use VeeWee\Xml\Dom\Document; +use function VeeWee\Xml\Dom\Configurator\promote_namespaces; + +Document::fromUnsafeDocument( + $document, + promote_namespaces() +); +``` + #### pretty_print Makes the output of the DOM document human-readable. @@ -987,6 +1016,36 @@ $doc->manipulate( ); ``` +#### promote_namespaces + +Moves every prefixed `xmlns:*` declaration found on a descendant element up to +the document element while preserving the original prefix names. Default +namespaces and prefixes that conflict with a declaration already on the +document element are left untouched. Unlike `optimize_namespaces`, prefixes +are not renamed, which matters when consumers perform strict XSD validation. + +⚠️ Legacy DOM quirk: writing an xmlns declaration to the document element +triggers libxml's namespace reconciliation. When two descendants declare the +same prefix for different URIs, libxml rewrites the second subtree's +element prefixes (e.g. `a` -> `a1`) and pulls the extra xmlns up to the +root. Prefix references inside attribute *values* (e.g. `xsi:type="a:Thing"`) +are opaque strings and are not rewritten, so they may end up resolving +against the shadowing declaration on the original subtree. 
Upgrade to +`veewee/xml` 4.x if exact prefix preservation matters for your consumers. + +```php +use DOMDocument; +use VeeWee\Xml\Dom\Document; +use function VeeWee\Xml\Dom\Manipulator\Document\promote_namespaces; + +$doc = Document::fromXmlString($xml); +$doc->manipulate( + static function (DOMDocument $document): void { + promote_namespaces($document); + } +); +``` + ### Element Element specific manipulators operate on `DOMElement` instances. diff --git a/src/Xml/Dom/Configurator/promote_namespaces.php b/src/Xml/Dom/Configurator/promote_namespaces.php new file mode 100644 index 00000000..3f56ca6d --- /dev/null +++ b/src/Xml/Dom/Configurator/promote_namespaces.php @@ -0,0 +1,21 @@ + `a1`) and pulls the extra xmlns up + * to the root. The result is still semantically equivalent XML, but opaque + * prefix references inside attribute values (e.g. xsi:type="a:Thing") are + * not rewritten and may resolve against the shadowing declaration on the + * original subtree. Upgrade to veewee/xml 4.x if exact prefix preservation + * matters for your consumers. 
+ * + * @throws RuntimeException presumably raised by document_element() when the document has no document element (confirm against that locator) + */ +function promote_namespaces(DOMDocument $document): void +{ + $documentElement = document_element()($document); + + /** @var array<string, string> $promoted prefix => URI of the prefixed xmlns declarations present on the document element */ + $promoted = pull( + xmlns_attributes_list($documentElement) + ->filter(static fn (DOMNameSpaceNode $attr): bool => $attr->prefix !== ''), /* the default (unprefixed) xmlns is never tracked */ + static fn (DOMNameSpaceNode $attr): string => $attr->namespaceURI, + static fn (DOMNameSpaceNode $attr): string => $attr->prefix, + ); + + foreach ($documentElement->getElementsByTagName('*') as $element) { /* visits every descendant element of the document element */ + $prefixedXmlns = xmlns_attributes_list($element) + ->filter(static fn (DOMNameSpaceNode $attr): bool => $attr->prefix !== ''); + + foreach ($prefixedXmlns as $attr) { + $prefix = $attr->prefix; + $uri = $attr->namespaceURI; + + if (!array_key_exists($prefix, $promoted)) { /* prefix not yet on the document element: declare it there and remember the binding */ + xmlns_attribute($prefix, $uri)($documentElement); + $promoted[$prefix] = $uri; + } + + if ($promoted[$prefix] === $uri) { /* the document element carries this exact prefix/URI pair, so the local copy is dropped; a prefix bound to a different URI is a conflict and stays on the element */ + remove_namespace($attr, $element); /* NOTE(review): nothing checks whether an ancestor between this element and the document element rebinds the prefix to another URI - confirm removal cannot change prefix resolution in that case */ + } + } + } +} diff --git a/src/bootstrap.php b/src/bootstrap.php index a6e0898a..62fd346c 100644 --- a/src/bootstrap.php +++ b/src/bootstrap.php @@ -28,6 +28,7 @@ 'Xml\Dom\Configurator\normalize' => __DIR__.'/Xml/Dom/Configurator/normalize.php', 'Xml\Dom\Configurator\optimize_namespaces' => __DIR__.'/Xml/Dom/Configurator/optimize_namespaces.php', 'Xml\Dom\Configurator\pretty_print' => __DIR__.'/Xml/Dom/Configurator/pretty_print.php', + 'Xml\Dom\Configurator\promote_namespaces' => __DIR__.'/Xml/Dom/Configurator/promote_namespaces.php', 'Xml\Dom\Configurator\traverse' => __DIR__.'/Xml/Dom/Configurator/traverse.php', 'Xml\Dom\Configurator\trim_spaces' => __DIR__.'/Xml/Dom/Configurator/trim_spaces.php', 'Xml\Dom\Configurator\utf8' => __DIR__.'/Xml/Dom/Configurator/utf8.php', @@ -58,6 +59,7 @@ 'Xml\Dom\Locator\root_namespace' => __DIR__.'/Xml/Dom/Locator/root_namespace.php', 'Xml\Dom\Manipulator\Attribute\rename' => __DIR__.'/Xml/Dom/Manipulator/Attribute/rename.php', 
'Xml\Dom\Manipulator\Document\optimize_namespaces' => __DIR__.'/Xml/Dom/Manipulator/Document/optimize_namespaces.php', + 'Xml\Dom\Manipulator\Document\promote_namespaces' => __DIR__.'/Xml/Dom/Manipulator/Document/promote_namespaces.php', 'Xml\Dom\Manipulator\Element\copy_named_xmlns_attributes' => __DIR__.'/Xml/Dom/Manipulator/Element/copy_named_xmlns_attributes.php', 'Xml\Dom\Manipulator\Element\rename' => __DIR__.'/Xml/Dom/Manipulator/Element/rename.php', 'Xml\Dom\Manipulator\Node\append_external_node' => __DIR__.'/Xml/Dom/Manipulator/Node/append_external_node.php', diff --git a/tests/Xml/Dom/Configurator/PromoteNamespacesTest.php b/tests/Xml/Dom/Configurator/PromoteNamespacesTest.php new file mode 100644 index 00000000..8a5648ad --- /dev/null +++ b/tests/Xml/Dom/Configurator/PromoteNamespacesTest.php @@ -0,0 +1,81 @@ +map(document_element())); + + static::assertSame($expected, $actual); + } + + public static function provideXmls(): iterable + { + yield 'no-action' => [ + '', + '', + ]; + + yield 'child-to-root' => [ + << + + + EOXML, + << + + + EOXML, + ]; + + yield 'mixed-namespaces' => [ + << + + + + EOXML, + << + + + + EOXML, + ]; + + yield 'soap-like' => [ + << + + + 1 + + + + EOXML, + << + + + 1 + + + + EOXML, + ]; + } +} diff --git a/tests/Xml/Dom/Manipulator/Document/PromoteNamespacesTest.php b/tests/Xml/Dom/Manipulator/Document/PromoteNamespacesTest.php new file mode 100644 index 00000000..388a91c5 --- /dev/null +++ b/tests/Xml/Dom/Manipulator/Document/PromoteNamespacesTest.php @@ -0,0 +1,238 @@ +toUnsafeDocument()); + + $actual = xml_string()($doc->map(document_element())); + static::assertSame($expected, $actual); + } + + public static function provideXmls(): iterable + { + yield 'no-namespaces' => [ + '', + '', + ]; + + yield 'already-on-root' => [ + << + + + EOXML, + << + + + EOXML, + ]; + + yield 'child-to-root' => [ + << + + + EOXML, + << + + + EOXML, + ]; + + yield 'duplicate-on-children' => [ + << + + + + EOXML, + << + + + + EOXML, + ]; + + yield 
'mixed-namespaces' => [ + << + + + + EOXML, + << + + + + EOXML, + ]; + + yield 'conflict-root-vs-child' => [ + << + + + EOXML, + << + + + EOXML, + ]; + + yield 'conflict-among-children' => [ + // When two sibling elements use the same prefix for different URIs, + // the first wins the root spot; on legacy DOM, libxml reconciles the + // second subtree and renames its prefix (e.g. `a` -> `a1`) so both + // namespaces end up declared on the document element. This is the + // expected outcome — all declarations are promoted, just with + // prefix disambiguation. Upgrade to veewee/xml 4.x if you need the + // original prefix preserved on the conflicting subtree. + << + + + + EOXML, + << + + + + EOXML, + ]; + + yield 'nested-deep' => [ + << + EOXML, + << + EOXML, + ]; + + yield 'default-namespace' => [ + << + + + EOXML, + << + + + EOXML, + ]; + + yield 'default-and-prefixed-on-same-element' => [ + // When an element carries both a default xmlns and a prefixed + // xmlns:* declaration, only the prefixed one is promoted. On + // legacy DOM, libxml's reconciliation additionally rewrites the + // element's default namespace as a prefixed one on the root (here + // `` becomes ``). The result remains + // semantically identical XML. + << + + + EOXML, + << + + + EOXML, + ]; + + yield 'empty-namespace' => [ + << + + + EOXML, + << + + + EOXML, + ]; + + yield 'attributes-with-namespaces' => [ + << + + + EOXML, + << + + + EOXML, + ]; + + yield 'conflict-among-children-with-xsi-type' => [ + // Pins the interaction between libxml's legacy-DOM prefix + // disambiguation (see `conflict-among-children`) and an + // xsi:type attribute whose value references a conflicting + // prefix. Because xsi:type values are opaque strings at the + // DOM level, libxml cannot rewrite the `a:Thing` reference to + // match the renamed element prefix. 
In this case the + // declaration on `` shadows the root one so the + // xsi:type still resolves to http://two, but callers that + // emit such documents should upgrade to veewee/xml 4.x, which + // preserves prefixes exactly. + << + + + + EOXML, + << + + + + EOXML, + ]; + + yield 'soap-like' => [ + << + + + 1 + + + + EOXML, + << + + + 1 + + + + EOXML, + ]; + } +}