From 7ff7c10ab2961703ac1752e95b4ff60ee4ee6643 Mon Sep 17 00:00:00 2001 From: Christian Bewernitz Date: Sat, 29 Oct 2022 23:10:55 +0200 Subject: [PATCH] Merge pull request from GHSA-crh6-fp67-6883 * fix: Prevent inserting DOM nodes when they are not well-formed In case such a DOM would be created, the part that is not well formed will be transformed into text nodes, in which xml specific characters like `<` and `>` are encoded accordingly. In the upcoming version 0.9.0 those text nodes will no longer be added an an error will be thrown instead. This change can break your code, if you relied on this behavior, e.g. multiple root elements in the past. We consider it more important to align with the specs that we want to be aligned with, considering the potential security issues that might derive from people not being aware of the difference in behavior. Resolves https://github.com/xmldom/xmldom/security/advisories/GHSA-crh6-fp67-6883 Related Spec: https://dom.spec.whatwg.org/#concept-node-ensure-pre-insertion-validity * fix: Prevent setting documentElement if node is not inserted. --- lib/dom.js | 251 ++++++++++++++---- test/dom/document.test.js | 70 ++++- test/parse/parse-element.test.js | 8 +- .../xmltest/__snapshots__/not-wf.test.js.snap | 19 +- 4 files changed, 290 insertions(+), 58 deletions(-) diff --git a/lib/dom.js b/lib/dom.js index c6ebfe95d..fd1518d57 100644 --- a/lib/dom.js +++ b/lib/dom.js @@ -158,14 +158,14 @@ NodeList.prototype = { * The number of nodes in the list. The range of valid child node indices is 0 to length-1 inclusive. * @standard level1 */ - length:0, + length:0, /** * Returns the indexth item in the collection. If index is greater than or equal to the number of nodes in the list, this returns null. * @standard level1 - * @param index unsigned long + * @param index unsigned long * Index into the collection. * @return Node - * The node at the indexth position in the NodeList, or null if that is not a valid index. + * The node at the indexth position in the NodeList, or null if that is not a valid index. */ item: function(index) { return this[index] || null; @@ -175,7 +175,31 @@ NodeList.prototype = { serializeToString(this[i],buf,isHTML,nodeFilter); } return buf.join(''); - } + }, + /** + * @private + * @param {function (Node):boolean} predicate + * @returns {Node | undefined} + */ + find: function (predicate) { + return Array.prototype.find.call(this, predicate); + }, + /** + * @private + * @param {function (Node):boolean} predicate + * @returns {Node[]} + */ + filter: function (predicate) { + return Array.prototype.filter.call(this, predicate); + }, + /** + * @private + * @param {Node} item + * @returns {number} + */ + indexOf: function (item) { + return Array.prototype.indexOf.call(this, item); + }, }; function LiveNodeList(node,refresh){ @@ -209,7 +233,7 @@ _extends(LiveNodeList,NodeList); * but this is simply to allow convenient enumeration of the contents of a NamedNodeMap, * and does not imply that the DOM specifies an order to these Nodes. * NamedNodeMap objects in the DOM are live. - * used for attributes or DocumentType entities + * used for attributes or DocumentType entities */ function NamedNodeMap() { }; @@ -253,7 +277,7 @@ function _removeNamedNode(el,list,attr){ } } }else{ - throw DOMException(NOT_FOUND_ERR,new Error(el.tagName+'@'+attr)) + throw new DOMException(NOT_FOUND_ERR,new Error(el.tagName+'@'+attr)) } } NamedNodeMap.prototype = { @@ -298,10 +322,10 @@ NamedNodeMap.prototype = { var attr = this.getNamedItem(key); _removeNamedNode(this._ownerElement,this,attr); return attr; - - + + },// raises: NOT_FOUND_ERR,NO_MODIFICATION_ALLOWED_ERR - + //for level2 removeNamedItemNS:function(namespaceURI,localName){ var attr = this.getNamedItemNS(namespaceURI,localName); @@ -447,10 +471,10 @@ Node.prototype = { prefix : null, localName : null, // Modified in DOM Level 2: - insertBefore:function(newChild, refChild){//raises + insertBefore:function(newChild, refChild){//raises return _insertBefore(this,newChild,refChild); }, - replaceChild:function(newChild, oldChild){//raises + replaceChild:function(newChild, oldChild){//raises this.insertBefore(newChild,oldChild); if(oldChild){ this.removeChild(oldChild); @@ -656,48 +680,177 @@ function _removeChild (parentNode, child) { _onUpdateChild(parentNode.ownerDocument, parentNode); return child; } + +/** + * Returns `true` if `node` can be a parent for insertion. + * @param {Node} node + * @returns {boolean} + */ +function hasValidParentNodeType(node) { + return ( + node && + (node.nodeType === Node.DOCUMENT_NODE || node.nodeType === Node.DOCUMENT_FRAGMENT_NODE || node.nodeType === Node.ELEMENT_NODE) + ); +} + +/** + * Returns `true` if `node` can be inserted according to it's `nodeType`. + * @param {Node} node + * @returns {boolean} + */ +function hasInsertableNodeType(node) { + return ( + node && + (isElementNode(node) || + isTextNode(node) || + isDocTypeNode(node) || + node.nodeType === Node.DOCUMENT_FRAGMENT_NODE || + node.nodeType === Node.COMMENT_NODE || + node.nodeType === Node.PROCESSING_INSTRUCTION_NODE) + ); +} + +/** + * Returns true if `node` is a DOCTYPE node + * @param {Node} node + * @returns {boolean} + */ +function isDocTypeNode(node) { + return node && node.nodeType === Node.DOCUMENT_TYPE_NODE; +} + +/** + * Returns true if the node is an element + * @param {Node} node + * @returns {boolean} + */ +function isElementNode(node) { + return node && node.nodeType === Node.ELEMENT_NODE; +} +/** + * Returns true if `node` is a text node + * @param {Node} node + * @returns {boolean} + */ +function isTextNode(node) { + return node && node.nodeType === Node.TEXT_NODE; +} + +/** + * Check if en element node can be inserted before `child`, or at the end if child is falsy, + * according to the presence and position of a doctype node on the same level. + * + * @param {Document} doc The document node + * @param {Node} child the node that would become the nextSibling if the element would be inserted + * @returns {boolean} `true` if an element can be inserted before child + * @private + * https://dom.spec.whatwg.org/#concept-node-ensure-pre-insertion-validity + */ +function isElementInsertionPossible(doc, child) { + var parentChildNodes = doc.childNodes || []; + if (parentChildNodes.find(isElementNode) || isDocTypeNode(child)) { + return false; + } + var docTypeNode = parentChildNodes.find(isDocTypeNode); + return !(child && docTypeNode && parentChildNodes.indexOf(docTypeNode) > parentChildNodes.indexOf(child)); +} /** - * preformance key(refChild == null) + * @private + * @param {Node} parent the parent node to insert `node` into + * @param {Node} node the node to insert + * @param {Node=} child the node that should become the `nextSibling` of `node` + * @returns {Node} + * @throws DOMException for several node combinations that would create a DOM that is not well-formed. + * @throws DOMException if `child` is provided but is not a child of `parent`. + * @see https://dom.spec.whatwg.org/#concept-node-ensure-pre-insertion-validity */ -function _insertBefore(parentNode,newChild,nextChild){ - var cp = newChild.parentNode; +function _insertBefore(parent, node, child) { + if (!hasValidParentNodeType(parent)) { + throw new DOMException(HIERARCHY_REQUEST_ERR, 'Unexpected parent node type ' + parent.nodeType); + } + if (child && child.parentNode !== parent) { + throw new DOMException(NOT_FOUND_ERR, 'child not in parent'); + } + if ( + !hasInsertableNodeType(node) || + // the sax parser currently adds top level text nodes, this will be fixed in 0.9.0 + // || (node.nodeType === Node.TEXT_NODE && parent.nodeType === Node.DOCUMENT_NODE) + (isDocTypeNode(node) && parent.nodeType !== Node.DOCUMENT_NODE) + ) { + throw new DOMException( + HIERARCHY_REQUEST_ERR, + 'Unexpected node type ' + node.nodeType + ' for parent node type ' + parent.nodeType + ); + } + var parentChildNodes = parent.childNodes || []; + var nodeChildNodes = node.childNodes || []; + if (parent.nodeType === Node.DOCUMENT_NODE) { + if (node.nodeType === Node.DOCUMENT_FRAGMENT_NODE) { + let nodeChildElements = nodeChildNodes.filter(isElementNode); + if (nodeChildElements.length > 1 || nodeChildNodes.find(isTextNode)) { + throw new DOMException(HIERARCHY_REQUEST_ERR, 'More than one element or text in fragment'); + } + if (nodeChildElements.length === 1 && !isElementInsertionPossible(parent, child)) { + throw new DOMException(HIERARCHY_REQUEST_ERR, 'Element in fragment can not be inserted before doctype'); + } + } + if (isElementNode(node)) { + if (parentChildNodes.find(isElementNode) || !isElementInsertionPossible(parent, child)) { + throw new DOMException(HIERARCHY_REQUEST_ERR, 'Only one element can be added and only after doctype'); + } + } + if (isDocTypeNode(node)) { + if (parentChildNodes.find(isDocTypeNode)) { + throw new DOMException(HIERARCHY_REQUEST_ERR, 'Only one doctype is allowed'); + } + let parentElementChild = parentChildNodes.find(isElementNode); + if (child && parentChildNodes.indexOf(parentElementChild) < parentChildNodes.indexOf(child)) { + throw new DOMException(HIERARCHY_REQUEST_ERR, 'Doctype can only be inserted before an element'); + } + if (!child && parentElementChild) { + throw new DOMException(HIERARCHY_REQUEST_ERR, 'Doctype can not be appended since element is present'); + } + } + } + + var cp = node.parentNode; if(cp){ - cp.removeChild(newChild);//remove and update + cp.removeChild(node);//remove and update } - if(newChild.nodeType === DOCUMENT_FRAGMENT_NODE){ - var newFirst = newChild.firstChild; + if(node.nodeType === DOCUMENT_FRAGMENT_NODE){ + var newFirst = node.firstChild; if (newFirst == null) { - return newChild; + return node; } - var newLast = newChild.lastChild; + var newLast = node.lastChild; }else{ - newFirst = newLast = newChild; + newFirst = newLast = node; } - var pre = nextChild ? nextChild.previousSibling : parentNode.lastChild; + var pre = child ? child.previousSibling : parent.lastChild; newFirst.previousSibling = pre; - newLast.nextSibling = nextChild; - - + newLast.nextSibling = child; + + if(pre){ pre.nextSibling = newFirst; }else{ - parentNode.firstChild = newFirst; + parent.firstChild = newFirst; } - if(nextChild == null){ - parentNode.lastChild = newLast; + if(child == null){ + parent.lastChild = newLast; }else{ - nextChild.previousSibling = newLast; + child.previousSibling = newLast; } do{ - newFirst.parentNode = parentNode; + newFirst.parentNode = parent; }while(newFirst !== newLast && (newFirst= newFirst.nextSibling)) - _onUpdateChild(parentNode.ownerDocument||parentNode,parentNode); - //console.log(parentNode.lastChild.nextSibling == null) - if (newChild.nodeType == DOCUMENT_FRAGMENT_NODE) { - newChild.firstChild = newChild.lastChild = null; + _onUpdateChild(parent.ownerDocument||parent, parent); + //console.log(parent.lastChild.nextSibling == null) + if (node.nodeType == DOCUMENT_FRAGMENT_NODE) { + node.firstChild = node.lastChild = null; } - return newChild; + return node; } /** @@ -752,11 +905,13 @@ Document.prototype = { } return newChild; } - if(this.documentElement == null && newChild.nodeType == ELEMENT_NODE){ + _insertBefore(this, newChild, refChild); + newChild.ownerDocument = this; + if (this.documentElement === null && newChild.nodeType === ELEMENT_NODE) { this.documentElement = newChild; } - return _insertBefore(this,newChild,refChild),(newChild.ownerDocument = this),newChild; + return newChild; }, removeChild : function(oldChild){ if(this.documentElement == oldChild){ @@ -950,7 +1105,7 @@ Element.prototype = { var attr = this.getAttributeNode(name) attr && this.removeAttributeNode(attr); }, - + //four real opeartion method appendChild:function(newChild){ if(newChild.nodeType === DOCUMENT_FRAGMENT_NODE){ @@ -974,7 +1129,7 @@ Element.prototype = { var old = this.getAttributeNodeNS(namespaceURI, localName); old && this.removeAttributeNode(old); }, - + hasAttributeNS : function(namespaceURI, localName){ return this.getAttributeNodeNS(namespaceURI, localName)!=null; }, @@ -990,7 +1145,7 @@ Element.prototype = { getAttributeNodeNS : function(namespaceURI, localName){ return this.attributes.getNamedItemNS(namespaceURI, localName); }, - + getElementsByTagName : function(tagName){ return new LiveNodeList(this,function(base){ var ls = []; @@ -1011,7 +1166,7 @@ Element.prototype = { } }); return ls; - + }); } }; @@ -1040,7 +1195,7 @@ CharacterData.prototype = { }, insertData: function(offset,text) { this.replaceData(offset,0,text); - + }, appendChild:function(newChild){ throw new Error(ExceptionMessage[HIERARCHY_REQUEST_ERR]) @@ -1134,7 +1289,7 @@ function nodeSerializeToString(isHtml,nodeFilter){ var refNode = this.nodeType == 9 && this.documentElement || this; var prefix = refNode.prefix; var uri = refNode.namespaceURI; - + if(uri && prefix == null){ //console.log(prefix) var prefix = refNode.lookupPrefix(uri); @@ -1167,8 +1322,8 @@ function needNamespaceDefine(node, isHTML, visibleNamespaces) { if (prefix === "xml" && uri === NAMESPACE.XML || uri === NAMESPACE.XMLNS) { return false; } - - var i = visibleNamespaces.length + + var i = visibleNamespaces.length while (i--) { var ns = visibleNamespaces[i]; // get namespace prefix @@ -1219,7 +1374,7 @@ function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){ var len = attrs.length; var child = node.firstChild; var nodeName = node.tagName; - + isHTML = NAMESPACE.isHTML(node.namespaceURI) || isHTML var prefixedNodeName = nodeName @@ -1278,14 +1433,14 @@ function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){ serializeToString(attr,buf,isHTML,nodeFilter,visibleNamespaces); } - // add namespace for current node + // add namespace for current node if (nodeName === prefixedNodeName && needNamespaceDefine(node, isHTML, visibleNamespaces)) { var prefix = node.prefix||''; var uri = node.namespaceURI; addSerializedAttribute(buf, prefix ? 'xmlns:' + prefix : "xmlns", uri); visibleNamespaces.push({ prefix: prefix, namespace:uri }); } - + if(child || isHTML && !/^(?:meta|link|img|br|hr|input)$/i.test(nodeName)){ buf.push('>'); //if is cdata child node @@ -1500,7 +1655,7 @@ try{ } } }) - + function getTextContent(node){ switch(node.nodeType){ case ELEMENT_NODE: diff --git a/test/dom/document.test.js b/test/dom/document.test.js index 525a69c8c..a53e8afef 100644 --- a/test/dom/document.test.js +++ b/test/dom/document.test.js @@ -1,7 +1,7 @@ -'use strict' +'use strict'; -const { getTestParser } = require('../get-test-parser') -const { DOMImplementation } = require('../../lib/dom') +const { getTestParser } = require('../get-test-parser'); +const { DOMImplementation, DOMException } = require('../../lib/dom'); const INPUT = (first = '', second = '', third = '', fourth = '') => ` @@ -99,4 +99,68 @@ describe('Document.prototype', () => { expect(doc.firstChild === doctype).toBe(true) }) }) + describe('insertBefore', () => { + it('should insert the first element and set `documentElement`', () => { + const doc = new DOMImplementation().createDocument(null, ''); + expect(doc.childNodes).toHaveLength(0); + expect(doc.documentElement).toBeNull(); + const root = doc.createElement('root'); + doc.insertBefore(root); + expect(doc.documentElement).toBe(root); + expect(doc.childNodes).toHaveLength(1); + expect(doc.childNodes.item(0)).toBe(root); + }); + it('should prevent inserting a second element', () => { + const doc = new DOMImplementation().createDocument(null, ''); + const root = doc.createElement('root'); + const second = doc.createElement('second'); + doc.insertBefore(root); + expect(() => doc.insertBefore(second)).toThrow(DOMException); + expect(doc.documentElement).toBe(root); + expect(doc.childNodes).toHaveLength(1); + }); + it('should prevent inserting an element before a doctype', () => { + const impl = new DOMImplementation(); + const doctype = impl.createDocumentType('DT'); + const doc = impl.createDocument(null, '', doctype); + expect(doc.childNodes).toHaveLength(1); + const root = doc.createElement('root'); + expect(() => doc.insertBefore(root, doctype)).toThrow(DOMException); + expect(doc.documentElement).toBeNull(); + expect(doc.childNodes).toHaveLength(1); + expect(root.parentNode).toBeNull(); + }); + it('should prevent inserting a second doctype', () => { + const impl = new DOMImplementation(); + const doctype = impl.createDocumentType('DT'); + const doctype2 = impl.createDocumentType('DT2'); + const doc = impl.createDocument(null, '', doctype); + expect(doc.childNodes).toHaveLength(1); + expect(() => doc.insertBefore(doctype2)).toThrow(DOMException); + expect(doc.childNodes).toHaveLength(1); + }); + it('should prevent inserting a doctype after an element', () => { + const impl = new DOMImplementation(); + const doc = impl.createDocument(null, ''); + const root = doc.createElement('root'); + doc.insertBefore(root); + const doctype = impl.createDocumentType('DT'); + expect(doc.childNodes).toHaveLength(1); + + expect(() => doc.insertBefore(doctype)).toThrow(DOMException); + + expect(doc.childNodes).toHaveLength(1); + }); + it('should prevent inserting before an child which is not a child of parent', () => { + const doc = new DOMImplementation().createDocument(null, ''); + const root = doc.createElement('root'); + const withoutParent = doc.createElement('second'); + + expect(() => doc.insertBefore(root, withoutParent)).toThrow(DOMException); + + expect(doc.documentElement).toBeNull(); + expect(doc.childNodes).toHaveLength(0); + expect(root.parentNode).toBeNull(); + }); + }); }) diff --git a/test/parse/parse-element.test.js b/test/parse/parse-element.test.js index bc99f273e..5d8c4ed85 100644 --- a/test/parse/parse-element.test.js +++ b/test/parse/parse-element.test.js @@ -41,10 +41,10 @@ describe('XML Node Parse', () => { it('sibling closing tag with whitespace', () => { const actual = new DOMParser() - .parseFromString(`Harry Potter`, 'text/xml') - .toString() - expect(actual).toBe(`Harry Potter`) - }) + .parseFromString(`Harry Potter`, 'text/xml') + .toString(); + expect(actual).toBe(`Harry Potter`); + }); describe('simple attributes', () => { describe('nothing special', () => { diff --git a/test/xmltest/__snapshots__/not-wf.test.js.snap b/test/xmltest/__snapshots__/not-wf.test.js.snap index cffe47def..5bc160271 100644 --- a/test/xmltest/__snapshots__/not-wf.test.js.snap +++ b/test/xmltest/__snapshots__/not-wf.test.js.snap @@ -315,14 +315,22 @@ Object { exports[`xmltest/not-wellformed standalone should match 040.xml with snapshot 1`] = ` Object { "actual": " -", +<doc>", + "error": Array [ + "[xmldom error] element parse error: Error: Hierarchy request error: Only one element can be added and only after doctype +@#[line:2,col:1]", + ], } `; exports[`xmltest/not-wellformed standalone should match 041.xml with snapshot 1`] = ` Object { "actual": " -", +<doc>", + "error": Array [ + "[xmldom error] element parse error: Error: Hierarchy request error: Only one element can be added and only after doctype +@#[line:2,col:1]", + ], } `; @@ -342,7 +350,12 @@ Illegal data exports[`xmltest/not-wellformed standalone should match 044.xml with snapshot 1`] = ` Object { - "actual": "", + "actual": "<doc/> +", + "error": Array [ + "[xmldom error] element parse error: Error: Hierarchy request error: Only one element can be added and only after doctype +@#[line:1,col:7]", + ], } `;