Skip to content
Permalink
Browse files
Merge pull request from GHSA-h6q6-9hqw-rwfv
* fix!: Preserve quotes in DOCTYPE declaration

Since the only purpose of parsing the DOCTYPE is to be able to restore it when serializing,
we decided that it would be best to leave the parsed publicId and systemId as is,
including any quotes.

BREAKING CHANGE: If somebody relies on the actual unquoted values of those ids,
they will need to take care of either single or double quotes and the right escaping.

(Without this change this would not have been possible because the SAX parser already
dropped the information about the quotes that have been used in the source.)

https://www.w3.org/TR/2006/REC-xml11-20060816/#dtd
https://www.w3.org/TR/2006/REC-xml11-20060816/#IDAX1KS (External Entity Declaration)

Co-authored-by: Christian Bewernitz <coder@karfau.de>
Co-authored-by: Chris Brody <chris.brody+brodybits@gmail.com>

* feat(security): Improve error reporting; throw on duplicate attribute

BREAKING CHANGE: It is currently not clear how to consistently deal with duplicate attributes,
so it is also safer for our users to fail when detecting them.

It is possible to configure the `DOMParser.errorHandler` before parsing, to handle those errors differently.

To accomplish this and also be able to verify it in tests we needed to:

- create a new `Error` type `ParseError` and export it
- Throw `ParseError` from `errorHandler.fatalError` and prevent those from being caught in `XMLReader`.
- export `DOMHandler` constructor as `__DOMHandler`

Co-authored-by: Christian Bewernitz <coder@karfau.de>
Co-authored-by: Chris Brody <chris.brody+brodybits@gmail.com>

Co-authored-by: Christian Bewernitz <coder@karfau.de>
  • Loading branch information
brodybits and karfau committed Mar 9, 2021
1 parent a4d717c commit d4201b9dfbf760049f457f9f08a3888d48835135
@@ -178,8 +178,7 @@ DOMHandler.prototype = {
console.error('[xmldom error]\t'+error,_locator(this.locator));
},
fatalError:function(error) {
console.error('[xmldom fatalError]\t'+error,_locator(this.locator));
throw error;
throw new ParseError(error, this.locator);
}
}
function _locator(l){
@@ -244,8 +243,11 @@ function appendElement (hander,node) {

//if(typeof require == 'function'){
var htmlEntity = require('./entities');
var XMLReader = require('./sax').XMLReader;
var sax = require('./sax');
var XMLReader = sax.XMLReader;
var ParseError = sax.ParseError;
var DOMImplementation = exports.DOMImplementation = require('./dom').DOMImplementation;
exports.XMLSerializer = require('./dom').XMLSerializer ;
exports.DOMParser = DOMParser;
exports.__DOMHandler = DOMHandler;
//}
@@ -1094,13 +1094,13 @@ function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){
var sysid = node.systemId;
buf.push('<!DOCTYPE ',node.name);
if(pubid){
buf.push(' PUBLIC "',pubid);
buf.push(' PUBLIC ', pubid);
if (sysid && sysid!='.') {
buf.push( '" "',sysid);
buf.push(' ', sysid);
}
buf.push('">');
buf.push('>');
}else if(sysid && sysid!='.'){
buf.push(' SYSTEM "',sysid,'">');
buf.push(' SYSTEM ', sysid, '>');
}else{
var sub = node.internalSubset;
if(sub){
@@ -18,6 +18,21 @@ var S_ATTR_END = 5;//attr value end and no space(quot end)
var S_TAG_SPACE = 6;//(attr value end || tag end ) && (space offer)
var S_TAG_CLOSE = 7;//closed el<el />

/**
* Creates an error that will not be caught by XMLReader aka the SAX parser.
*
* @param {string} message
* @param {any?} locator Optional, can provide details about the location in the source
* @constructor
*/
function ParseError(message, locator) {
this.message = message
this.locator = locator
if(Error.captureStackTrace) Error.captureStackTrace(this, ParseError);
}
ParseError.prototype = new Error();
ParseError.prototype.name = ParseError.name

function XMLReader(){

}
@@ -126,7 +141,7 @@ function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
}
}
if(!endMatch){
errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName );
errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName ); // No known test case
}
}else{
parseStack.push(config)
@@ -187,10 +202,11 @@ function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
}
}
}catch(e){
if (e instanceof ParseError) {
throw e;
}
errorHandler.error('element parse error: '+e)
//errorHandler.error('element parse error: '+e);
end = -1;
//throw e;
}
if(end>start){
start = end;
@@ -211,6 +227,16 @@ function copyLocator(f,t){
* @return end of the elementStartPart(end of elementEndPart for selfClosed el)
*/
function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,errorHandler){

/**
* @param {string} qname
* @param {string} value
* @param {number} startIndex
*/
function addAttribute(qname, value, startIndex) {
if (qname in el.attributeNames) errorHandler.fatalError('Attribute ' + qname + ' redefined')
el.addValue(qname, value, startIndex)
}
var attrName;
var value;
var p = ++start;
@@ -226,7 +252,7 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
s = S_EQ;
}else{
//fatalError: equal must after attrName or space after attrName
throw new Error('attribute equal must after attrName');
throw new Error('attribute equal must after attrName'); // No known test case
}
break;
case '\'':
@@ -241,7 +267,7 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
p = source.indexOf(c,start)
if(p>0){
value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
el.add(attrName,value,start-1);
addAttribute(attrName, value, start-1);
s = S_ATTR_END;
}else{
//fatalError: no end quot match
@@ -250,14 +276,14 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
}else if(s == S_ATTR_NOQUOT_VALUE){
value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
//console.log(attrName,value,start,p)
el.add(attrName,value,start);
addAttribute(attrName, value, start);
//console.dir(el)
errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
start = p+1;
s = S_ATTR_END
}else{
//fatalError: no equal before
throw new Error('attribute value must after "="');
throw new Error('attribute value must after "="'); // No known test case
}
break;
case '/':
@@ -275,11 +301,10 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
break;
//case S_EQ:
default:
throw new Error("attribute invalid close char('/')")
throw new Error("attribute invalid close char('/')") // No known test case
}
break;
case ''://end document
//throw new Error('unexpected end of input')
errorHandler.error('unexpected end of input');
if(s == S_TAG){
el.setTagName(source.slice(start,p));
@@ -305,13 +330,13 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
value = attrName;
}
if(s == S_ATTR_NOQUOT_VALUE){
errorHandler.warning('attribute "'+value+'" missed quot(")!!');
el.add(attrName,value.replace(/&#?\w+;/g,entityReplacer),start)
errorHandler.warning('attribute "'+value+'" missed quot(")!');
addAttribute(attrName, value.replace(/&#?\w+;/g,entityReplacer), start)
}else{
if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !value.match(/^(?:disabled|checked|selected)$/i)){
errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
}
el.add(value,value,start)
addAttribute(value, value, start)
}
break;
case S_EQ:
@@ -336,7 +361,7 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
case S_ATTR_NOQUOT_VALUE:
var value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
errorHandler.warning('attribute "'+value+'" missed quot(")!!');
el.add(attrName,value,start)
addAttribute(attrName, value, start)
case S_ATTR_END:
s = S_TAG_SPACE;
break;
@@ -359,7 +384,7 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !attrName.match(/^(?:disabled|checked|selected)$/i)){
errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
}
el.add(attrName,attrName,start);
addAttribute(attrName, attrName, start);
start = p;
s = S_ATTR;
break;
@@ -542,8 +567,7 @@ function parseDCC(source,start,domBuilder,errorHandler){//sure start with '<!'
}
}
var lastMatch = matchs[len-1]
domBuilder.startDTD(name,pubid && pubid.replace(/^(['"])(.*?)\1$/,'$2'),
sysid && sysid.replace(/^(['"])(.*?)\1$/,'$2'));
domBuilder.startDTD(name, pubid, sysid);
domBuilder.endDTD();

return lastMatch.index+lastMatch[0].length
@@ -569,11 +593,8 @@ function parseInstruction(source,start,domBuilder){
return -1;
}

/**
* @param source
*/
function ElementAttributes(source){

function ElementAttributes(){
this.attributeNames = {}
}
ElementAttributes.prototype = {
setTagName:function(tagName){
@@ -582,10 +603,11 @@ ElementAttributes.prototype = {
}
this.tagName = tagName
},
add:function(qName,value,offset){
addValue:function(qName, value, offset) {
if(!tagNamePattern.test(qName)){
throw new Error('invalid attribute:'+qName)
}
this.attributeNames[qName] = this.length;
this[this.length++] = {qName:qName,value:value,offset:offset}
},
length:0,
@@ -621,4 +643,4 @@ function split(source,start){
}

exports.XMLReader = XMLReader;

exports.ParseError = ParseError;

0 comments on commit d4201b9

Please sign in to comment.