Large diffs are not rendered by default.

@@ -0,0 +1,51 @@
'use strict'

const { DOMParser } = require('../../lib/dom-parser')
const { REPORTED } = require('./reported')

/**
 * Verifies how a plain function passed as `errorHandler` is invoked:
 * - declared with two parameters it is called with the level and the message
 * - declared with one parameter it is called with the message only,
 *   which carries the level as `[xmldom <level>]`
 */
describe('custom errorHandler', () => {
  it('function with two args receives key and message', () => {
    const errors = {}
    const parser = new DOMParser({
      // currently needs to be a `function` to make the test work,
      // `jest.fn()` or using `() => {}` doesn't work
      errorHandler: function (key, msg) {
        errors[key] = msg
      },
    })

    parser.parseFromString(REPORTED.WF_AttributeMissingQuote.source, 'text/xml')
    expect(errors).toHaveProperty('warning')
    parser.parseFromString(
      REPORTED.SYNTAX_AttributeEqualMissingValue.source,
      'text/xml'
    )
    expect(errors).toHaveProperty('error')
    parser.parseFromString(REPORTED.WF_DuplicateAttribute.source, 'text/xml')
    expect(errors).toHaveProperty('fatalError')
  })

  it('function with one argument builds list', () => {
    const errors = []
    const parser = new DOMParser({
      // currently needs to be a `function` to make the test work,
      // `jest.fn()` or using `() => {}` doesn't work
      errorHandler: function (msg) {
        errors.push(msg)
      },
    })

    parser.parseFromString(REPORTED.WF_AttributeMissingQuote.source, 'text/xml')
    parser.parseFromString(
      REPORTED.SYNTAX_AttributeEqualMissingValue.source,
      'text/xml'
    )
    parser.parseFromString(REPORTED.WF_DuplicateAttribute.source, 'text/xml')

    // Use asymmetric matchers so every message is really tested against its pattern.
    // A bare RegExp in the expected array is not applied as a pattern to the
    // received string, which would reduce this assertion to a length check.
    expect(errors).toEqual([
      expect.stringMatching(/\[xmldom warning]/),
      expect.stringMatching(/\[xmldom error]/),
      expect.stringMatching(/\[xmldom fatalError]/),
    ])
  })
})

This file was deleted.

@@ -0,0 +1,106 @@
'use strict'

const { REPORTED } = require('./reported')
const { getTestParser } = require('../get-test-parser')
const { ParseError } = require('../../lib/sax')
const { DOMParser } = require('../../lib/dom-parser')

// Runs every sample from REPORTED against both mime types and checks
// which errorHandler level is triggered and how thrown errors propagate.
describe.each(Object.entries(REPORTED))(
  '%s',
  (name, { source, level, match, skippedInHtml }) => {
    /**
     * Creates a mocked handler method that wraps each message in an Error,
     * stores that Error in `thrown` and then throws it.
     *
     * @param {Error[]} thrown collects every Error before it is thrown
     */
    const recordAndThrow = (thrown) =>
      jest.fn((message) => {
        const toThrow = new Error(message)
        thrown.push(toThrow)
        throw toThrow
      })

    describe.each(['text/xml', 'text/html'])('with mimeType %s', (mimeType) => {
      if (mimeType === 'text/html' && skippedInHtml) {
        it(`should not be reported as ${level}`, () => {
          const { errors, parser } = getTestParser()

          parser.parseFromString(source, mimeType)

          // a level that was never reported has no key on `errors`
          expect(errors[level]).toBeUndefined()
        })
        return
      }

      it(`should be reported as ${level}`, () => {
        const { errors, parser } = getTestParser()

        parser.parseFromString(source, mimeType)

        const reported = errors[level]
        // keep the snapshot so any change of a message shows up in the git diff
        expect(reported).toMatchSnapshot()
        // when the sample defines `match`, only count the messages it selects
        const relevant = match ? (reported || []).filter(match) : reported
        expect(relevant).toHaveLength(1)
      })

      switch (level) {
        case 'fatalError':
          it(`should throw ParseError in errorHandler.fatalError`, () => {
            const parser = new DOMParser()

            expect(() => parser.parseFromString(source, mimeType)).toThrow(
              ParseError
            )
          })
          break
        case 'error':
          it(`should not catch Error thrown in errorHandler.${level}`, () => {
            const thrown = []
            const errorHandler = { [level]: recordAndThrow(thrown) }
            const { parser } = getTestParser({ errorHandler })

            expect(() => parser.parseFromString(source, mimeType)).toThrow(
              Error
            )
            expect(thrown.map(toErrorSnapshot)).toMatchSnapshot()
            if (match) {
              expect(match(thrown[0].toString())).toBe(true)
            }
          })
          break
        case 'warning':
          it('should escalate Error thrown in errorHandler.warning to errorHandler.error', () => {
            const thrown = []
            const errorHandler = {
              warning: recordAndThrow(thrown),
              error: jest.fn(),
            }
            const { parser } = getTestParser({ errorHandler })

            parser.parseFromString(source, mimeType)

            expect(errorHandler.warning).toHaveBeenCalledTimes(1)
            expect(errorHandler.error).toHaveBeenCalledTimes(1)
            expect(thrown.map(toErrorSnapshot)).toMatchSnapshot()
            if (match) {
              expect(match(thrown[0].message)).toBe(true)
            }
          })
          break
      }
    })
  }
)

/**
 * Creates a string from an error that is easily readable in a snapshot:
 * - first line: the message, with every run of line breaks
 *   (plus the whitespace following it) replaced by `||`
 * - second line: the first stack entry that is in `lib/sax.js`,
 *   with the absolute path and the character position stripped
 *
 * When the error has no stack, or no stack entry is from `lib/sax.js`,
 * the second line is a fixed placeholder, so the caller gets a readable
 * snapshot mismatch instead of a TypeError from this helper.
 *
 * @param {Error} error
 * @returns {string}
 */
function toErrorSnapshot(error) {
  const libSaxMatch = /\/.*\/(lib\/sax\.js)/
  const message = error.message.replace(/([\n\r]+\s*)/g, '||')
  // find the first stack line that is from lib/sax.js
  const saxFrame = (error.stack || '')
    .split(/[\n\r]+/)
    .find((line) => libSaxMatch.test(line))
  if (saxFrame === undefined) {
    return `${message}\n(no lib/sax.js entry in stack)`
  }
  const location = saxFrame
    // strip the absolute path
    .replace(libSaxMatch, '$1')
    // strip the position of the character in the line
    .replace(/:\d+\)$/, ')')
  return `${message}\n${location}`
}
@@ -0,0 +1,243 @@
'use strict'

/**
 * @typedef ErrorReport
 * @property {string} source the XML snippet
 * @property {'error'|'warning'|'fatalError'} level the name of the method triggered
 * @property {?function(msg:string):boolean} match to pick the relevant report when there are multiple
 * @property {?boolean} skippedInHtml Is the error reported when parsing HTML?
 */
/**
 * XML samples, keyed by a descriptive name, that make the XMLReader
 * call a method on `errorHandler`, together with the expected level
 * and a predicate selecting the relevant message.
 *
 * @type {Record<string, ErrorReport>}
 */
const REPORTED = {
  /**
   * Entities are only converted during parsing when they are part of the entityMap.
   * xmldom currently doesn't parse entities declared in a DTD.
   *
   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#wf-entdeclared
   * @see https://www.w3.org/TR/2006/REC-xml11-20060816/#wf-entdeclared
   */
  WF_EntityDeclared: {
    source: '<xml>&e;</xml>',
    level: 'error',
    match(msg) {
      return /entity not found/.test(msg)
    },
  },
  /**
   * Well-formedness constraint: Unique Att Spec
   *
   * An attribute name must not appear more than once
   * in the same start-tag or empty-element tag.
   *
   * In the browser:
   * - as XML it is reported as `error on line 1 at column 17: Attribute a redefined`
   * - as HTML only the first definition is considered
   *
   * In xmldom the behavior for namespaces (picks first)
   * differs from the one for other attributes (picks last),
   * which can be a security issue.
   *
   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#uniqattspec
   * @see https://www.w3.org/TR/2006/REC-xml11-20060816/#uniqattspec
   */
  WF_DuplicateAttribute: {
    source: '<xml a="1" a="2"></xml>',
    level: 'fatalError',
    match(msg) {
      return /Attribute .* redefined/.test(msg)
    },
  },
  /**
   * This sample doesn't follow the specified grammar.
   * In the browser it is reported as `error on line 1 at column 14: expected '>'`,
   * but the root level element is still added to the DOM.
   */
  SYNTAX_EndTagNotComplete: {
    source: '<xml></xml',
    level: 'error',
    match(msg) {
      return /end tag name/.test(msg) && /is not complete/.test(msg)
    },
  },
  /**
   * This sample doesn't follow the specified grammar.
   * In the browser it is reported as `error on line 1 at column 21: expected '>'`,
   * but the root level element and the inner tag are still added to the DOM.
   */
  SYNTAX_EndTagMaybeNotComplete: {
    source: '<xml><inner></inner </xml>',
    level: 'error',
    match(msg) {
      return /end tag name/.test(msg) && /maybe not complete/.test(msg)
    },
  },
  /**
   * This sample doesn't follow the specified grammar.
   * In the browser it is reported as `error on line 1 at column 6: Comment not terminated`.
   */
  SYNTAX_UnclosedComment: {
    source: '<!--',
    level: 'error',
    match(msg) {
      return /Unclosed comment/.test(msg)
    },
  },
  /**
   * Triggered by lib/sax.js:596, caught in 208
   * This sample doesn't follow the specified grammar.
   * In the browser:
   * - as XML it is reported as
   *   `error on line 1 at column 2: StartTag: invalid element name`
   * - as HTML it is accepted as characters
   */
  SYNTAX_InvalidTagName: {
    source: '<123 />',
    level: 'error',
    match(msg) {
      return /invalid tagName/.test(msg)
    },
  },
  /**
   * Triggered by lib/sax.js:602, caught in 208
   * This sample doesn't follow the specified grammar.
   * In the browser:
   * - as XML it is reported as
   *   `error on line 1 at column 6: error parsing attribute name`
   * - as HTML it is accepted as attribute name
   */
  SYNTAX_InvalidAttributeName: {
    source: '<xml 123=""/>',
    level: 'error',
    match(msg) {
      return /invalid attribute/.test(msg)
    },
  },
  /**
   * This sample doesn't follow the specified grammar.
   * In the browser it is reported as `error on line 1 at column 5: Couldn't find end of Start Tag xml`.
   */
  SYNTAX_UnexpectedEndOfInput: {
    source: '<xml',
    level: 'error',
    match(msg) {
      return /unexpected end of input/.test(msg)
    },
  },
  /**
   * Triggered by lib/sax.js:392, caught in 208
   * This sample doesn't follow the specified grammar.
   * In the browser:
   * - in XML it is reported as `error on line 1 at column 8: error parsing attribute name`
   * - in HTML it produces `<xml><a <="" xml=""></a></xml>` (invalid XML?)
   */
  SYNTAX_ElementClosingNotConnected: {
    source: '<xml><a/ </xml>',
    level: 'error',
    match(msg) {
      return /must be connected/.test(msg)
    },
  },
  /**
   * In the browser (for XML) this is reported as
   * `error on line 1 at column 6: Extra content at the end of the document`,
   * for HTML it is added to the DOM without anything being reported.
   */
  WF_UnclosedXmlAttribute: {
    source: '<xml>',
    level: 'warning',
    skippedInHtml: true,
    match(msg) {
      return /unclosed xml attribute/.test(msg)
    },
  },
  /**
   * In the browser:
   * - for XML it is reported as
   *   `error on line 1 at column 10: Specification mandates value for attribute attr`
   * - for HTML it uses the attribute as one with no value, adds `"value"` to the attribute name
   *   and does not report any issue.
   */
  WF_AttributeValueMustAfterEqual: {
    source: '<xml attr"value" />',
    level: 'warning',
    match(msg) {
      return /attribute value must after "="/.test(msg)
    },
  },
  /**
   * In the browser:
   * - for XML it is reported as `error on line 1 at column 11: AttValue: " or ' expected`
   * - for HTML it wraps `value"` with quotes and does not report any issue.
   */
  WF_AttributeMissingStartingQuote: {
    source: '<xml attr=value" />',
    level: 'warning',
    match(msg) {
      return /missed start quot/.test(msg)
    },
  },
  /**
   * Triggered by lib/sax.js:264, caught in 208.
   * TODO: Comment indicates fatalError, change to use errorHandler.fatalError?
   *
   * In the browser:
   * - for XML it is reported as `error on line 1 at column 20: AttValue: ' expected`
   * - for HTML nothing is added to the DOM.
   */
  SYNTAX_AttributeMissingEndingQuote: {
    source: '<xml attr="value />',
    level: 'error',
    match(msg) {
      return /attribute value no end .* match/.test(msg)
    },
  },
  /**
   * Triggered by lib/sax.js:324
   * In the browser:
   * - for XML it is reported as `error on line 1 at column 11: AttValue: " or ' expected`
   * - for HTML it wraps `value/` with quotes and does not report any issue.
   */
  WF_AttributeMissingQuote: {
    source: '<xml attr=value/>',
    level: 'warning',
    match(msg) {
      return / missed quot/.test(msg) && !/!!/.test(msg)
    },
  },
  /**
   * Triggered by lib/sax.js:354
   * This is the only warning reported in this sample.
   * For some reason the "attribute" that is reported as missing quotes
   * has the name `&`.
   * This case is also present in 2 tests in test/html/normalize.test.js
   *
   * In the browser:
   * - for XML it is reported as `error on line 1 at column 8: AttValue: " or ' expected`
   * - for HTML it yields `<xml a="&amp;" b="&amp;"></xml>` and does not report any issue.
   */
  WF_AttributeMissingQuote2: {
    source: `<xml a=& b="&"/>`,
    level: 'warning',
    match(msg) {
      return / missed quot/.test(msg) && /!!/.test(msg)
    },
  },
  /**
   * In the browser:
   * - for XML it is reported as `error on line 1 at column 9: AttValue: " or ' expected`
   * - for HTML it yields `<doc a1></xml>` and does not report any issue.
   *
   * But the XML specifications do not allow that:
   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Attribute
   * @see https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Attribute
   */
  SYNTAX_AttributeEqualMissingValue: {
    source: '<doc a1=></doc>',
    level: 'error',
    match(msg) {
      return /attribute value missed!!/.test(msg)
    },
  },
  /**
   * In the browser this is not an issue at all, it just adds an attribute without a value.
   * But the XML specifications do not allow that:
   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Attribute
   * @see https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Attribute
   */
  WF_AttributeMissingValue: {
    source: '<xml attr ></xml>',
    level: 'warning',
    match(msg) {
      return /missed value/.test(msg) && /instead!!/.test(msg)
    },
  },
  /**
   * Triggered by lib/sax.js:376
   * This seems to only be reached when there are two subsequent attributes with a missing value.
   * In the browser this is not an issue at all, it just adds an attribute without a value.
   * But the XML specifications do not allow that:
   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Attribute
   * @see https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Attribute
   */
  WF_AttributeMissingValue2: {
    source: '<xml attr attr2 ></xml>',
    level: 'warning',
    match(msg) {
      return /missed value/.test(msg) && /instead2!!/.test(msg)
    },
  },
}

module.exports = {
REPORTED,
}
@@ -0,0 +1,140 @@
'use strict'

const { ParseError } = require('../../lib/sax')
const { __DOMHandler, DOMParser } = require('../../lib/dom-parser')

/**
 * Sorted names of all enumerable own properties of the DOMHandler prototype,
 * i.e. the methods DOMHandler implements.
 *
 * @type {string[]}
 */
const DOMHandlerMethods = Object.keys(__DOMHandler.prototype)
DOMHandlerMethods.sort()

/**
 * DOMHandler methods (some of them are just empty) that XMLReader currently never calls.
 * The first test compares this set with the methods that were not called,
 * so it fails as soon as that changes.
 *
 * @type {Set<string>}
 */
const UNCALLED_METHODS = new Set()
  .add('attributeDecl')
  .add('elementDecl')
  .add('endEntity')
  .add('externalEntityDecl')
  .add('getExternalSubset')
  .add('ignorableWhitespace')
  .add('internalEntityDecl')
  .add('notationDecl')
  .add('resolveEntity')
  .add('skippedEntity')
  .add('startEntity')
  .add('unparsedEntityDecl')

/**
 * Handler methods that DOMParser/XMLReader call during parsing without a try/catch guard,
 * so an error thrown inside one of them stops the parsing process.
 * A test below verifies this error handling;
 * if that handling changes, this list might need to change as well.
 *
 * @type {Set<string>}
 */
const UNCAUGHT_METHODS = new Set()
  .add('characters')
  .add('endDocument')
  .add('error')
  .add('setDocumentLocator')
  .add('startDocument')

/** Does nothing; the default implementation for mocked handler methods. */
function noop() {
  // intentionally empty
}

/**
 * A DOMHandler subclass that replaces every method listed in `DOMHandlerMethods`
 * with a named jest mock, so the calls can be inspected afterwards.
 *
 * The mock for `throwingMethod` (if given) throws an instance of `ErrorClass`
 * (`ParseError` when omitted) on every call, all other mocks do nothing.
 *
 * The `methods` property holds all mocks, in the order of `DOMHandlerMethods`.
 */
class StubDOMHandler extends __DOMHandler {
  /**
   * @param {string} [throwingMethod] name of the method that should throw
   * @param {Function} [ErrorClass] constructor of the error to throw
   */
  constructor(throwingMethod, ErrorClass) {
    super()
    const ThrownError = ErrorClass || ParseError
    const fail = () => {
      throw new ThrownError(`StubDOMHandler throwing in ${throwingMethod}`)
    }
    this.methods = DOMHandlerMethods.map((name) => {
      const mock = jest.fn(name === throwingMethod ? fail : noop)
      mock.mockName(name)
      this[name] = mock
      return mock
    })
  }
}
/**
 * A sample document meant to trigger every call from XMLReader to DOMHandler at least once.
 * The first test below parses it with a `StubDOMHandler` and asserts that the only
 * methods never called are the ones listed in `UNCALLED_METHODS`.
 *
 * There is of course no guarantee that it triggers all the places where XMLReader calls DOMHandler.
 * For example not all possible warning and error cases are present in this sample,
 * only enough of them for the related methods to be triggered.
 * For testing all the cases of the different error levels,
 * there are samples per case in
 * @see REPORTED
 *
 * The literal is passed to the parser as is, so every character in it
 * (including the line breaks) is part of the input.
 */
const ALL_METHODS = `<?xml ?>
<!DOCTYPE name >
<![CDATA[ raw ]]>
<root xmlns="namespace">
<!-- -->
<element xmlns:x="http://test" x:a="" warning>
character
</element>
<element duplicate="" duplicate="fatal"></mismatch>
</root>
<!--
`

/**
 * Checks which DOMHandler methods are called while parsing `ALL_METHODS`
 * and what happens when one of the called methods throws:
 * - a `ParseError` is expected to always reach the caller
 * - any other `Error` is expected to reach the caller only
 *   for the methods listed in `UNCAUGHT_METHODS`
 */
describe('methods called in DOMHandler', () => {
  it('should call "all possible" methods when using StubDOMHandler', () => {
    const domBuilder = new StubDOMHandler()
    const parser = new DOMParser({ domBuilder, locator: {} })
    expect(domBuilder.methods).toHaveLength(DOMHandlerMethods.length)

    parser.parseFromString(ALL_METHODS)

    // `methods` follows the sorted order of DOMHandlerMethods,
    // so the expected names need to be sorted as well
    const uncalledMethodNames = domBuilder.methods
      .filter((m) => m.mock.calls.length === 0)
      .map((m) => m.getMockName())
    expect(uncalledMethodNames).toEqual([...UNCALLED_METHODS.values()].sort())
  })
  // only methods that are called at all can be used to throw
  describe.each(DOMHandlerMethods.filter((m) => !UNCALLED_METHODS.has(m)))(
    'when DOMHandler.%s throws',
    (throwing) => {
      it('should not catch ParseError', () => {
        const domBuilder = new StubDOMHandler(throwing, ParseError)
        const parser = new DOMParser({ domBuilder, locator: {} })

        expect(() => parser.parseFromString(ALL_METHODS)).toThrow(ParseError)
      })
      const isUncaughtMethod = UNCAUGHT_METHODS.has(throwing)
      it(`${
        isUncaughtMethod ? 'does not' : 'should'
      } catch other Error`, () => {
        const domBuilder = new StubDOMHandler(throwing, Error)
        const parser = new DOMParser({ domBuilder, locator: {} })

        if (isUncaughtMethod) {
          expect(() => parser.parseFromString(ALL_METHODS)).toThrow()
        } else {
          expect(() => parser.parseFromString(ALL_METHODS)).not.toThrow()
        }
      })
    }
  )
})
@@ -0,0 +1,27 @@
'use strict'

const { getTestParser } = require('../get-test-parser')
// A DOCTYPE identifier quoted with one kind of quote may contain the other kind;
// parsing it must yield the identifier unchanged and report nothing.
describe('doctype', () => {
  describe.each(['SYSTEM', 'PUBLIC'])('%s', (idType) => {
    const samples = [
      ['outer single', `<!DOCTYPE x ${idType} '\"'><X/>`, "'\"'"],
      ['outer double', `<!DOCTYPE x ${idType} "\'"><X/>`, '"\'"'],
    ]
    test.each(samples)(
      'should parse single line DOCTYPE with mixed quotes (%s)',
      (_, source, idValue) => {
        const { errors, parser } = getTestParser()

        const doctype = parser.parseFromString(source).firstChild
        const idProperty = idType === 'SYSTEM' ? 'systemId' : 'publicId'

        // spreading `errors` makes the comparison fail on any reported level
        const received = {
          [idType]: doctype[idProperty],
          name: doctype.name,
          ...errors,
        }
        expect(received).toEqual({ [idType]: idValue, name: 'x' })
      }
    )
  })
})
@@ -0,0 +1,38 @@
'use strict'
const { ParseError } = require('../../lib/sax')

// Pins the contract of ParseError: name, prototype chain,
// constructor arguments and a usable stack trace.
describe('ParseError', () => {
  it('should have name ParseError', () => {
    const error = new ParseError('')
    expect(error.name).toBe('ParseError')
  })
  it('should be an instance of Error', () => {
    expect(new ParseError('')).toBeInstanceOf(Error)
  })

  it('should be an instance of ParseError', () => {
    expect(new ParseError('')).toBeInstanceOf(ParseError)
  })

  it('should store first argument as message', () => {
    const { message } = new ParseError('FROM TEST')
    expect(message).toBe('FROM TEST')
  })

  it('should store second argument as locator', () => {
    const locator = {}
    expect(new ParseError('', locator).locator).toBe(locator)
  })

  it('should have correct StackTrace', () => {
    const error = new ParseError('MESSAGE')
    const stack = error.stack && error.stack.split(/[\n\r]+/)
    expect(stack && stack.length).toBeGreaterThan(1)
    // first line names the error, second line is where it was created: this file
    const [headline, firstFrame] = stack
    expect(headline).toBe('ParseError: MESSAGE')
    expect(firstFrame).toContain(__filename)
  })

  it('Error should not be instanceof ParseError', () => {
    expect(new Error()).not.toBeInstanceOf(ParseError)
  })
})