Skip to content

Commit

Permalink
fix(compiler): support full range of entity decoding in browser builds
Browse files Browse the repository at this point in the history
BREAKING CHANGE: compiler options have been adjusted.
    - new option `decodeEntities` is added.
    - `namedCharacterReferences` option has been removed.
    - `maxCRNameLength` option has been removed.
  • Loading branch information
yyx990803 committed Apr 8, 2020
1 parent 8c17535 commit 1f6e72b
Show file tree
Hide file tree
Showing 11 changed files with 1,423 additions and 2,987 deletions.
3,684 changes: 1,178 additions & 2,506 deletions packages/compiler-core/__tests__/__snapshots__/parse.spec.ts.snap

Large diffs are not rendered by default.

272 changes: 2 additions & 270 deletions packages/compiler-core/__tests__/parse.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import {
NodeTypes,
Position,
TextNode,
AttributeNode,
InterpolationNode
} from '../src/ast'

Expand Down Expand Up @@ -163,114 +162,6 @@ describe('compiler: parse', () => {
}
})
})

// Text content case: only `amp` (without a trailing semicolon) is registered
// through `namedCharacterReferences`, and the input is `&ampersand;`.
test('HTML entities compatibility in text (https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state).', () => {
// Mock that records every error passed to `onError`.
const spy = jest.fn()
const ast = baseParse('&ampersand;', {
namedCharacterReferences: { amp: '&' },
onError: spy
})
const text = ast.children[0] as TextNode

// Expected content: the leading `&amp` becomes `&` and the remaining
// `ersand;` is kept as-is. `loc` still spans the full 11-character source.
expect(text).toStrictEqual({
type: NodeTypes.TEXT,
content: '&ersand;',
loc: {
start: { offset: 0, line: 1, column: 1 },
end: { offset: 11, line: 1, column: 12 },
source: '&ampersand;'
}
})
// Expected errors: a single missing-semicolon error whose location starts at
// offset 4, i.e. immediately after the four characters `&amp`.
expect(spy.mock.calls).toMatchObject([
[
{
code: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
loc: {
start: { offset: 4, line: 1, column: 5 }
}
}
]
])
})

// Attribute value case: both `amp` and `amp;` are registered, and three
// attribute values exercise different characters following `&amp`.
test('HTML entities compatibility in attribute (https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state).', () => {
// Mock that records every error passed to `onError`.
const spy = jest.fn()
const ast = baseParse(
'<div a="&ampersand;" b="&amp;ersand;" c="&amp!"></div>',
{
namedCharacterReferences: { amp: '&', 'amp;': '&' },
onError: spy
}
)
const element = ast.children[0] as ElementNode
// Values of attributes `a`, `b` and `c`, in source order.
const text1 = (element.props[0] as AttributeNode).value
const text2 = (element.props[1] as AttributeNode).value
const text3 = (element.props[2] as AttributeNode).value

// Attribute `a` ("&ampersand;"): the expected content is identical to the
// raw text, i.e. nothing is decoded. `loc.source` includes the quotes.
expect(text1).toStrictEqual({
type: NodeTypes.TEXT,
content: '&ampersand;',
loc: {
start: { offset: 7, line: 1, column: 8 },
end: { offset: 20, line: 1, column: 21 },
source: '"&ampersand;"'
}
})
// Attribute `b` ("&amp;ersand;"): `&amp;` is expected to become `&`.
expect(text2).toStrictEqual({
type: NodeTypes.TEXT,
content: '&ersand;',
loc: {
start: { offset: 23, line: 1, column: 24 },
end: { offset: 37, line: 1, column: 38 },
source: '"&amp;ersand;"'
}
})
// Attribute `c` ("&amp!"): `&amp` is expected to become `&` even though no
// semicolon follows it.
expect(text3).toStrictEqual({
type: NodeTypes.TEXT,
content: '&!',
loc: {
start: { offset: 40, line: 1, column: 41 },
end: { offset: 47, line: 1, column: 48 },
source: '"&amp!"'
}
})
// Expected errors: a single missing-semicolon error at offset 45, which is
// the position of `!` in attribute `c`. No error is expected for `a` or `b`.
expect(spy.mock.calls).toMatchObject([
[
{
code: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
loc: {
start: { offset: 45, line: 1, column: 46 }
}
}
]
])
})

// Numeric reference case: `&#x86;` is parsed with default options apart from
// the error callback.
test('Some control character reference should be replaced.', () => {
// Mock that records every error passed to `onError`.
const spy = jest.fn()
const ast = baseParse('&#x86;', { onError: spy })
const text = ast.children[0] as TextNode

// Expected content: the reference is replaced by `†`. `loc` spans the whole
// 6-character source.
expect(text).toStrictEqual({
type: NodeTypes.TEXT,
content: '†',
loc: {
start: { offset: 0, line: 1, column: 1 },
end: { offset: 6, line: 1, column: 7 },
source: '&#x86;'
}
})
// Expected errors: a single control-character-reference error located at the
// start of the reference (offset 0), alongside the replacement above.
expect(spy.mock.calls).toMatchObject([
[
{
code: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
loc: {
start: { offset: 0, line: 1, column: 1 }
}
}
]
])
})
})

describe('Interpolation', () => {
Expand Down Expand Up @@ -1652,12 +1543,10 @@ foo
expect(baz.loc.end).toEqual({ line: 2, column: 28, offset })
})

describe('namedCharacterReferences option', () => {
describe('decodeEntities option', () => {
test('use the given map', () => {
const ast: any = baseParse('&amp;&cups;', {
namedCharacterReferences: {
'cups;': '\u222A\uFE00' // UNION with serifs
},
decodeEntities: text => text.replace('&cups;', '\u222A\uFE00'),
onError: () => {} // Ignore errors
})

Expand Down Expand Up @@ -1756,60 +1645,6 @@ foo
errors: []
}
],
ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE: [
{
code: '<template>&#a;</template>',
errors: [
{
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template>&#xg;</template>',
errors: [
{
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template>&#99;</template>',
errors: []
},
{
code: '<template>&#xff;</template>',
errors: []
},
{
code: '<template attr="&#a;"></template>',
errors: [
{
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
loc: { offset: 16, line: 1, column: 17 }
}
]
},
{
code: '<template attr="&#xg;"></template>',
errors: [
{
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
loc: { offset: 16, line: 1, column: 17 }
}
]
},
{
code: '<template attr="&#99;"></template>',
errors: []
},
{
code: '<template attr="&#xff;"></template>',
errors: []
}
],
CDATA_IN_HTML_CONTENT: [
{
code: '<template><![CDATA[cdata]]></template>',
Expand All @@ -1825,37 +1660,6 @@ foo
errors: []
}
],
CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE: [
{
code: '<template>&#1234567;</template>',
errors: [
{
type: ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
CONTROL_CHARACTER_REFERENCE: [
{
code: '<template>&#0003;</template>',
errors: [
{
type: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template>&#x7F;</template>',
errors: [
{
type: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
DUPLICATE_ATTRIBUTE: [
{
code: '<template><div id="" id=""></div></template>',
Expand Down Expand Up @@ -2412,36 +2216,6 @@ foo
]
}
],
MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE: [
{
code: '<template>&amp</template>',
options: { namedCharacterReferences: { amp: '&' } },
errors: [
{
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
loc: { offset: 14, line: 1, column: 15 }
}
]
},
{
code: '<template>&#40</template>',
errors: [
{
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
loc: { offset: 14, line: 1, column: 15 }
}
]
},
{
code: '<template>&#x40</template>',
errors: [
{
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
loc: { offset: 15, line: 1, column: 16 }
}
]
}
],
MISSING_WHITESPACE_BETWEEN_ATTRIBUTES: [
{
code: '<template><div id="foo"class="bar"></div></template>',
Expand Down Expand Up @@ -2500,48 +2274,6 @@ foo
]
}
],
NONCHARACTER_CHARACTER_REFERENCE: [
{
code: '<template>&#xFFFE;</template>',
errors: [
{
type: ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template>&#x1FFFF;</template>',
errors: [
{
type: ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
NULL_CHARACTER_REFERENCE: [
{
code: '<template>&#0000;</template>',
errors: [
{
type: ErrorCodes.NULL_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
SURROGATE_CHARACTER_REFERENCE: [
{
code: '<template>&#xD800;</template>',
errors: [
{
type: ErrorCodes.SURROGATE_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME: [
{
code: "<template><div a\"bc=''></div></template>",
Expand Down
21 changes: 0 additions & 21 deletions packages/compiler-core/src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,7 @@ export function createCompilerError<T extends number>(
export const enum ErrorCodes {
// parse errors
ABRUPT_CLOSING_OF_EMPTY_COMMENT,
ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
CDATA_IN_HTML_CONTENT,
CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
CONTROL_CHARACTER_REFERENCE,
DUPLICATE_ATTRIBUTE,
END_TAG_WITH_ATTRIBUTES,
END_TAG_WITH_TRAILING_SOLIDUS,
Expand All @@ -49,12 +46,8 @@ export const enum ErrorCodes {
INVALID_FIRST_CHARACTER_OF_TAG_NAME,
MISSING_ATTRIBUTE_VALUE,
MISSING_END_TAG_NAME,
MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
NESTED_COMMENT,
NONCHARACTER_CHARACTER_REFERENCE,
NULL_CHARACTER_REFERENCE,
SURROGATE_CHARACTER_REFERENCE,
UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
Expand Down Expand Up @@ -101,14 +94,8 @@ export const enum ErrorCodes {
export const errorMessages: { [code: number]: string } = {
// parse errors
[ErrorCodes.ABRUPT_CLOSING_OF_EMPTY_COMMENT]: 'Illegal comment.',
[ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE]:
'Illegal numeric character reference: invalid character.',
[ErrorCodes.CDATA_IN_HTML_CONTENT]:
'CDATA section is allowed only in XML context.',
[ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE]:
'Illegal numeric character reference: too big.',
[ErrorCodes.CONTROL_CHARACTER_REFERENCE]:
'Illegal numeric character reference: control character.',
[ErrorCodes.DUPLICATE_ATTRIBUTE]: 'Duplicate attribute.',
[ErrorCodes.END_TAG_WITH_ATTRIBUTES]: 'End tag cannot have attributes.',
[ErrorCodes.END_TAG_WITH_TRAILING_SOLIDUS]: "Illegal '/' in tags.",
Expand All @@ -124,17 +111,9 @@ export const errorMessages: { [code: number]: string } = {
"Illegal tag name. Use '&lt;' to print '<'.",
[ErrorCodes.MISSING_ATTRIBUTE_VALUE]: 'Attribute value was expected.',
[ErrorCodes.MISSING_END_TAG_NAME]: 'End tag name was expected.',
[ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE]:
'Semicolon was expected.',
[ErrorCodes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES]:
'Whitespace was expected.',
[ErrorCodes.NESTED_COMMENT]: "Unexpected '<!--' in comment.",
[ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE]:
'Illegal numeric character reference: non character.',
[ErrorCodes.NULL_CHARACTER_REFERENCE]:
'Illegal numeric character reference: null character.',
[ErrorCodes.SURROGATE_CHARACTER_REFERENCE]:
'Illegal numeric character reference: non-pair surrogate.',
[ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME]:
'Attribute name cannot contain U+0022 ("), U+0027 (\'), and U+003C (<).',
[ErrorCodes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE]:
Expand Down
8 changes: 1 addition & 7 deletions packages/compiler-core/src/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,7 @@ export interface ParserOptions {
parent: ElementNode | undefined
) => TextModes
delimiters?: [string, string] // ['{{', '}}']

// Map to HTML entities. E.g., `{ "amp;": "&" }`
// The full set is https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
namedCharacterReferences?: Record<string, string>
// this number is based on the map above, but it should be pre-computed
// to avoid the cost on every parse() call.
maxCRNameLength?: number
decodeEntities?: (rawText: string, asAttr: boolean) => string
onError?: (error: CompilerError) => void
}

Expand Down
Loading

0 comments on commit 1f6e72b

Please sign in to comment.