Skip to content

Commit

Permalink
feat(Compiler): case sensitive html parser
Browse files Browse the repository at this point in the history
  • Loading branch information
vicb committed Nov 14, 2015
1 parent 7e66fb8 commit 411663b
Show file tree
Hide file tree
Showing 13 changed files with 737 additions and 344 deletions.
31 changes: 21 additions & 10 deletions modules/angular2/src/compiler/html_lexer.ts
Expand Up @@ -6,7 +6,6 @@ import {
CONST_EXPR,
serializeEnum
} from 'angular2/src/facade/lang';
import {BaseException} from 'angular2/src/facade/exceptions';
import {ParseLocation, ParseError, ParseSourceFile, ParseSourceSpan} from './parse_util';
import {getHtmlTagDefinition, HtmlTagContentType, NAMED_ENTITIES} from './html_tags';

Expand Down Expand Up @@ -50,6 +49,7 @@ export function tokenizeHtml(sourceContent: string, sourceUrl: string): HtmlToke
const $EOF = 0;
const $TAB = 9;
const $LF = 10;
const $FF = 12;
const $CR = 13;

const $SPACE = 32;
Expand Down Expand Up @@ -247,17 +247,22 @@ class _HtmlTokenizer {
}
}

private _readChar(decodeEntities: boolean): string {
private _readChar(decodeEntities: boolean, extraNotCharRef: number = null): string {
if (decodeEntities && this.peek === $AMPERSAND) {
var start = this._getLocation();
this._attemptUntilChar($SEMICOLON);
this._advance();
var entitySrc = this.input.substring(start.offset + 1, this.index - 1);
var decodedEntity = decodeEntity(entitySrc);
if (isPresent(decodedEntity)) {
return decodedEntity;
if (isCharRefStart(this.peek, extraNotCharRef)) {
this._attemptUntilChar($SEMICOLON);
this._advance();
var entitySrc = this.input.substring(start.offset + 1, this.index - 1);
var decodedEntity = decodeEntity(entitySrc);
if (isPresent(decodedEntity)) {
return decodedEntity;
} else {
throw this._createError(unknownEntityErrorMsg(entitySrc), start);
}
} else {
throw this._createError(unknownEntityErrorMsg(entitySrc), start);
return '&';
}
} else {
var index = this.index;
Expand Down Expand Up @@ -389,7 +394,7 @@ class _HtmlTokenizer {
this._advance();
var parts = [];
while (this.peek !== quoteChar) {
parts.push(this._readChar(true));
parts.push(this._readChar(true, quoteChar));
}
value = parts.join('');
this._advance();
Expand Down Expand Up @@ -440,7 +445,13 @@ function isWhitespace(code: number): boolean {

/**
 * Whether the character code `code` marks the end of a name: whitespace (as decided by
 * `isWhitespace`) or one of `$GT`, `$SLASH`, `$SQ`, `$DQ`, `$EQ`.
 */
function isNameEnd(code: number): boolean {
  return isWhitespace(code) || code === $GT || code === $SLASH || code === $SQ || code === $DQ ||
         code === $EQ;
}

// http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
/**
 * Tells whether `code` may begin a character reference.
 *
 * @param code character code to classify
 * @param extraNotCharRef one additional character code that must not be accepted
 * @returns `false` when `code` is `$TAB`, `$LF`, `$FF`, `$SPACE`, `$LT`, `$AMPERSAND`, `$EOF`
 *     or is strictly equal to `extraNotCharRef`; `true` for every other code
 */
function isCharRefStart(code: number, extraNotCharRef: number): boolean {
  const isExcludedCode = code == $TAB || code == $LF || code == $FF || code == $SPACE ||
                         code == $LT || code == $AMPERSAND || code == $EOF;
  if (isExcludedCode) {
    return false;
  }
  return code !== extraNotCharRef;
}

function isPrefixEnd(code: number): boolean {
Expand Down
81 changes: 31 additions & 50 deletions modules/angular2/src/compiler/html_parser.ts
Expand Up @@ -9,34 +9,22 @@ import {
serializeEnum,
CONST_EXPR
} from 'angular2/src/facade/lang';
import {DOM} from 'angular2/src/core/dom/dom_adapter';

import {ListWrapper} from 'angular2/src/facade/collection';

import {HtmlAst, HtmlAttrAst, HtmlTextAst, HtmlElementAst} from './html_ast';

import {escapeDoubleQuoteString} from './util';
import {Injectable} from 'angular2/src/core/di';
import {HtmlToken, HtmlTokenType, tokenizeHtml} from './html_lexer';
import {ParseError, ParseLocation, ParseSourceSpan} from './parse_util';
import {HtmlTagDefinition, getHtmlTagDefinition} from './html_tags';

// TODO: remove this, just provide a plain error message!
/** Kinds of errors that can be reported while building the HTML tree. */
export enum HtmlTreeErrorType {
UnexpectedClosingTag
}

// Human-readable messages for HtmlTreeErrorType; entries are looked up with
// `serializeEnum(type)`, so presumably they must stay in enum declaration order.
const HTML_ERROR_TYPE_MSGS = CONST_EXPR(['Unexpected closing tag']);


/**
 * Parse error produced while building the HTML tree, tagged with the name of the
 * element it relates to.
 */
export class HtmlTreeError extends ParseError {
  /** Factory equivalent to calling the constructor with the same arguments. */
  static create(elementName: string, location: ParseLocation, msg: string): HtmlTreeError {
    return new HtmlTreeError(elementName, location, msg);
  }

  /**
   * @param elementName name of the element the error refers to (exposed as a public field)
   * @param location source location, forwarded to `ParseError`
   * @param msg error message, forwarded to `ParseError`
   */
  constructor(public elementName: string, location: ParseLocation, msg: string) {
    super(location, msg);
  }
}
Expand All @@ -55,11 +43,8 @@ export class HtmlParser {
}
}

var NS_PREFIX_RE = /^@[^:]+/g;

class TreeBuilder {
private index: number = -1;
private length: number;
private peek: HtmlToken;

private rootNodes: HtmlAst[] = [];
Expand Down Expand Up @@ -129,7 +114,7 @@ class TreeBuilder {
while (this.peek.type === HtmlTokenType.ATTR_NAME) {
attrs.push(this._consumeAttr(this._advance()));
}
var fullName = elementName(prefix, name, this._getParentElement());
var fullName = getElementFullName(prefix, name, this._getParentElement());
var voidElement = false;
// Note: There could have been a tokenizer error
// so that we don't get a token for the end tag...
Expand All @@ -150,15 +135,13 @@ class TreeBuilder {
}

private _pushElement(el: HtmlElementAst) {
var stackIndex = this.elementStack.length - 1;
while (stackIndex >= 0) {
for (var stackIndex = this.elementStack.length - 1; stackIndex >= 0; stackIndex--) {
var parentEl = this.elementStack[stackIndex];
if (!getHtmlTagDefinition(parentEl.name).isClosedByChild(el.name)) {
if (getHtmlTagDefinition(parentEl.name).isClosedByChild(el.name)) {
ListWrapper.splice(this.elementStack, stackIndex, this.elementStack.length - stackIndex);
break;
}
stackIndex--;
}
this.elementStack.splice(stackIndex, this.elementStack.length - 1 - stackIndex);

var tagDef = getHtmlTagDefinition(el.name);
var parentEl = this._getParentElement();
Expand All @@ -175,35 +158,29 @@ class TreeBuilder {

/**
 * Handles an end-tag token: resolves the tag's full name relative to the current parent
 * element and pops the matching element off the stack. When nothing could be popped, an
 * "Unexpected closing tag" error is recorded at the start of the token.
 */
private _consumeEndTag(endTagToken: HtmlToken) {
  var fullName =
      getElementFullName(endTagToken.parts[0], endTagToken.parts[1], this._getParentElement());
  if (!this._popElement(fullName)) {
    // The message shows the local name as written (parts[1]), not the resolved full name.
    this.errors.push(HtmlTreeError.create(fullName, endTagToken.sourceSpan.start,
                                          `Unexpected closing tag "${endTagToken.parts[1]}"`));
  }
}

/**
 * Closes the innermost open element named `fullName` (compared case-insensitively).
 *
 * Walks the element stack from the top. On a match, the matching element and everything
 * above it are removed from the stack.
 *
 * @returns `true` when a matching element was found and removed; `false` when an element
 *     whose tag definition is not `closedByParent` is met first, or when the stack holds
 *     no match at all (the stack is left untouched in both cases)
 */
private _popElement(fullName: string): boolean {
  for (let stackIndex = this.elementStack.length - 1; stackIndex >= 0; stackIndex--) {
    var el = this.elementStack[stackIndex];
    if (el.name.toLowerCase() == fullName.toLowerCase()) {
      ListWrapper.splice(this.elementStack, stackIndex, this.elementStack.length - stackIndex);
      return true;
    }
    // An element that its parent's end tag may not implicitly close blocks the search.
    if (!getHtmlTagDefinition(el.name).closedByParent) {
      return false;
    }
  }
  return false;
}

private _consumeAttr(attrName: HtmlToken): HtmlAttrAst {
var fullName = elementName(attrName.parts[0], attrName.parts[1], null);
var fullName = mergeNsAndName(attrName.parts[0], attrName.parts[1]);
var end = attrName.sourceSpan.end;
var value = '';
if (this.peek.type === HtmlTokenType.ATTR_VALUE) {
Expand All @@ -228,20 +205,24 @@ class TreeBuilder {
}
}

function elementName(prefix: string, localName: string, parentElement: HtmlElementAst) {
/**
 * Combines an optional namespace prefix with a local name.
 *
 * @returns `@prefix:localName` when `prefix` is present, otherwise `localName` unchanged
 */
function mergeNsAndName(prefix: string, localName: string): string {
  if (!isPresent(prefix)) {
    return localName;
  }
  return `@${prefix}:${localName}`;
}

/**
 * Computes the full (namespace-qualified) name of an element.
 *
 * When no explicit `prefix` is given, the prefix falls back first to the implicit namespace
 * prefix of the tag definition for `localName`, then to the namespace prefix of
 * `parentElement` when one is present.
 *
 * @param prefix explicit namespace prefix, may be blank
 * @param localName tag name without its prefix
 * @param parentElement enclosing element, may be blank
 * @returns `@prefix:localName` when a prefix was resolved, otherwise `localName`
 */
function getElementFullName(prefix: string, localName: string,
                            parentElement: HtmlElementAst): string {
  if (isBlank(prefix)) {
    prefix = getHtmlTagDefinition(localName).implicitNamespacePrefix;
    if (isBlank(prefix) && isPresent(parentElement)) {
      prefix = namespacePrefix(parentElement.name);
    }
  }

  return mergeNsAndName(prefix, localName);
}

// Matches the leading `@prefix` of a namespaced name such as `@svg:rect`;
// capture group 1 holds the prefix without the `@`.
var NS_PREFIX_RE = /^@([^:]+)/g;

function namespacePrefix(elementName: string): string {
var match = RegExpWrapper.firstMatch(NS_PREFIX_RE, elementName);
return isBlank(match) ? null : match[1];
Expand Down
114 changes: 93 additions & 21 deletions modules/angular2/src/compiler/html_tags.ts
@@ -1,7 +1,61 @@
import {isPresent, isBlank, normalizeBool, CONST_EXPR} from 'angular2/src/facade/lang';

// see http://www.w3.org/TR/html51/syntax.html#named-character-references
// see https://html.spec.whatwg.org/multipage/entities.json
// This list is not exhaustive to keep the compiler footprint low.
// The `&#123;` / `&#x1ab;` syntax should be used when the named character reference does not exist.
//
// Maps an entity name (the text between `&` and `;`, case-sensitive) to its replacement text.
export const NAMED_ENTITIES = CONST_EXPR({
  // Markup-significant characters and the non-breaking space.
  'lt': '<',
  'gt': '>',
  'nbsp': '\u00A0',
  'amp': '&',
  // Upper-case accented letters.
  'Aacute': '\u00C1',
  'Acirc': '\u00C2',
  'Agrave': '\u00C0',
  'Atilde': '\u00C3',
  'Auml': '\u00C4',
  'Ccedil': '\u00C7',
  'Eacute': '\u00C9',
  'Ecirc': '\u00CA',
  'Egrave': '\u00C8',
  'Euml': '\u00CB',
  'Iacute': '\u00CD',
  'Icirc': '\u00CE',
  'Igrave': '\u00CC',
  'Iuml': '\u00CF',
  'Oacute': '\u00D3',
  'Ocirc': '\u00D4',
  'Ograve': '\u00D2',
  'Otilde': '\u00D5',
  'Ouml': '\u00D6',
  'Uacute': '\u00DA',
  'Ucirc': '\u00DB',
  'Ugrave': '\u00D9',
  'Uuml': '\u00DC',
  // Lower-case accented letters.
  'aacute': '\u00E1',
  'acirc': '\u00E2',
  'agrave': '\u00E0',
  'atilde': '\u00E3',
  'auml': '\u00E4',
  'ccedil': '\u00E7',
  'eacute': '\u00E9',
  'ecirc': '\u00EA',
  'egrave': '\u00E8',
  'euml': '\u00EB',
  'iacute': '\u00ED',
  'icirc': '\u00EE',
  'igrave': '\u00EC',
  'iuml': '\u00EF',
  'oacute': '\u00F3',
  'ocirc': '\u00F4',
  'ograve': '\u00F2',
  'otilde': '\u00F5',
  'ouml': '\u00F6',
  'uacute': '\u00FA',
  'ucirc': '\u00FB',
  'ugrave': '\u00F9',
  'uuml': '\u00FC',
});

export enum HtmlTagContentType {
RAW_TEXT,
Expand All @@ -11,54 +65,72 @@ export enum HtmlTagContentType {

/**
 * Parsing rules for a single tag: which child tags implicitly close it, which parent it
 * requires, its implicit namespace prefix and how its content is tokenized.
 */
export class HtmlTagDefinition {
  // Set of child tag names that implicitly close this element; the key '*' means "any child".
  private closedByChildren: {[key: string]: boolean} = {};
  // True when at least one closing child was configured.
  public closedByParent: boolean = false;
  public requiredParent: string;
  public implicitNamespacePrefix: string;
  public contentType: HtmlTagContentType;

  /**
   * @param closedByChildren comma-separated list of child tag names (or `*`) that close this
   *     element; surrounding whitespace of each entry is trimmed
   * @param requiredParent name of the parent tag this element requires
   * @param implicitNamespacePrefix namespace prefix applied when none is written explicitly
   * @param contentType content model; defaults to `HtmlTagContentType.PARSABLE_DATA`
   */
  constructor({closedByChildren, requiredParent, implicitNamespacePrefix, contentType}: {
    closedByChildren?: string,
    requiredParent?: string,
    implicitNamespacePrefix?: string,
    contentType?: HtmlTagContentType
  } = {}) {
    if (isPresent(closedByChildren) && closedByChildren.length > 0) {
      closedByChildren.split(',').forEach(tagName => this.closedByChildren[tagName.trim()] = true);
      this.closedByParent = true;
    }
    this.requiredParent = requiredParent;
    this.implicitNamespacePrefix = implicitNamespacePrefix;
    this.contentType = isPresent(contentType) ? contentType : HtmlTagContentType.PARSABLE_DATA;
  }

  /**
   * Whether an element of this tag needs an extra parent inserted: true when a required
   * parent is configured and `currentParent` is blank or is a different tag.
   */
  requireExtraParent(currentParent: string): boolean {
    // toLowerCase (not toLocaleLowerCase) keeps the comparison locale-independent.
    return isPresent(this.requiredParent) &&
           (isBlank(currentParent) || this.requiredParent != currentParent.toLowerCase());
  }

  /** Whether a child named `name` (matched lower-cased) implicitly closes this element. */
  isClosedByChild(name: string): boolean {
    return normalizeBool(this.closedByChildren['*']) ||
           normalizeBool(this.closedByChildren[name.toLowerCase()]);
  }
}

// see http://www.w3.org/TR/html51/syntax.html#optional-tags
// This implementation does not fully conform to the HTML5 spec.
//
// Per-tag parsing rules keyed by tag name. `closedByChildren` is a comma-separated list of
// child tag names that implicitly close the element; '*' means any child closes it.
var TAG_DEFINITIONS: {[key: string]: HtmlTagDefinition} = {
  'link': new HtmlTagDefinition({closedByChildren: '*'}),
  'ng-content': new HtmlTagDefinition({closedByChildren: '*'}),
  'img': new HtmlTagDefinition({closedByChildren: '*'}),
  'input': new HtmlTagDefinition({closedByChildren: '*'}),
  'hr': new HtmlTagDefinition({closedByChildren: '*'}),
  'br': new HtmlTagDefinition({closedByChildren: '*'}),
  'wbr': new HtmlTagDefinition({closedByChildren: '*'}),
  'p': new HtmlTagDefinition({
    closedByChildren:
        'address,article,aside,blockquote,div,dl,fieldset,footer,form,h1,h2,h3,h4,h5,h6,header,hgroup,hr,main,nav,ol,p,pre,section,table,ul'
  }),
  'thead': new HtmlTagDefinition({closedByChildren: 'tbody,tfoot'}),
  'tbody': new HtmlTagDefinition({closedByChildren: 'tbody,tfoot'}),
  'tfoot': new HtmlTagDefinition({closedByChildren: 'tbody'}),
  'tr': new HtmlTagDefinition({closedByChildren: 'tr', requiredParent: 'tbody'}),
  'td': new HtmlTagDefinition({closedByChildren: 'td,th'}),
  'th': new HtmlTagDefinition({closedByChildren: 'td,th'}),
  'col': new HtmlTagDefinition({closedByChildren: 'col', requiredParent: 'colgroup'}),
  'svg': new HtmlTagDefinition({implicitNamespacePrefix: 'svg'}),
  'math': new HtmlTagDefinition({implicitNamespacePrefix: 'math'}),
  'li': new HtmlTagDefinition({closedByChildren: 'li'}),
  'dt': new HtmlTagDefinition({closedByChildren: 'dt,dd'}),
  'dd': new HtmlTagDefinition({closedByChildren: 'dt,dd'}),
  'rb': new HtmlTagDefinition({closedByChildren: 'rb,rt,rtc,rp'}),
  'rt': new HtmlTagDefinition({closedByChildren: 'rb,rt,rtc,rp'}),
  'rtc': new HtmlTagDefinition({closedByChildren: 'rb,rtc,rp'}),
  'rp': new HtmlTagDefinition({closedByChildren: 'rb,rt,rtc,rp'}),
  'optgroup': new HtmlTagDefinition({closedByChildren: 'optgroup'}),
  'style': new HtmlTagDefinition({contentType: HtmlTagContentType.RAW_TEXT}),
  'script': new HtmlTagDefinition({contentType: HtmlTagContentType.RAW_TEXT}),
  'title': new HtmlTagDefinition({contentType: HtmlTagContentType.ESCAPABLE_RAW_TEXT}),
  'textarea': new HtmlTagDefinition({contentType: HtmlTagContentType.ESCAPABLE_RAW_TEXT}),
};

var DEFAULT_TAG_DEFINITION = new HtmlTagDefinition();
Expand Down

0 comments on commit 411663b

Please sign in to comment.