tests 1 through 4 pass except UCS2 issues

1 parent 48598a4 commit a2641580b5f5d25d530da761d8153ccbc2fdb3d7 @wycats committed Apr 14, 2012
@@ -1,5 +1,7 @@
data =
- "&" -> charRefInData
+ "&" -> charRefInData {
+ lexer.additionalChar = null;
+ }
"<" -> tagOpen
NULL (error) {
lexer.setToken(TkChar, char);
@@ -53,7 +55,7 @@ endTagOpen =
}
tagName =
- SPACE -> beforeAttributeName
+ ANYSPACE -> beforeAttributeName
"/" -> selfClosingStartTag
">" -> data {
lexer.emitCurrentToken();
@@ -70,7 +72,7 @@ tagName =
}
beforeAttributeName =
- SPACE
+ ANYSPACE
"/" -> selfClosingStartTag
">" -> data {
lexer.emitCurrentToken();
@@ -90,7 +92,7 @@ beforeAttributeName =
}
attributeName =
- SPACE -> afterAttributeName
+ ANYSPACE -> afterAttributeName
"/" -> selfClosingStartTag
"=" -> beforeAttributeValue
">" -> data {
@@ -111,7 +113,7 @@ attributeName =
}
afterAttributeName =
- SPACE
+ ANYSPACE
"/" -> selfClosingStartTag
"=" -> beforeAttributeValue
">" -> data {
@@ -132,7 +134,7 @@ afterAttributeName =
}
beforeAttributeValue =
- SPACE
+ ANYSPACE
QUOTE -> attributeValueDoubleQuoted
"&" -> attributeValueUnquoted {
lexer.reconsume();
@@ -154,7 +156,9 @@ beforeAttributeValue =
attributeValueDoubleQuoted =
QUOTE -> afterAttributeValueQuoted
- "&" -> charRefInAttributeValue
+ "&" -> charRefInAttributeValue {
+ lexer.additionalChar = "\"";
+ }
NULL (error) {
token.pushAttributeValue(REPLACEMENT);
}
@@ -165,7 +169,9 @@ attributeValueDoubleQuoted =
attributeValueSingleQuoted =
"'" -> afterAttributeValueQuoted
- "&" -> charRefInAttributeValue
+ "&" -> charRefInAttributeValue {
+ lexer.additionalChar = "'";
+ }
NULL (error) {
token.pushAttributeValue(REPLACEMENT);
}
@@ -175,8 +181,10 @@ attributeValueSingleQuoted =
}
attributeValueUnquoted =
- SPACE -> beforeAttributeName
- "&" -> charRefInAttributeValue
+ ANYSPACE -> beforeAttributeName
+ "&" -> charRefInAttributeValue {
+ lexer.additionalChar = ">";
+ }
">" -> data {
lexer.emitCurrentToken();
}
@@ -192,21 +200,21 @@ attributeValueUnquoted =
}
charRefInAttributeValue =
- default -> data {{
+ default -> last {{
// This doesn't currently work
// TODO: specify the "allowed extra char"
// TODO: implement a history symbol
var token = lexer.consumeCharacterReference();
if (token) {
- lexer.pushToken(token);
+ lexer.token.pushAttributeValue(token.data);
} else {
- lexer.pushToken(TkChar, "&");
+ lexer.token.pushAttributeValue("&");
}
}}
afterAttributeValueQuoted =
- SPACE -> beforeAttributeName
+ ANYSPACE -> beforeAttributeName
"/" -> selfClosingStartTag
">" -> data {
lexer.emitCurrentToken();
@@ -275,7 +283,7 @@ commentStart =
commentStartDash =
"-" -> commentEnd
NULL (error) -> comment {
- token.addChars("-" + REPLACEMENT);
+ token.addChars("-", REPLACEMENT);
}
">" (error) -> data {
lexer.emitCurrentToken();
@@ -284,7 +292,7 @@ commentStartDash =
lexer.emitCurrentToken();
}
default -> comment {
- token.addChars("-" + char);
+ token.addChars("-", char);
}
comment =
@@ -302,21 +310,21 @@ comment =
commentEndDash =
"-" -> commentEnd
NULL (error) -> comment {
- token.addChars("-" + REPLACEMENT);
+ token.addChars("-", REPLACEMENT);
}
EOF (error) -> data {
lexer.emitCurrentToken();
}
default -> comment {
- token.addChars("-" + char);
+ token.addChars("-", char);
}
commentEnd =
">" -> data {
lexer.emitCurrentToken();
}
NULL (error) -> comment {
- token.addChars("-" + REPLACEMENT);
+ token.addChars("-", "-", REPLACEMENT);
}
"!" (error) -> commentEndBang
"-" (error) {
@@ -326,28 +334,28 @@ commentEnd =
lexer.emitCurrentToken();
}
default (error) -> comment {
- token.addChars("--" + char);
+ token.addChars("-", "-", char);
}
commentEndBang =
"-" -> commentEndDash {
- token.addChars("--!");
+ token.addChars("-", "-", "!");
}
">" -> data {
lexer.emitCurrentToken();
}
NULL (error) -> comment {
- token.addChars("--!");
+ token.addChars("-", "-", "!");
}
EOF (error) -> data {
lexer.emitCurrentToken();
}
default -> comment {
- token.addChars("--!" + char);
+ token.addChars("-", "-", "!" + char);
}
DOCTYPE =
- SPACE -> beforeDOCTYPEName
+ ANYSPACE -> beforeDOCTYPEName
EOF (error) -> data {
lexer.setToken(TkDOCTYPE);
lexer.token.forceQuirks = true;
@@ -358,7 +366,7 @@ DOCTYPE =
}
beforeDOCTYPEName =
- SPACE
+ ANYSPACE
/[A-Z]/ -> DOCTYPEName {
lexer.setToken(TkDOCTYPE);
lexer.token.name = char.toLowerCase();
@@ -383,7 +391,7 @@ beforeDOCTYPEName =
}
DOCTYPEName =
- SPACE -> afterDOCTYPEName
+ ANYSPACE -> afterDOCTYPEName
">" -> data {
lexer.emitCurrentToken();
}
@@ -402,7 +410,7 @@ DOCTYPEName =
}
afterDOCTYPEName =
- SPACE
+ ANYSPACE
">" -> data {
lexer.emitCurrentToken();
}
@@ -426,7 +434,7 @@ afterDOCTYPEName =
}
afterDOCTYPEPublicKeyword =
- SPACE -> beforeDOCTYPEPublicIdentifier
+ ANYSPACE -> beforeDOCTYPEPublicIdentifier
QUOTE (error) -> DOCTYPEPublicIdentifierDoubleQuoted {
token.publicIdentifier = "";
}
@@ -446,7 +454,7 @@ afterDOCTYPEPublicKeyword =
}
beforeDOCTYPEPublicIdentifier =
- SPACE
+ ANYSPACE
QUOTE -> DOCTYPEPublicIdentifierDoubleQuoted {
token.publicIdentifier = "";
}
@@ -500,7 +508,7 @@ DOCTYPEPublicIdentifierSingleQuoted =
}
afterDOCTYPEPublicIdentifier =
- SPACE -> betweenDOCTYPEPublicAndSystemIdentifier
+ ANYSPACE -> betweenDOCTYPEPublicAndSystemIdentifier
">" -> data {
lexer.emitCurrentToken();
}
@@ -514,12 +522,12 @@ afterDOCTYPEPublicIdentifier =
token.forceQuirks = true;
lexer.emitCurrentToken();
}
- default -> bogusDOCTYPE {
+ default (error) -> bogusDOCTYPE {
token.forceQuirks = true;
}
betweenDOCTYPEPublicAndSystemIdentifier =
- SPACE
+ ANYSPACE
">" -> data {
lexer.emitCurrentToken();
}
@@ -538,7 +546,7 @@ betweenDOCTYPEPublicAndSystemIdentifier =
}
afterDOCTYPESystemKeyword =
- SPACE -> beforeDOCTYPESystemIdentifier
+ ANYSPACE -> beforeDOCTYPESystemIdentifier
QUOTE (error) -> DOCTYPESystemIdentifierDoubleQuoted {
token.systemIdentifier = "";
}
@@ -558,7 +566,7 @@ afterDOCTYPESystemKeyword =
}
beforeDOCTYPESystemIdentifier =
- SPACE
+ ANYSPACE
QUOTE -> DOCTYPESystemIdentifierDoubleQuoted {
token.systemIdentifier = "";
}
@@ -612,7 +620,7 @@ DOCTYPESystemIdentifierSingleQuoted =
}
afterDOCTYPESystemIdentifier =
- SPACE
+ ANYSPACE
">" -> data {
lexer.emitCurrentToken();
}
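
Note on the character-reference changes above: the new lexer.additionalChar assignments and the "-> last" transition wire up the "additional allowed character" handling, though the action's own comments still mark that check as a TODO. A rough sketch of the intended flow, assembled only from the actions in this diff (additionalChar, consumeCharacterReference, lastState, and pushAttributeValue come from the diff itself, not from a documented API):

    // Rough sketch of the intended flow; not part of the commit.
    // In an attribute-value state: record the extra character that should stop
    // reference consumption, then switch to the character-reference state.
    lexer.additionalChar = "\"";                  // '"', "'", or ">" depending on the state
    lexer.setState('charRefInAttributeValue');

    // charRefInAttributeValue (a raw section with a "last" transition): first
    // arrange to return to whichever attribute-value state we came from, then
    // try to consume a named or numeric reference and append the result.
    lexer.setState(lexer.lastState.toString());
    var token = lexer.consumeCharacterReference();
    if (token) {
      lexer.token.pushAttributeValue(token.data);   // resolved reference data
    } else {
      lexer.token.pushAttributeValue("&");          // no valid reference: keep the literal "&"
    }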
@@ -84,11 +84,17 @@ def normal_section(section)
def raw_section(section)
rule = section.rules.first
+ if rule.transition == "last"
+ transition = "lexer.lastState.toString()"
+ elsif rule.transition
+ transition = "'#{rule.transition}'"
+ end
+
out = "states.#{section.name} = {\n"
out += " toString: function() { return '#{section.name}'; },\n\n"
out += " consume: function(lexer) {\n"
- out += " lexer.setState('#{rule.transition}');\n" if rule.transition
+ out += " lexer.setState(#{transition});\n" if transition
out += action(rule.action_content, 4)
out += " }\n"
out += "};\n"
@@ -113,7 +119,7 @@ def visit_normal_Rule(rule)
if desc == "default"
spaces = " "
- elsif rule.type == :regex
+ elsif rule.type == :regex || rule.value == "ANYSPACE"
out += " if (#{desc}.test(char)) {\n"
spaces = " "
else
@@ -123,6 +129,8 @@ def visit_normal_Rule(rule)
if rule.transition && rule.error
out += "#{spaces}lexer.errorState('#{rule.transition}');\n"
+ elsif rule.transition && rule.transition == "last"
+ out += "#{spaces}lexer.setState(lexer.lastState.toString());\n"
elsif rule.transition
out += "#{spaces}lexer.setState('#{rule.transition}');\n"
elsif rule.error
@@ -185,6 +193,7 @@ def strip_leading_whitespace(content)
def case_rules?(section)
section.rules.all? do |rule|
+ break false if rule.value == "ANYSPACE"
rule.type == :string || rule.type == :symbol
end
end
@@ -312,7 +321,7 @@ def parse_rule_match
end
unless token
- @scanner.scan /NULL|EOF|SPACE|QUOTE|default/
+ @scanner.scan /NULL|EOF|SPACE|ANYSPACE|QUOTE|default/
if @scanner.matched?
token = RuleNode.new(:symbol, @scanner.matched)
end
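
For reference, with the template changes above, a raw section whose transition is "last" (such as charRefInAttributeValue) would presumably generate output shaped like the following; this is reconstructed from the string templates in this diff, not taken from an actual build:

    // Illustrative generated shape for a raw section with a "last" transition.
    states.charRefInAttributeValue = {
      toString: function() { return 'charRefInAttributeValue'; },

      consume: function(lexer) {
        lexer.setState(lexer.lastState.toString());
        // ...action body from the section's {{ }} block goes here...
      }
    };

Likewise, because case_rules? now rejects sections containing ANYSPACE and visit_normal_Rule treats ANYSPACE like a regex rule, those rules presumably compile to an if (ANYSPACE.test(char)) { ... } check, against whatever pattern ANYSPACE resolves to, rather than a single-character case label, so a whole class of whitespace characters can share one rule.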