Skip to content

Commit

Permalink
[XHR] Introduce a mime type parser to a WPT
Browse files Browse the repository at this point in the history
This CL introduces a mime type parser and stringifier to
wpt/XMLHttpRequest/send-content-type-charset in order to accept
implementations that are actually conforming to the spec but were rejected
by the test due to some text representation errors.

Bug: whatwg/mimesniff#39
Change-Id: I99466e2e596bb9c1b7f11267ad4ff0a886913086
  • Loading branch information
yutakahirano authored and chromium-wpt-export-bot committed Nov 21, 2017
1 parent 82544df commit fe6dd72
Showing 1 changed file with 240 additions and 66 deletions.
306 changes: 240 additions & 66 deletions XMLHttpRequest/send-content-type-charset.htm
Original file line number Diff line number Diff line change
Expand Up @@ -10,74 +10,248 @@
<body>
<div id="log"></div>
<script>
function request(input, output, title) {
title = title || document.title + ' - ' + input;
test(function() {
var client = new XMLHttpRequest()
client.open("POST", "resources/content.py", false)
if(input)
client.setRequestHeader("Content-Type", input)
client.send("TEST")
assert_equals(client.responseText, "TEST")
assert_equals(client.getResponseHeader("x-request-content-type"), output)
}, title)
/**
 * Tells whether `c` is a single character allowed inside an HTTP token.
 *
 * The accepted set is ASCII letters and digits plus the punctuation
 * ! # $ % & ' * + - . ^ _ ` | ~ . Everything else (controls, space, DEL,
 * non-ASCII, and delimiters such as ; " ( ) < > @ , : \ / [ ] ? = { })
 * is rejected.
 *
 * @param {string} c - The character to classify.
 * @returns {boolean} true only when `c` is exactly one token character.
 */
function isTokenChar(c) {
  // Anchored on both ends so the empty string and multi-character input are
  // rejected rather than being classified by their first code unit only.
  return /^[!#$%&'*+\-.^_`|~0-9A-Za-z]$/.test(c);
}
/**
 * Cursor over a header string that hands out tokens, quoted strings and
 * single delimiter characters. Spaces and tabs are skipped on construction
 * and after every successful consume, so callers never see them.
 *
 * Every consume* method that yields text returns an object of the shape
 * `{success: boolean, token: string|undefined}`; `token` is undefined
 * whenever `success` is false.
 */
class Tokenizer {
  /**
   * @param {string} string - The text to tokenize.
   */
  constructor(string) {
    this.string = string;
    this.index = 0;
    this.skipSpaces();
  }

  /**
   * Consumes the character `c` if it is next in the input.
   * @param {string} c - The exact character expected at the cursor.
   * @returns {boolean} true if the character was consumed.
   */
  consumeChar(c) {
    if (this.isDone() || this.string[this.index] !== c) {
      return false;
    }
    ++this.index;
    this.skipSpaces();
    return true;
  }

  /**
   * Consumes the longest run of token characters at the cursor.
   * @returns {{success: boolean, token: (string|undefined)}} Fails when the
   *     run is empty; the cursor is left untouched in that case.
   */
  consumeToken() {
    const start = this.index;
    const s = this.string;
    while (!this.isDone() && isTokenChar(s[this.index])) {
      ++this.index;
    }
    if (start === this.index) {
      return {success: false, token: undefined};
    }
    const token = s.substring(start, this.index);
    this.skipSpaces();
    return {success: true, token};
  }

  /**
   * Consumes a double-quoted string, resolving backslash escapes.
   * @returns {{success: boolean, token: (string|undefined)}} On success the
   *     token is the unescaped contents without the surrounding quotes. On
   *     failure (no opening quote, or no closing quote) the cursor is
   *     restored to where it was.
   */
  consumeQuotedString() {
    const s = this.string;
    const start = this.index;
    if (this.isDone() || s[this.index] !== '"') {
      return {success: false, token: undefined};
    }
    ++this.index;

    let contents = '';
    while (!this.isDone()) {
      if (s[this.index] === '"') {
        ++this.index;
        this.skipSpaces();
        return {success: true, token: contents};
      }
      if (s[this.index] === '\\') {
        // A backslash makes the following character literal; a backslash at
        // the very end of the input leaves the string unterminated.
        ++this.index;
        if (this.isDone()) {
          this.index = start;
          return {success: false, token: undefined};
        }
      }
      contents += s[this.index];
      ++this.index;
    }

    // Ran off the end without seeing the closing quote.
    this.index = start;
    return {success: false, token: undefined};
  }

  /**
   * Consumes a quoted string if the cursor is on a double quote, otherwise a
   * token.
   * @returns {{success: boolean, token: (string|undefined)}}
   */
  consumeTokenOrQuotedString() {
    if (this.isDone()) {
      return {success: false, token: undefined};
    }
    if (this.string[this.index] === '"') {
      return this.consumeQuotedString();
    }
    return this.consumeToken();
  }

  /** Advances the cursor past any spaces and tabs. */
  skipSpaces() {
    const s = this.string;
    while (!this.isDone() &&
           (s[this.index] === ' ' || s[this.index] === '\t')) {
      this.index++;
    }
  }

  /** @returns {boolean} true once the cursor has reached the end of input. */
  isDone() {
    return this.index >= this.string.length;
  }
}
/**
 * Parses a MIME type string of the form `type/subtype(;name[=value])*`.
 *
 * A parameter value may be a token or a double-quoted string; a parameter
 * without `=` gets `value: undefined`. Spaces and tabs between elements are
 * ignored by the tokenizer.
 *
 * @param {*} string - The text to parse. Anything that is not a string
 *     (for example `null` from a missing header) is treated as invalid.
 * @returns {{type: string, parameters: Array<{name: string,
 *     value: (string|undefined)}>}|undefined} The parsed MIME type, with
 *     `type` being "type/subtype", or undefined if the input is invalid.
 */
function parse(string) {
  // Non-string input would otherwise blow up inside the tokenizer when it
  // reads `.length`; report it like any other unparseable input instead.
  if (typeof string !== 'string') {
    return undefined;
  }

  const tokenizer = new Tokenizer(string);

  const {success: hasType, token: type} = tokenizer.consumeToken();
  if (!hasType || !tokenizer.consumeChar('/')) {
    return undefined;
  }

  const {success: hasSubtype, token: subtype} = tokenizer.consumeToken();
  if (!hasSubtype) {
    return undefined;
  }

  const parameters = [];
  while (!tokenizer.isDone()) {
    // Anything left after the subtype must be a `;`-introduced parameter.
    if (!tokenizer.consumeChar(';')) {
      return undefined;
    }
    const {success: hasName, token: name} = tokenizer.consumeToken();
    if (!hasName) {
      return undefined;
    }

    let value = undefined;
    if (tokenizer.consumeChar('=')) {
      const {success: hasValue, token} =
          tokenizer.consumeTokenOrQuotedString();
      if (!hasValue) {
        return undefined;
      }
      value = token;
    }
    parameters.push({name, value});
  }

  return {type: type + '/' + subtype, parameters};
}

/**
 * Returns `s` in a form usable as a MIME type parameter value.
 *
 * A value made up only of token characters is returned unchanged. Any other
 * value, including the empty string, is wrapped in double quotes with
 * backslashes and double quotes escaped by a backslash.
 *
 * @param {string} s - The raw parameter value.
 * @returns {string} The value, quoted if necessary.
 */
function quote(s) {
  // An empty value cannot be written as a token, so it has to be quoted.
  if (s === '') {
    return '""';
  }
  if ([...s].every((c) => isTokenChar(c))) {
    return s;
  }
  return '"' + s.replace(/\\/g, '\\\\').replace(/"/g, '\\"') + '"';
}

/**
 * Serializes a parsed MIME type back to a string, joining the type and each
 * parameter with `;` and no surrounding whitespace.
 *
 * @param {{type: string, parameters: Array<{name: string,
 *     value: (string|undefined)}>}} mimeType - A record as produced by
 *     parse(). A parameter whose value is undefined (or null) is written as
 *     a bare name; any other value is written as `name=value`, quoted when
 *     required.
 * @returns {string} The serialized MIME type.
 */
function stringify({type, parameters}) {
  // Compare against null/undefined rather than truthiness so that an empty
  // string value (from `name=""`) is kept instead of being dropped.
  const flattenParameters = parameters.map(
      ({name, value}) => value == null ? name : `${name}=${quote(value)}`);
  if (flattenParameters.length === 0) {
    return type;
  }
  return type + ';' + flattenParameters.join(';');
}

/**
 * Registers one synchronous test: POSTs "TEST" to resources/content.py,
 * optionally with a Content-Type request header, and checks the value
 * reported back in the `x-request-content-type` response header.
 *
 * When `expected` parses as a MIME type, the reported header is parsed and
 * re-serialized before comparison so that differences in textual
 * representation do not cause failures. Otherwise the reported header must
 * equal `expected` exactly.
 *
 * @param {?string} input - Content-Type to set on the request; a falsy value
 *     means no header is set.
 * @param {string} expected - The Content-Type expected to be reported back.
 * @param {string=} title - Test name; defaults to the document title
 *     followed by ' - ' and `input`.
 */
function request(input, expected, title) {
  const testName = title || document.title + ' - ' + input;
  test(() => {
    const client = new XMLHttpRequest();
    client.open("POST", "resources/content.py", false);
    if (input) {
      client.setRequestHeader("Content-Type", input);
    }
    client.send("TEST");
    assert_equals(client.responseText, "TEST");
    const actual = client.getResponseHeader("x-request-content-type");

    if (!parse(expected)) {
      // An invalid MIME type should be left unmodified.
      assert_equals(actual, expected);
      return;
    }

    // Fail with a readable message, rather than a TypeError from
    // stringify(), when the header is missing or is not a valid MIME type.
    const parsedActual = actual === null ? undefined : parse(actual);
    assert_not_equals(
        parsedActual, undefined,
        `x-request-content-type should be a valid MIME type, got: ${actual}`);
    assert_equals(stringify(parsedActual), expected);
  }, testName);
}

// Each row is [Content-Type to send, Content-Type expected back, test name];
// the rows are handed to request() in order.
for (const [input, expected, title] of [
  [
    "text; charset=ascii",
    "text; charset=ascii",
    "header with invalid MIME type is not changed",
  ],
  [
    "charset=ascii",
    "charset=ascii",
    "known charset but bogus header - missing MIME type",
  ],
  [
    "charset=bogus",
    "charset=bogus",
    "bogus charset and bogus header - missing MIME type",
  ],
  [
    "text/plain;charset=utf-8",
    "text/plain;charset=utf-8",
    "Correct text/plain MIME with charset",
  ],
  [
    "text/x-pink-unicorn",
    "text/x-pink-unicorn",
    "If no charset= param is given, implementation should not add one - unknown MIME",
  ],
  [
    "text/plain",
    "text/plain",
    "If no charset= param is given, implementation should not add one - known MIME",
  ],
  [
    "text/x-thepiano;charset= waddup",
    "text/x-thepiano;charset=UTF-8",
    "charset given but wrong, fix it (unknown MIME, bogus charset)",
  ],
  [
    "text/plain;charset=utf-8;charset=waddup",
    "text/plain;charset=utf-8;charset=UTF-8",
    "charset given but wrong, fix it (known MIME, bogus charset)",
  ],
  [
    "text/plain;charset=shift-jis",
    "text/plain;charset=UTF-8",
    "charset given but wrong, fix it (known MIME, actual charset)",
  ],
  [
    "text/x-pink-unicorn; charset=windows-1252; charset=bogus; notrelated; charset=ascii",
    "text/x-pink-unicorn; charset=UTF-8; charset=UTF-8; notrelated; charset=UTF-8",
    "If multiple charset parameters are given, all should be rewritten",
  ],
  [
    null,
    "text/plain;charset=UTF-8",
    "No content type set, give MIME and charset",
  ],
]) {
  request(input, expected, title);
}
// Table of test cases: the Content-Type sent, the serialized Content-Type
// expected to be reported back, and the test name. Run in listed order.
for (const testCase of [
  {
    input: "text; charset=ascii",
    expected: "text; charset=ascii",
    title: "header with invalid MIME type is not changed",
  },
  {
    input: "charset=ascii",
    expected: "charset=ascii",
    title: "known charset but bogus header - missing MIME type",
  },
  {
    input: "charset=bogus",
    expected: "charset=bogus",
    title: "bogus charset and bogus header - missing MIME type",
  },
  {
    input: "text/plain;charset=utf-8",
    expected: "text/plain;charset=utf-8",
    title: "Correct text/plain MIME with charset",
  },
  {
    input: "text/x-pink-unicorn",
    expected: "text/x-pink-unicorn",
    title: "If no charset= param is given, implementation should not add one - unknown MIME",
  },
  {
    input: "text/plain",
    expected: "text/plain",
    title: "If no charset= param is given, implementation should not add one - known MIME",
  },
  {
    input: "text/x-thepiano;charset= waddup",
    expected: "text/x-thepiano;charset=UTF-8",
    title: "charset given but wrong, fix it (unknown MIME, bogus charset)",
  },
  {
    input: "text/plain;charset=utf-8;charset=waddup",
    expected: "text/plain;charset=utf-8;charset=UTF-8",
    title: "charset given but wrong, fix it (known MIME, bogus charset)",
  },
  {
    input: "text/plain;charset=shift-jis",
    expected: "text/plain;charset=UTF-8",
    title: "charset given but wrong, fix it (known MIME, actual charset)",
  },
  {
    input: "text/x-pink-unicorn; charset=windows-1252; charset=bogus; notrelated; charset=ascii",
    expected: "text/x-pink-unicorn;charset=UTF-8;charset=UTF-8;notrelated;charset=UTF-8",
    title: "If multiple charset parameters are given, all should be rewritten",
  },
  {
    input: null,
    expected: "text/plain;charset=UTF-8",
    title: "No content type set, give MIME and charset",
  },
]) {
  request(testCase.input, testCase.expected, testCase.title);
}
</script>
</body>
</html>

0 comments on commit fe6dd72

Please sign in to comment.