[XHR] Introduce a mime type parser to a WPT

This CL introduces a MIME type parser and stringifier to wpt/XMLHttpRequest/send-content-type-charset in order to accept implementations that actually conform to the spec but were rejected by the test due to differences in text representation.

Bug: whatwg/mimesniff#39
Change-Id: I99466e2e596bb9c1b7f11267ad4ff0a886913086
web-platform-tests · Nov 21, 2017 · fe6dd72 · fe6dd72
1 parent 82544df
commit fe6dd72
Showing 1 changed file with 240 additions and 66 deletions.
diff --git a/XMLHttpRequest/send-content-type-charset.htm b/XMLHttpRequest/send-content-type-charset.htm
@@ -10,74 +10,248 @@
   <body>
     <div id="log"></div>
     <script>
-      function request(input, output, title) {
-        title = title || document.title + ' - ' + input;
-        test(function() {
-        var client = new XMLHttpRequest()
-        client.open("POST", "resources/content.py", false)
-        if(input)
-          client.setRequestHeader("Content-Type", input)
-        client.send("TEST")
-        assert_equals(client.responseText, "TEST")
-        assert_equals(client.getResponseHeader("x-request-content-type"), output)
-        }, title)
+function isTokenChar(c) {
+  const code = c.charCodeAt(0);
+  if (code >= 127)
+    return false;
+  if (code < 0x20)
+    return false;
+  switch (c) {
+    case ' ':
+    case ';':
+    case '"':
+    case '(':
+    case ')':
+    case '<':
+    case '>':
+    case '@':
+    case ',':
+    case ':':
+    case '\\':
+    case '/':
+    case '[':
+    case ']':
+    case '?':
+    case '=':
+      return false;
+    default:
+      return true;
+  }
+}
+class Tokenizer {
+  constructor(string) {
+    this.string = string;
+    this.index = 0;
+    this.skipSpaces();
+  }
+
+  consumeChar(c) {
+    const s = this.string;
+    if (this.isDone() || s[this.index] !== c) {
+      return false;
+    }
+    ++this.index;
+    this.skipSpaces();
+    return true;
+  }
+  consumeToken() {
+    const start = this.index;
+    const s = this.string;
+    while (!this.isDone() && isTokenChar(s[this.index])) {
+      ++this.index;
+    }
+    if (start === this.index) {
+      return {success: false, token: undefined};
+    }
+    const token = s.substring(start, this.index);
+    this.skipSpaces();
+    return {success: true, token};
+  }
+  consumeQuotedString() {
+    const s = this.string;
+    const start = this.index;
+    let contents = '';
+    if (this.isDone() || s[this.index] != '"') {
+      return false;
+    }
+    ++this.index;
+    while (!this.isDone()) {
+      if (s[this.index] === '"') {
+        ++this.index;
+        this.skipSpaces();
+        return {success: true, token: contents};
+      }
+      if (s[this.index] === '\\') {
+        ++this.index;
+        if (this.isDone()) {
+          this.index = start;
+          return {success: false, token: undefined};
+        }
+      }
+      contents += s[this.index];
+      ++this.index;
+    }
+    this.index = start;
+    return {success: false, token: undefined};
+  }
+  consumeTokenOrQuotedString() {
+    if (this.isDone()) {
+      return false;
+    }
+    if (this.string[this.index] === '"') {
+      return this.consumeQuotedString();
+    }
+    return this.consumeToken();
+  }
+
+  skipSpaces() {
+    const s = this.string;
+    while (!this.isDone() &&
+           (s[this.index] === ' ' || s[this.index] === '\t')) {
+      this.index++;
+    }
+  }
+
+  isDone() { return this.index >= this.string.length; }
+}
+function parse(string) {
+  const tokenizer = new Tokenizer(string);
+  const parameters = [];
+  let success, token;
+
+  ({success, token} = tokenizer.consumeToken());
+  if (!success) {
+    return undefined;
+  }
+
+  const type = token;
+  if (!tokenizer.consumeChar('/')) {
+    return undefined;
+  }
+
+  ({success, token} = tokenizer.consumeToken());
+  if (!success) {
+    return undefined;
+  }
+  const subtype = token;
+
+  while (!tokenizer.isDone()) {
+    if (!tokenizer.consumeChar(';')) {
+      return undefined;
+    }
+    ({success, token} = tokenizer.consumeToken());
+    if (!success) {
+      return undefined;
+    }
+    const name = token;
+    let value = undefined;
+    if (tokenizer.consumeChar('=')) {
+      ({success, token} = tokenizer.consumeTokenOrQuotedString());
+      if (!success) {
+        return undefined;
       }
+      value = token;
+    }
+    parameters.push({name, value});
+  }
+  return {type: type + '/' + subtype, parameters};
+}
+
+function quote(s) {
+  let needsQuotation = false;
+  for (const c of s) {
+    needsQuotation = needsQuotation || !isTokenChar(c);
+  }
+  if (!needsQuotation)
+    return s;
+
+  return '"' + s.replace(/\\/g, '\\\\').replace(/\"/g, '\\"') + '"'
+}
+
+function stringify({type, parameters}) {
+  const flattenParameters = parameters.map(
+    ({name, value}) => value ? `${name}=${quote(value)}` : name)
+  if (flattenParameters.length === 0)  {
+    return type;
+  }
+  return type + ';' + flattenParameters.join(';');
+}
+
+function request(input, expected, title) {
+  title = title || document.title + ' - ' + input;
+  test(function() {
+    var client = new XMLHttpRequest();
+    client.open("POST", "resources/content.py", false);
+    if(input)
+      client.setRequestHeader("Content-Type", input);
+    client.send("TEST");
+    assert_equals(client.responseText, "TEST");
+    const actual = client.getResponseHeader("x-request-content-type");
+
+    if (parse(expected)) {
+      assert_equals(stringify(parse(actual)), expected);
+    } else {
+      // An invalid MIME type should be left unmodified.
+      assert_equals(actual, expected);
+    }
+  }, title);
+}
 
-      request(
-        "text; charset=ascii",
-        "text; charset=ascii",
-        "header with invalid MIME type is not changed"
-      )
-      request(
-        "charset=ascii",
-        "charset=ascii",
-        "known charset but bogus header - missing MIME type"
-      )
-      request(
-        "charset=bogus",
-        "charset=bogus",
-        "bogus charset and bogus header - missing MIME type"
-      )
-      request(
-        "text/plain;charset=utf-8",
-        "text/plain;charset=utf-8",
-        "Correct text/plain MIME with charset"
-      )
-      request(
-        "text/x-pink-unicorn",
-        "text/x-pink-unicorn",
-        "If no charset= param is given, implementation should not add one - unknown MIME"
-      )
-      request(
-        "text/plain",
-        "text/plain",
-        "If no charset= param is given, implementation should not add one - known MIME"
-      )
-      request(
-        "text/x-thepiano;charset= waddup",
-        "text/x-thepiano;charset=UTF-8",
-        "charset given but wrong, fix it (unknown MIME, bogus charset)"
-      )
-      request(
-        "text/plain;charset=utf-8;charset=waddup",
-        "text/plain;charset=utf-8;charset=UTF-8",
-        "charset given but wrong, fix it (known MIME, bogus charset)"
-      )
-      request(
-        "text/plain;charset=shift-jis",
-        "text/plain;charset=UTF-8",
-        "charset given but wrong, fix it (known MIME, actual charset)"
-      )
-      request(
-        "text/x-pink-unicorn; charset=windows-1252; charset=bogus; notrelated; charset=ascii",
-        "text/x-pink-unicorn; charset=UTF-8; charset=UTF-8; notrelated; charset=UTF-8",
-        "If multiple charset parameters are given, all should be rewritten"
-      )
-      request(
-        null,
-        "text/plain;charset=UTF-8",
-        "No content type set, give MIME and charset"
-      )
+request(
+  "text; charset=ascii",
+  "text; charset=ascii",
+  "header with invalid MIME type is not changed"
+);
+request(
+  "charset=ascii",
+  "charset=ascii",
+  "known charset but bogus header - missing MIME type"
+);
+request(
+  "charset=bogus",
+  "charset=bogus",
+  "bogus charset and bogus header - missing MIME type"
+);
+request(
+  "text/plain;charset=utf-8",
+  "text/plain;charset=utf-8",
+  "Correct text/plain MIME with charset"
+);
+request(
+  "text/x-pink-unicorn",
+  "text/x-pink-unicorn",
+  "If no charset= param is given, implementation should not add one - unknown MIME"
+);
+request(
+  "text/plain",
+  "text/plain",
+  "If no charset= param is given, implementation should not add one - known MIME"
+);
+request(
+  "text/x-thepiano;charset= waddup",
+  "text/x-thepiano;charset=UTF-8",
+  "charset given but wrong, fix it (unknown MIME, bogus charset)"
+);
+request(
+  "text/plain;charset=utf-8;charset=waddup",
+  "text/plain;charset=utf-8;charset=UTF-8",
+  "charset given but wrong, fix it (known MIME, bogus charset)"
+);
+request(
+  "text/plain;charset=shift-jis",
+  "text/plain;charset=UTF-8",
+  "charset given but wrong, fix it (known MIME, actual charset)"
+);
+request(
+  "text/x-pink-unicorn; charset=windows-1252; charset=bogus; notrelated; charset=ascii",
+  "text/x-pink-unicorn;charset=UTF-8;charset=UTF-8;notrelated;charset=UTF-8",
+  "If multiple charset parameters are given, all should be rewritten"
+);
+request(
+  null,
+  "text/plain;charset=UTF-8",
+  "No content type set, give MIME and charset"
+);
     </script>
   </body>
 </html>