Skip to content

Commit

Permalink
URL: percent-encoding test framework basics
Browse files Browse the repository at this point in the history
  • Loading branch information
annevk committed Nov 3, 2020
1 parent 7714033 commit 09d8830
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 0 deletions.
4 changes: 4 additions & 0 deletions url/README.md
Expand Up @@ -44,6 +44,10 @@ expected to fail.
Tests in `/encoding` and `/html/infrastructure/urls/resolving-urls/query-encoding/` cover the
encoding argument to the URL parser.

There's also limited coverage in `resources/percent-encoding.json` for percent-encode after encoding
with _percentEncodeSet_ set to special-query percent-encode set and _spaceAsPlus_ set to false.
(Improvements to expand coverage here are welcome.)

## Specification

The tests in this directory assert conformance with [the URL Standard][URL].
Expand Down
33 changes: 33 additions & 0 deletions url/percent-encoding.window.js
@@ -0,0 +1,33 @@
// Entry point: fetch the JSON test data, then hand the parsed units to runTests, which registers
// the individual tests.
promise_test(async () => {
  const response = await fetch("resources/percent-encoding.json");
  const testUnits = await response.json();
  return runTests(testUnits);
}, "Loading data…");

/**
 * Registers one async_test for every encoding key found in the `output` object of every test
 * unit.
 *
 * Each test appends an iframe, points it at resources/percent-encoding.py with the encoding and
 * the base64-encoded input in the query string, and on load compares the `hash` and `search` of
 * the first <a> element in the framed document against the expected outputs.
 *
 * @param testUnits Iterable of test units; entries that are plain strings are skipped.
 */
function runTests(testUnits) {
  for (const unit of testUnits) {
    // String entries in the data are comments for humans, not test cases.
    if (typeof unit !== "string") {
      Object.keys(unit.output).forEach(encoding => {
        const title = `Input ${unit.input} with encoding ${encoding}`;

        async_test(t => {
          const iframe = document.createElement("iframe");
          const frame = document.body.appendChild(iframe);
          t.add_cleanup(() => frame.remove());

          const checkAnchor = () => {
            const anchor = frame.contentDocument.querySelector("a");
            // The fragment is expected to be UTF-8 encoded whatever the document encoding is.
            assert_equals(anchor.hash, `#${unit.output["utf-8"]}`, "fragment");
            assert_equals(anchor.search, `?${unit.output[encoding]}`, "query");
          };
          frame.onload = t.step_func_done(checkAnchor);

          const value = toBase64(unit.input);
          frame.src = `resources/percent-encoding.py?encoding=${encoding}&value=${value}`;
        }, title);
      });
    }
  }
}

// Base64 is used so that the test does not depend on the URL parser getting UTF-8
// percent-encoding right. btoa cannot be applied to the input directly because it only accepts
// code points in the range U+0000 to U+00FF, inclusive, so the input is UTF-8 encoded first and
// every resulting byte is mapped to the code point of the same value.
function toBase64(input) {
  let binary = "";
  for (const octet of new TextEncoder().encode(input)) {
    binary += String.fromCharCode(octet);
  }
  return self.btoa(binary);
}
48 changes: 48 additions & 0 deletions url/resources/percent-encoding.json
@@ -0,0 +1,48 @@
[
"Tests for percent-encoding.",
{
"input": "\u2020",
"output": {
"big5": "%26%238224%3B",
"euc-kr": "%A2%D3",
"utf-8": "%E2%80%A0",
"windows-1252": "%86"
}
},
"This uses a trailing A to prevent the URL parser from trimming the C0 control.",
{
"input": "\u000EA",
"output": {
"big5": "%0EA",
"iso-2022-jp": "%26%2365533%3BA",
"utf-8": "%0EA"
}
},
{
"input": "\u203E\u005C",
"output": {
"iso-2022-jp": "%1B(J~%1B(B\\",
"utf-8": "%E2%80%BE\\"
}
},
{
"input": "\uE5E5",
"output": {
"gb18030": "%26%2358853%3B",
"utf-8": "%EE%97%A5"
}
},
{
"input": "\u2212",
"output": {
"shift_jis": "%81|",
"utf-8": "%E2%88%92"
}
},
{
"input": "á|",
"output": {
"utf-8": "%C3%A1|"
}
}
]
23 changes: 23 additions & 0 deletions url/resources/percent-encoding.py
@@ -0,0 +1,23 @@
import base64
from wptserve.utils import isomorphic_decode

# Use numeric references to let the HTML parser take care of inserting the correct code points
# rather than trying to figure out the necessary bytes for each encoding. (The latter can be
# especially tricky given that Python does not implement the Encoding Standard.)
def numeric_references(input):
    """Return `input` as a byte string of HTML numeric character references.

    Every character of `input` becomes ``&#x<HEX>;`` where ``<HEX>`` is the uppercase
    hexadecimal value of ``ord()`` of that character. An empty input yields ``b""``.
    """
    # Bytes %-formatting is used because format() only accepts a str format spec on Python 3
    # and returns str, which cannot be concatenated to the surrounding bytes.
    return b"".join(b"&#x%X;" % ord(cp) for cp in input)

def main(request, response):
    """Build an HTML response containing a single link made from the requested value.

    Reads the ``value`` (base64 of the UTF-8 encoded input) and ``encoding`` query parameters.
    Returns a ``(headers, body)`` tuple: the Content-Type header carries ``encoding`` as the
    charset, and the body is a document whose link has the numeric character references for
    the decoded value in both its query and its fragment.
    """
    # Undo the "magic" space with + replacement as otherwise base64 decoding will fail.
    # The replacement arguments are bytes literals to match the bytes keys used for the query
    # lookup; str arguments raise TypeError against a bytes value on Python 3.
    value = request.GET.first(b"value").replace(b" ", b"+")
    encoding = request.GET.first(b"encoding")

    # decode() needs a str encoding name on Python 3.
    output_value = numeric_references(base64.b64decode(value).decode("utf-8"))
    return (
        [(b"Content-Type", b"text/html;charset=" + encoding)],
        b"""<!doctype html>
<a href="https://doesnotmatter.invalid/?%s#%s">test</a>
""" % (output_value, output_value))

0 comments on commit 09d8830

Please sign in to comment.