Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
URL: percent-encoding test framework basics
- Loading branch information
Showing
4 changed files
with
108 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// Load the shared JSON test data, then hand the parsed entries to runTests().
promise_test(() => {
  return fetch("resources/percent-encoding.json")
    .then(response => response.json())
    .then(testUnits => runTests(testUnits));
}, "Loading data…");
|
||
/**
 * Registers one async_test per (input, encoding) pair found in the test data.
 *
 * Each subtest loads resources/percent-encoding.py in an iframe with the given
 * encoding and the base64-encoded input, then inspects the <a> element of the
 * loaded document.
 *
 * @param {Array<string|{input: string, output: Object<string, string>}>} testUnits
 *   Entries from the data file; plain strings are comments and are skipped.
 */
function runTests(testUnits) {
  // String entries are comments in the data file, not test cases.
  const cases = testUnits.filter(entry => typeof entry !== "string");

  cases.forEach(testUnit => {
    const { input, output: expected } = testUnit;

    Object.keys(expected).forEach(encoding => {
      const name = `Input ${input} with encoding ${encoding}`;

      async_test(t => {
        const frame = document.createElement("iframe");
        document.body.appendChild(frame);
        t.add_cleanup(() => frame.remove());

        frame.onload = t.step_func_done(() => {
          const link = frame.contentDocument.querySelector("a");
          // The fragment is expected to be UTF-8 encoded regardless of the document encoding.
          assert_equals(link.hash, `#${expected["utf-8"]}`, "fragment");
          assert_equals(link.search, `?${expected[encoding]}`, "query");
        });

        frame.src = `resources/percent-encoding.py?encoding=${encoding}&value=${toBase64(input)}`;
      }, name);
    });
  });
}
|
||
// Base64 is used so the test does not depend on the URL parser percent-encoding UTF-8 correctly.
// btoa cannot be applied to the input directly because it only accepts code points in the range
// U+0000 to U+00FF, inclusive, so the input is first turned into a string of its UTF-8 bytes.
/**
 * @param {string} input Text to encode.
 * @returns {string} Base64 of the UTF-8 bytes of `input`.
 */
function toBase64(input) {
  let byteString = "";
  for (const byte of new TextEncoder().encode(input)) {
    byteString += String.fromCharCode(byte);
  }
  return self.btoa(byteString);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
[ | ||
"Tests for percent-encoding.", | ||
{ | ||
"input": "\u2020", | ||
"output": { | ||
"big5": "%26%238224%3B", | ||
"euc-kr": "%A2%D3", | ||
"utf-8": "%E2%80%A0", | ||
"windows-1252": "%86" | ||
} | ||
}, | ||
"This uses a trailing A to prevent the URL parser from trimming the C0 control.", | ||
{ | ||
"input": "\u000EA", | ||
"output": { | ||
"big5": "%0EA", | ||
"iso-2022-jp": "%26%2365533%3BA", | ||
"utf-8": "%0EA" | ||
} | ||
}, | ||
{ | ||
"input": "\u203E\u005C", | ||
"output": { | ||
"iso-2022-jp": "%1B(J~%1B(B\\", | ||
"utf-8": "%E2%80%BE\\" | ||
} | ||
}, | ||
{ | ||
"input": "\uE5E5", | ||
"output": { | ||
"gb18030": "%26%2358853%3B", | ||
"utf-8": "%EE%97%A5" | ||
} | ||
}, | ||
{ | ||
"input": "\u2212", | ||
"output": { | ||
"shift_jis": "%81|", | ||
"utf-8": "%E2%88%92" | ||
} | ||
}, | ||
{ | ||
"input": "á|", | ||
"output": { | ||
"utf-8": "%C3%A1|" | ||
} | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import base64 | ||
from wptserve.utils import isomorphic_decode | ||
|
||
# Use numeric references to let the HTML parser take care of inserting the correct code points
# rather than trying to figure out the necessary bytes for each encoding. (The latter can be
# especially tricky given that Python does not implement the Encoding Standard.)
def numeric_references(input):
    """Return `input` as a byte string of hexadecimal HTML numeric character references.

    :param input: text string; each item yielded by iterating it becomes one reference.
    :returns: bytes of the form b"&#xHHHH;..." (uppercase hex), or b"" for empty input.
    """
    # The format spec must be text: a bytes spec raises TypeError on Python 3. The resulting
    # hex digits are pure ASCII, so encoding them back to bytes is lossless.
    return b"".join(
        b"&#x" + format(ord(cp), "X").encode("ascii") + b";" for cp in input
    )
|
||
def main(request, response):
    """Serve an HTML document, labelled with the requested encoding, containing a test link.

    Reads two query parameters: `value` (base64 of the UTF-8 bytes of the test input) and
    `encoding` (the charset label to put in the Content-Type header). The decoded input is
    written as numeric character references into both the query and the fragment of the
    link's href.

    :returns: a (headers, body) tuple.
    """
    # Undo the "magic" space with + replacement as otherwise base64 decoding will fail.
    # The query value is handled as bytes here (it is looked up with a bytes key), so the
    # replacement arguments must be bytes too; text arguments raise TypeError on Python 3.
    value = request.GET.first(b"value").replace(b" ", b"+")
    encoding = request.GET.first(b"encoding")

    # The codec name passed to decode() must be text; a bytes name raises TypeError on Python 3.
    output_value = numeric_references(base64.b64decode(value).decode("utf-8"))
    return (
        [(b"Content-Type", b"text/html;charset=" + encoding)],
        b"""<!doctype html>
<a href="https://doesnotmatter.invalid/?%s#%s">test</a>
""" % (output_value, output_value))