Skip to content

Commit

Permalink
Implement parseHTMLUnsafe and setHTMLUnsafe
Browse files Browse the repository at this point in the history
I am speccing this here: whatwg/html#9538

Bug: 1478969
Change-Id: Ie55827cebdf349aadae13fbf1086baf6177bbff2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4824679
Reviewed-by: Mason Freed <masonf@chromium.org>
Commit-Queue: Joey Arhar <jarhar@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1210412}
  • Loading branch information
josepharhar authored and chromium-wpt-export-bot committed Oct 16, 2023
1 parent c71b71b commit 130ab7c
Show file tree
Hide file tree
Showing 19 changed files with 411 additions and 12 deletions.
@@ -0,0 +1,35 @@
<!DOCTYPE html>
<meta charset="windows-1252"> <!-- intentional to make sure the results are UTF-8 anyway -->
<link rel=author href="mailto:jarhar@chromium.org">
<link rel=help href="https://github.com/whatwg/html/pull/9538">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>

<!-- This was adapted from DOMParser-parseFromString-encoding.html -->

<script>
/**
 * Asserts that every encoding-reporting property of a parsed document is "UTF-8".
 *
 * @param {Document} doc - Document whose charset, characterSet and inputEncoding are checked.
 */
function assertEncoding(doc) {
  assert_equals(doc.charset, "UTF-8", "document.charset");
  assert_equals(doc.characterSet, "UTF-8", "document.characterSet");
  // The label names the property actually being checked, so a failure here
  // is reported against inputEncoding rather than characterSet.
  assert_equals(doc.inputEncoding, "UTF-8", "document.inputEncoding");
}

// Precondition: the hosting page itself must really be windows-1252.
setup(function() {
  assert_equals(
    document.characterSet,
    "windows-1252",
    "the meta charset must be in effect, making the main document windows-1252"
  );
});

// Each entry is [markup handed to Document.parseHTMLUnsafe, test name].
const encodingCases = [
  ['', 'Parse empty string'],
  [`<meta charset="latin2">`, "meta charset latin2"],
  [`<?xml version="1.0" encoding="latin2"?><x/>`, "XML declaration"],
];

// One test per case, registered in table order.
for (const [markup, title] of encodingCases) {
  test(function() {
    const parsed = Document.parseHTMLUnsafe(markup);
    assertEncoding(parsed);
  }, title);
}
</script>
@@ -0,0 +1,54 @@
<!doctype html>
<link rel=author href="mailto:jarhar@chromium.org">
<!-- This test was adapted from style_attribute_html.html -->
<meta charset=utf-8>
<title>Style attribute in HTML</title>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<script>

// Shared fixture: the <div> element that the tests in this script inspect and mutate.
var div;
setup(() => {
  const parsedDoc = Document.parseHTMLUnsafe('<div style="color: red">Foo</div>');
  div = parsedDoc.querySelector('div');
});

// These tests run in order against the shared |div|; each one starts from
// the style attribute state left behind by the previous test.

// The style attribute as it came out of the parser.
test(() => {
  const declaration = div.style;
  assert_equals(declaration.cssText, 'color: red;');
  assert_equals(declaration.color, 'red');
  assert_equals(
    div.getAttribute("style"),
    'color: red',
    'Value of style attribute should match the string value that was set'
  );
}, 'Parsing of initial style attribute');

// An unparsable value empties the declaration but is kept verbatim as the attribute.
test(() => {
  const declaration = div.style;
  div.setAttribute('style', 'color:: invalid');
  assert_equals(declaration.cssText, '');
  assert_equals(declaration.color, '');
  assert_equals(
    div.getAttribute('style'),
    'color:: invalid',
    'Value of style attribute should match the string value that was set'
  );
}, 'Parsing of invalid style attribute');

// A valid value is reflected through the declaration object obtained before the change.
test(() => {
  const declaration = div.style;
  div.setAttribute('style', 'color: green');
  assert_equals(declaration.cssText, 'color: green;');
  assert_equals(declaration.color, 'green');
  assert_equals(
    div.getAttribute('style'),
    'color: green',
    'Value of style attribute should match the string value that was set'
  );
}, 'Parsing of style attribute');

// Writing through the declaration object appends to, rather than replaces, the attribute.
test(() => {
  const declaration = div.style;
  declaration.backgroundColor = 'blue';
  assert_equals(
    declaration.cssText,
    'color: green; background-color: blue;',
    'Should not drop the existing style'
  );
  assert_equals(declaration.color, 'green', 'Should not drop the existing style');
  assert_equals(
    div.getAttribute('style'),
    'color: green; background-color: blue;',
    'Should update style attribute'
  );
}, 'Update style.backgroundColor');

</script>
@@ -0,0 +1,15 @@
<!doctype html>
<link rel=author href="mailto:jarhar@chromium.org">
<!-- This test was adapted from DOMParser-parseFromString-url-base-pushstate.html -->
<title>parseHTMLUnsafe test of how the document's URL is set (base, pushstate)</title>
<base href="/fake/base-from-outer-frame">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>

<iframe src="/html/webappapis/dynamic-markup-insertion/html-unsafe-methods/resources/parseHTMLUnsafe-iframe-base-pushstate.html" onload="window.resolveLoadPromise();"></iframe>

<script>
"use strict";
// Push a history entry with a fake URL for this (outer) page before the
// URL test script referenced by the next <script> element is loaded.
history.pushState(null, "", "/fake/push-state-from-outer-frame");
</script>
<script src="/html/webappapis/dynamic-markup-insertion/html-unsafe-methods/resources/parseHTMLUnsafe-url-tests.js"></script>
@@ -0,0 +1,10 @@
<!doctype html>
<link rel=author href="mailto:jarhar@chromium.org">
<!-- This was adapted from DOMParser-parseFromString-url-base.html -->
<title>parseHTMLUnsafe test of how the document's URL is set (base, no pushstate)</title>
<base href="/fake/base-from-outer-frame">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>

<iframe src="/html/webappapis/dynamic-markup-insertion/html-unsafe-methods/resources/parseHTMLUnsafe-iframe-base.html" onload="window.resolveLoadPromise();"></iframe>
<script src="/html/webappapis/dynamic-markup-insertion/html-unsafe-methods/resources/parseHTMLUnsafe-url-tests.js"></script>
@@ -0,0 +1,42 @@
<!DOCTYPE html>
<link rel=author href="mailto:jarhar@chromium.org">
<!-- This test was adapted from DOMParser-parseFromString-url-moretests.html -->
<meta charset=utf-8>
<title>Document.parseHTMLUnsafe: Document's url</title>
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<div id=log></div>
<script>
// Checks the URL of documents created by Document.parseHTMLUnsafe when it is
// called through the parent window and through an iframe's window, both before
// and after that iframe is navigated from blank.html?1 to blank.html?2.
async_test(function(t) {
  const iframe = document.createElement("iframe");

  // Runs when the first load (blank.html?1) completes.
  iframe.onload = t.step_func(function() {
    const child = iframe.contentWindow;

    test(function() {
      const doc = Document.parseHTMLUnsafe("<html></html>");
      assert_equals(doc.URL, "about:blank");
    }, "Parent window");

    test(function() {
      const doc = child.Document.parseHTMLUnsafe("<html></html>");
      assert_equals(doc.URL, "about:blank");
    }, "Child window");

    // Hold on to the Document interface object obtained before the navigation
    // so it can still be called after the iframe has navigated.
    const documentBeforeNavigation = child.Document;

    // Runs when the second load (blank.html?2) completes, then ends the async test.
    iframe.onload = t.step_func_done(function() {
      test(function() {
        const doc = documentBeforeNavigation.parseHTMLUnsafe("<html></html>");
        assert_equals(doc.URL, "about:blank");
      }, "Child window crossing navigation");

      test(function() {
        const doc = child.Document.parseHTMLUnsafe("<html></html>");
        assert_equals(doc.URL, "about:blank");
      }, "Child window after navigation");
    });
    iframe.src = "/common/blank.html?2";
  });

  iframe.src = "/common/blank.html?1";
  document.body.appendChild(iframe);
});
</script>
@@ -0,0 +1,14 @@
<!doctype html>
<link rel=author href="mailto:jarhar@chromium.org">
<!-- This test was adapted from DOMParser-parseFromString-url-pushstate.html -->
<title>parseHTMLUnsafe test of how the document's URL is set (no base, pushstate)</title>
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>

<iframe src="/html/webappapis/dynamic-markup-insertion/html-unsafe-methods/resources/parseHTMLUnsafe-iframe-pushstate.html" onload="window.resolveLoadPromise();"></iframe>

<script>
"use strict";
// Push a history entry with a fake URL for this (outer) page before the
// URL test script referenced by the next <script> element is loaded.
history.pushState(null, "", "/fake/push-state-from-outer-frame");
</script>
<script src="/html/webappapis/dynamic-markup-insertion/html-unsafe-methods/resources/parseHTMLUnsafe-url-tests.js"></script>
@@ -0,0 +1,9 @@
<!doctype html>
<link rel=author href="mailto:jarhar@chromium.org">
<!-- This test was adapted from DOMParser-parseFromString-url.html -->
<title>parseHTMLUnsafe test of how the document's URL is set (no pushstate, no base)</title>
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>

<iframe src="resources/parseHTMLUnsafe-iframe.html" onload="window.resolveLoadPromise();"></iframe>
<script src="resources/parseHTMLUnsafe-url-tests.js"></script>
@@ -0,0 +1,77 @@
<!doctype html>
<link rel=author href="mailto:jarhar@chromium.org">
<!-- This was adapted from DOMParser-parseFromString-html.html -->
<title>parseHTMLUnsafe basic test of HTML parsing</title>
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<script>
/**
 * Asserts that |actual| is an instance of the expected node type and, when an
 * expected id is supplied, that its id matches.
 *
 * @param {*} actual - The node under test.
 * @param {{type: Function, id: (string|undefined), idMessage: (string|undefined)}} expected
 *     An object indicating the expected type of node; |id| and |idMessage| are optional.
 */
function assert_node(actual, expected) {
  // Build the label defensively: a missing node (null/undefined) must surface
  // as the "Node type mismatch" assertion, not as a TypeError raised while
  // composing the message.
  var actualName = (actual !== null && actual !== undefined && actual.constructor)
      ? actual.constructor.name
      : Object.prototype.toString.call(actual);
  assert_true(actual instanceof expected.type,
              'Node type mismatch: actual = ' + actualName + ', expected = ' + expected.type.name);
  if (typeof expected.id !== 'undefined') {
    assert_equals(actual.id, expected.id, expected.idMessage);
  }
}

// Shared fixture: a document parsed once from markup that has no DOCTYPE.
var doc;
setup(() => {
  doc = Document.parseHTMLUnsafe('<html id="root"><head></head><body></body></html>');
});

test(() => {
  const expectedRoot = {
    type: HTMLHtmlElement,
    id: 'root',
    idMessage: 'documentElement id attribute should be root.',
  };
  assert_node(doc.documentElement, expectedRoot);
}, 'Parsing of id attribute');

test(() => {
  assert_equals(doc.contentType, "text/html");
}, 'contentType');

test(() => {
  assert_equals(doc.compatMode, "BackCompat");
}, 'compatMode');

test(function() {
  // Parse into a test-local document. Assigning to the shared |doc| fixture
  // here would silently change what the tests after this one inspect.
  const standardsDoc = Document.parseHTMLUnsafe('<!DOCTYPE html><html id="root"><head></head><body></body></html>');
  assert_equals(standardsDoc.compatMode, "CSS1Compat");
}, 'compatMode for a proper DOCTYPE');

// URL- and encoding-related stuff tested separately.

test(() => {
  assert_equals(
    doc.location,
    null,
    'The document must have a location value of null.'
  );
}, 'Location value');

// Malformed markup must still yield an <html> root element in the XHTML namespace.
test(() => {
  const soupDoc = Document.parseHTMLUnsafe("<!DOCTYPE foo></><foo></multiple></>");
  const rootElement = soupDoc.documentElement;
  assert_equals(rootElement.localName, "html");
  assert_equals(rootElement.namespaceURI, "http://www.w3.org/1999/xhtml");
}, "Document.parseHTMLUnsafe parses HTML tag soup with no problems");

// Character references inside <noembed> must come back undecoded.
test(() => {
  const parsed = Document.parseHTMLUnsafe('<noembed>&lt;a&gt;</noembed>');
  const noembed = parsed.querySelector('noembed');
  assert_equals(noembed.textContent, '&lt;a&gt;');
}, 'Document.parseHTMLUnsafe should handle the content of <noembed> as raw text');

// The <script> element must be present in the parsed tree straight away, and
// its body must not have run against either the parsed or the hosting document.
test(function() {
  const parsed = Document.parseHTMLUnsafe(`
<html><body>
<style>
@import url(/dummy.css)
</style>
<script>document.x = 8<\/script>
</body></html>`);

  const scriptElement = parsed.querySelector('script');
  assert_not_equals(scriptElement, null, 'script must be found');
  assert_equals(parsed.x, undefined, 'script must not be executed on the inner document');
  assert_equals(document.x, undefined, 'script must not be executed on the outer document');
}, 'script is found synchronously even when there is a css import');

// The children of <noscript> are expected to be real <p> elements.
test(function() {
  const parsed = Document.parseHTMLUnsafe(`<body><noscript><p id="test1">test1<p id="test2">test2</noscript>`);
  const noscriptChildren = parsed.body.firstChild.childNodes;
  assert_node(noscriptChildren[0], { type: HTMLParagraphElement, id: 'test1' });
  assert_node(noscriptChildren[1], { type: HTMLParagraphElement, id: 'test2' });
}, 'must be parsed with scripting disabled, so noscript works');
</script>
@@ -0,0 +1,25 @@
<!DOCTYPE html>
<title>setHTMLUnsafe in HTML</title>
<link rel="author" title="Ms2ger" href="mailto:ms2ger@gmail.com">
<link rel=author href="mailto:jarhar@chromium.org">
<!-- This test was adapted from innerhtml-04.html -->
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<div id="log"></div>
<script>
/**
 * Asserts that |c| is the first child of |p| and that |p| is the parent of |c|.
 *
 * @param {Node} p - Expected parent node.
 * @param {Node} c - Expected first child of |p|.
 */
function testIsChild(p, c) {
  const firstChildOfParent = p.firstChild;
  assert_equals(firstChildOfParent, c);

  const parentOfChild = c.parentNode;
  assert_equals(parentOfChild, p);
}
// Builds <p><b>foo</b></p>, clears the <p> via setHTMLUnsafe(""), and checks
// that the detached <b> still owns its text node with unchanged data.
test(() => {
  const paragraph = document.createElement('p');
  const bold = document.createElement('b');
  paragraph.appendChild(bold);
  const text = document.createTextNode("foo");
  bold.appendChild(text);

  testIsChild(paragraph, bold);
  testIsChild(bold, text);
  assert_equals(text.data, "foo");

  paragraph.setHTMLUnsafe("");

  testIsChild(bold, text);
  assert_equals(text.data, "foo");
}, "setHTMLUnsafe should leave the removed children alone.");
</script>
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>An iframe that does parseHTMLUnsafe stuff with base and pushstates itself</title>
<base href="/fake/base-from-iframe">

<script>
"use strict";
// Push a history entry with a fake URL for this iframe before the
// parseHTMLUnsafe-iframe.js script referenced below is loaded.
history.pushState(null, "", "/fake/push-state-from-iframe");
</script>
<script src="/html/webappapis/dynamic-markup-insertion/html-unsafe-methods/resources/parseHTMLUnsafe-iframe.js"></script>
@@ -0,0 +1,6 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>An iframe that does parseHTMLUnsafe stuff with base</title>
<base href="/fake/base-from-iframe">

<script src="/html/webappapis/dynamic-markup-insertion/html-unsafe-methods/resources/parseHTMLUnsafe-iframe.js"></script>
@@ -0,0 +1,9 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>An iframe that does parseHTMLUnsafe stuff and pushstates itself</title>

<script>
"use strict";
// Push a history entry with a fake URL for this iframe before the
// parseHTMLUnsafe-iframe.js script referenced below is loaded.
history.pushState(null, "", "/fake/push-state-from-iframe");
</script>
<script src="/html/webappapis/dynamic-markup-insertion/html-unsafe-methods/resources/parseHTMLUnsafe-iframe.js"></script>
@@ -0,0 +1,4 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>An iframe that does parseHTMLUnsafe stuff</title>
<script src="parseHTMLUnsafe-iframe.js"></script>
@@ -0,0 +1,3 @@
// Exposes a helper on this window that parses |html| with this window's own
// Document.parseHTMLUnsafe and returns the resulting document.
window.doParse = (html) => Document.parseHTMLUnsafe(html);
@@ -0,0 +1,36 @@
// Resolves once something calls window.resolveLoadPromise(); the promise
// tests in this script await it before touching frames[0].
const loadPromise = new Promise((resolve) => {
  window.resolveLoadPromise = resolve;
});

/**
 * Asserts that the document's URL, documentURI and baseURI are all "about:blank".
 *
 * @param {Document} doc - Document whose URL-reporting properties are checked.
 */
function assertURL(doc) {
  const expectedURL = "about:blank";
  // Checked in this order; each label is "document." followed by the property name.
  for (const property of ["URL", "documentURI", "baseURI"]) {
    assert_equals(doc[property], expectedURL, `document.${property}`);
  }
}

// Markup samples keyed by the label used as the prefix of each test name.
const inputs = {
  valid: "<html></html>",
  "invalid XML": `<span x:test="testing">1</span>`
};

// For every sample, register three tests: parse in this window, parse with the
// iframe's Document from this window, and parse via the iframe's own doParse().
Object.entries(inputs).forEach(([inputName, input]) => {
  test(() => {
    const parsedHere = Document.parseHTMLUnsafe(input);
    assertURL(parsedHere);
  }, `${inputName}: created normally`);

  promise_test(async () => {
    await loadPromise;
    const frameWindow = frames[0];
    const parsedWithFrameDocument = frameWindow.Document.parseHTMLUnsafe(input);
    assertURL(parsedWithFrameDocument);
  }, `${inputName}: created using another iframe's parseHTMLUnsafe from this frame`);

  promise_test(async () => {
    await loadPromise;
    const frameWindow = frames[0];
    const parsedInsideFrame = frameWindow.doParse(input);
    assertURL(parsedInsideFrame);
  }, `${inputName}: created using another iframe's parseHTMLUnsafe from that frame`);
});

0 comments on commit 130ab7c

Please sign in to comment.