diff --git a/lib/resource-handler/html/html-source-element.js b/lib/resource-handler/html/html-source-element.js index 9225faed..be67d086 100644 --- a/lib/resource-handler/html/html-source-element.js +++ b/lib/resource-handler/html/html-source-element.js @@ -38,7 +38,9 @@ class HtmlSourceElement { * @param {string} newData */ setData (newData) { - this.rule.attr ? this.el.attr(this.rule.attr, newData) : this.el.text(newData); + // todo: encode can be removed after https://github.com/cheeriojs/cheerio/issues/957 fixed + const escapedData = utils.encodeHtmlEntities(newData); + this.rule.attr ? this.el.attr(this.rule.attr, escapedData) : this.el.text(newData); } removeIntegrityCheck () { diff --git a/lib/utils/index.js b/lib/utils/index.js index bebe842b..f4fbf120 100644 --- a/lib/utils/index.js +++ b/lib/utils/index.js @@ -146,6 +146,10 @@ function decodeHtmlEntities (text) { return typeof text === 'string' ? htmlEntities.decode(text) : ''; } +function encodeHtmlEntities (text) { + return typeof text === 'string' ? 
htmlEntities.escape(text) : ''; +} + function clone (obj) { return Object.assign({}, obj); } @@ -188,6 +192,7 @@ module.exports = { getTypeByMime, getTypeByFilename, decodeHtmlEntities, + encodeHtmlEntities, clone, extend, union, diff --git a/test/functional/html-entities-in-url/html-entities-in-url.test.js b/test/functional/html-entities/html-entities.test.js similarity index 81% rename from test/functional/html-entities-in-url/html-entities-in-url.test.js rename to test/functional/html-entities/html-entities.test.js index fca77030..41fc5223 100644 --- a/test/functional/html-entities-in-url/html-entities-in-url.test.js +++ b/test/functional/html-entities/html-entities.test.js @@ -6,7 +6,7 @@ var scrape = require('../../../index'); var testDirname = __dirname + '/.tmp'; var mockDirname = __dirname + '/mocks'; -describe('Functional: html entities in url', function() { +describe('Functional: html entities', function() { beforeEach(function() { nock.cleanAll(); @@ -27,7 +27,9 @@ describe('Functional: html entities in url', function() { // /fonts?family=Myriad&v=2 => /fonts?family=Myriad&v=2 nock('http://example.com/').get('/fonts?family=Myriad&v=2').reply(200, 'fonts.css', {'content-type': 'text/css'}); // /?a=1&style-attr.png => /?a=1&style-attr.png - nock('http://example.com/').get('/style-attr.png?a=1&style-attr.png').reply(200, 'style-attr.png', {'content-type': 'text/css'}); + nock('http://example.com/').get('/style-attr.png?a=1&style-attr.png').reply(200, 'style-attr.png'); + // "style-attr2.png" => style-attr2.png + nock('http://example.com/').get('/style-attr2.png').reply(200, 'style-attr2.png'); // /?a=1&b=2 => /?a=1&b=2 nock('http://example.com/').get('/img.png?a=1&b=2').reply(200, 'img.png'); // /test?b=2&c=3&d=4 => /test?b=2&c=3&d=4 @@ -56,10 +58,16 @@ describe('Functional: html entities in url', function() { fs.existsSync(testDirname + '/local/fonts.css').should.be.eql(true); should(fs.readFileSync(testDirname + 
'/local/fonts.css').toString()).be.eql('fonts.css'); - should(indexHtml).containEql('background: url(\'local/style-attr.png\')'); + // single quote (') replaced with ' in attribute + should(indexHtml).containEql('background: url('local/style-attr.png')'); fs.existsSync(testDirname + '/local/style-attr.png').should.be.eql(true); should(fs.readFileSync(testDirname + '/local/style-attr.png').toString()).be.eql('style-attr.png'); + // double quote (") replaced with " in attribute + should(indexHtml).containEql('background: url("local/style-attr2.png")'); + fs.existsSync(testDirname + '/local/style-attr2.png').should.be.eql(true); + should(fs.readFileSync(testDirname + '/local/style-attr2.png').toString()).be.eql('style-attr2.png'); + should(indexHtml).containEql('img src="local/img.png'); fs.existsSync(testDirname + '/local/img.png').should.be.eql(true); should(fs.readFileSync(testDirname + '/local/img.png').toString()).be.eql('img.png'); diff --git a/test/functional/html-entities-in-url/mocks/index.html b/test/functional/html-entities/mocks/index.html similarity index 84% rename from test/functional/html-entities-in-url/mocks/index.html rename to test/functional/html-entities/mocks/index.html index 18f4e37b..472a2d9c 100644 --- a/test/functional/html-entities-in-url/mocks/index.html +++ b/test/functional/html-entities/mocks/index.html @@ -7,6 +7,7 @@
+
test
diff --git a/test/functional/html-entities-in-url/mocks/style.css b/test/functional/html-entities/mocks/style.css
similarity index 100%
rename from test/functional/html-entities-in-url/mocks/style.css
rename to test/functional/html-entities/mocks/style.css
diff --git a/test/unit/resource-handler/html.test.js b/test/unit/resource-handler/html.test.js
index ea60c38d..42e151a9 100644
--- a/test/unit/resource-handler/html.test.js
+++ b/test/unit/resource-handler/html.test.js
@@ -267,4 +267,27 @@ describe('ResourceHandler: Html', () => {
resource.getText().should.not.containEql('integrity="sha256-X+Q/xqnlEgxCczSjjpp2AUGGgqM5gcBzhRQ0p+EAUEk="');
});
});
+
+ it('should use html entities for updated attributes', () => {
+   const sources = [
+     { selector: '[style]', attr: 'style' },
+   ];
+   downloadChildrenPaths.onFirstCall().resolves('width: 300px; height: 300px; background-image:url("./images/cat.jpg")');
+   htmlHandler = new HtmlHandler({sources}, {downloadChildrenPaths});
+   // Fixture: the style attribute must carry its inner quotes as &quot; - a raw " would end the attribute early.
+   const html = `
+     <html>
+     <body>
+       <div style="width: 300px; height: 300px; background-image:url(&quot;http://example.com/images/cat.jpg&quot;)"></div>
+     </body>
+     </html>
+   `;
+   // The stub above hands back text with raw double quotes; the handler is expected to entity-encode them.
+   const resource = new Resource('http://example.com', 'index.html');
+   resource.setText(html);
+   // Expect &quot; (not ") inside the rewritten attribute so the serialized markup stays well-formed.
+   return htmlHandler.handle(resource).then(() => {
+     resource.getText().should.containEql('style="width: 300px; height: 300px; background-image:url(&quot;./images/cat.jpg&quot;)"');
+   });
+ });
});