Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/resource-handler/html/html-source-element.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ class HtmlSourceElement {
* @param {string} newData
*/
setData (newData) {
this.rule.attr ? this.el.attr(this.rule.attr, newData) : this.el.text(newData);
// todo: encode can be removed after https://github.com/cheeriojs/cheerio/issues/957 fixed
const escapedData = utils.encodeHtmlEntities(newData);
this.rule.attr ? this.el.attr(this.rule.attr, escapedData) : this.el.text(newData);
}

removeIntegrityCheck () {
Expand Down
5 changes: 5 additions & 0 deletions lib/utils/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ function decodeHtmlEntities (text) {
return typeof text === 'string' ? htmlEntities.decode(text) : '';
}

/**
 * Run a string through `htmlEntities.escape`.
 *
 * @param {*} text - value to escape
 * @returns {string} the escaped string, or '' when `text` is not a string
 */
function encodeHtmlEntities (text) {
	if (typeof text !== 'string') {
		return '';
	}
	return htmlEntities.escape(text);
}

/**
 * Make a shallow copy of an object.
 *
 * Own enumerable properties are copied onto a fresh plain object; nested
 * values are shared with the source, not duplicated.
 *
 * @param {Object} obj - object to copy
 * @returns {Object} new object with the same own enumerable properties
 */
function clone (obj) {
	const copy = {};
	Object.assign(copy, obj);
	return copy;
}
Expand Down Expand Up @@ -188,6 +192,7 @@ module.exports = {
getTypeByMime,
getTypeByFilename,
decodeHtmlEntities,
encodeHtmlEntities,
clone,
extend,
union,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ var scrape = require('../../../index');
var testDirname = __dirname + '/.tmp';
var mockDirname = __dirname + '/mocks';

describe('Functional: html entities in url', function() {
describe('Functional: html entities', function() {

beforeEach(function() {
nock.cleanAll();
Expand All @@ -27,7 +27,9 @@ describe('Functional: html entities in url', function() {
// /fonts?family=Myriad&amp;v=2 => /fonts?family=Myriad&v=2
nock('http://example.com/').get('/fonts?family=Myriad&v=2').reply(200, 'fonts.css', {'content-type': 'text/css'});
// /?a=1&amp;style-attr.png => /?a=1&style-attr.png
nock('http://example.com/').get('/style-attr.png?a=1&style-attr.png').reply(200, 'style-attr.png', {'content-type': 'text/css'});
nock('http://example.com/').get('/style-attr.png?a=1&style-attr.png').reply(200, 'style-attr.png');
// &quot;style-attr2.png&quot; => style-attr2.png
nock('http://example.com/').get('/style-attr2.png').reply(200, 'style-attr2.png');
// /?a=1&amp;b=2 => /?a=1&b=2
nock('http://example.com/').get('/img.png?a=1&b=2').reply(200, 'img.png');
// /test?b=2&c=3&d=4 => /test?b=2&c=3&d=4
Expand Down Expand Up @@ -56,10 +58,16 @@ describe('Functional: html entities in url', function() {
fs.existsSync(testDirname + '/local/fonts.css').should.be.eql(true);
should(fs.readFileSync(testDirname + '/local/fonts.css').toString()).be.eql('fonts.css');

should(indexHtml).containEql('background: url(\'local/style-attr.png\')');
// single quote (') replaced with &#x27; in attribute
should(indexHtml).containEql('background: url('local/style-attr.png')');
fs.existsSync(testDirname + '/local/style-attr.png').should.be.eql(true);
should(fs.readFileSync(testDirname + '/local/style-attr.png').toString()).be.eql('style-attr.png');

// double quote (") replaced with &quot; in attribute
should(indexHtml).containEql('background: url("local/style-attr2.png")');
fs.existsSync(testDirname + '/local/style-attr2.png').should.be.eql(true);
should(fs.readFileSync(testDirname + '/local/style-attr2.png').toString()).be.eql('style-attr2.png');

should(indexHtml).containEql('img src="local/img.png');
fs.existsSync(testDirname + '/local/img.png').should.be.eql(true);
should(fs.readFileSync(testDirname + '/local/img.png').toString()).be.eql('img.png');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
</head>
<body>
<div style="background: url('http://example.com/style-attr.png?a=1&amp;style-attr.png')"></div>
<div style="background: url(&quot;http://example.com/style-attr2.png&quot;)"></div>
<img src="http://example.com/img.png?a=1&amp;b=2" />
<a href="?b=2&amp;c=3&amp;d=4">test</a>
</body>
Expand Down
23 changes: 23 additions & 0 deletions test/unit/resource-handler/html.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -267,4 +267,27 @@ describe('ResourceHandler: Html', () => {
resource.getText().should.not.containEql('integrity="sha256-X+Q/xqnlEgxCczSjjpp2AUGGgqM5gcBzhRQ0p+EAUEk="');
});
});

// Rewriting a `style` attribute must leave its quotes entity-encoded in the
// serialized HTML, not emitted as raw double quotes.
it('should use html entities for updated attributes', () => {
	// The stubbed downloader hands back the rewritten style value with raw quotes.
	const rewrittenStyle = 'width: 300px; height: 300px; background-image:url("./images/cat.jpg")';
	downloadChildrenPaths.onFirstCall().resolves(rewrittenStyle);

	const styleRules = [{ selector: '[style]', attr: 'style' }];
	htmlHandler = new HtmlHandler({ sources: styleRules }, { downloadChildrenPaths });

	const page = new Resource('http://example.com', 'index.html');
	const markup = `
<html>
<body>
<div style="width: 300px; height: 300px; background-image:url(&quot;http://example.com/cat.jpg&quot;)"></div>
</body>
</html>
`;
	page.setText(markup);

	const expectedAttribute = 'style="width: 300px; height: 300px; background-image:url(&quot;./images/cat.jpg&quot;)"';

	return htmlHandler.handle(page).then(() => {
		page.getText().should.containEql(expectedAttribute);
	});
});
});