From 98f56a95b6176b45e175f4a98a7e697e848860a5 Mon Sep 17 00:00:00 2001 From: s0ph1e Date: Wed, 27 Sep 2017 17:59:13 +0300 Subject: [PATCH 1/3] Add test --- .../mocks/relative-resources-about.html | 12 ++++ .../mocks/relative-resources-index.html | 11 ++++ test/functional/redirect/redirect.test.js | 58 +++++++++++++++++-- 3 files changed, 75 insertions(+), 6 deletions(-) create mode 100644 test/functional/redirect/mocks/relative-resources-about.html create mode 100644 test/functional/redirect/mocks/relative-resources-index.html diff --git a/test/functional/redirect/mocks/relative-resources-about.html b/test/functional/redirect/mocks/relative-resources-about.html new file mode 100644 index 00000000..c1b56a9b --- /dev/null +++ b/test/functional/redirect/mocks/relative-resources-about.html @@ -0,0 +1,12 @@ + + + + + About + + + + + + + \ No newline at end of file diff --git a/test/functional/redirect/mocks/relative-resources-index.html b/test/functional/redirect/mocks/relative-resources-index.html new file mode 100644 index 00000000..f68fb268 --- /dev/null +++ b/test/functional/redirect/mocks/relative-resources-index.html @@ -0,0 +1,11 @@ + + + + + Index + + + + About + + \ No newline at end of file diff --git a/test/functional/redirect/redirect.test.js b/test/functional/redirect/redirect.test.js index 2ddf81e8..ec41b219 100644 --- a/test/functional/redirect/redirect.test.js +++ b/test/functional/redirect/redirect.test.js @@ -1,11 +1,14 @@ +'use strict'; + require('should'); -var nock = require('nock'); -var fs = require('fs-extra'); -var sinon = require('sinon'); -var Scraper = require('../../../lib/scraper'); +const nock = require('nock'); +const fs = require('fs-extra'); +const sinon = require('sinon'); +const Scraper = require('../../../lib/scraper'); +const scrape = require('../../../index'); -var testDirname = __dirname + '/.tmp'; -var mockDirname = __dirname + '/mocks'; +const testDirname = __dirname + '/.tmp'; +const mockDirname = __dirname + '/mocks'; describe('Functional redirects', function() { @@ -60,4 +63,47 @@ describe('Functional redirects', function() { fs.readFileSync(testDirname + '/true-page.html').toString().should.be.eql('true page 1'); }); }); + + it('should correctly handle relative source in redirected page', () => { + const options = { + urls: [ + { url: 'http://example.com', filename: 'index.html'} + ], + directory: testDirname, + subdirectories: [ + { directory: 'css', extensions: ['.css'] } + ], + maxRecursiveDepth: 1, + sources: [ + {selector: 'link', attr: 'href'}, + {selector: 'a', attr: 'href'} + ] + }; + + nock('http://example.com/').get('/').replyWithFile(200, mockDirname + '/relative-resources-index.html'); + nock('http://example.com/').get('/about').reply(301, '', {'Location': 'http://example.com/about/'}); + nock('http://example.com/').get('/about/').replyWithFile(200, mockDirname + '/relative-resources-about.html', {'content-type': 'text/html'}); + nock('http://example.com/').get('/style.css').reply(200, 'style.css'); + nock('http://example.com/').get('/about/style.css').reply(200, 'about/style.css'); + + return scrape(options).then(function() { + fs.existsSync(testDirname + '/index.html').should.be.eql(true); + fs.existsSync(testDirname + '/about.html').should.be.eql(true); + fs.existsSync(testDirname + '/css/style.css').should.be.eql(true); + fs.existsSync(testDirname + '/css/style_1.css').should.be.eql(true); + + const style = fs.readFileSync(testDirname + '/css/style.css').toString(); + style.should.be.eql('style.css'); + + const style_1 = fs.readFileSync(testDirname + '/css/style_1.css').toString(); + style_1.should.be.eql('about/style.css'); + + const index = fs.readFileSync(testDirname + '/index.html').toString(); + index.should.containEql(''); + + const about = fs.readFileSync(testDirname + '/about.html').toString(); + about.should.containEql(''); + about.should.containEql(''); + }); + }); }); From f9b4ce9d22c6222f4a0db9801e373329202e2bc4 Mon Sep 17 00:00:00 2001 From: s0ph1e Date: Wed, 27 Sep 2017 18:09:37 +0300 Subject: [PATCH 2/3] Fix comparing urls --- lib/utils/index.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/utils/index.js b/lib/utils/index.js index 23e7ed44..f3cb366c 100644 --- a/lib/utils/index.js +++ b/lib/utils/index.js @@ -3,7 +3,7 @@ const url = require('url'); const path = require('path'); const Promise = require('bluebird'); -const normalizeUrl = require('normalize-url'); +const normalize = require('normalize-url'); const htmlEntities = require('he'); const _ = require('lodash'); const typeByMime = require('../config/resource-type-by-mime'); @@ -66,7 +66,7 @@ function getFilenameFromUrl (u) { * @returns {string} path */ function getFilepathFromUrl (u) { - var nu = normalizeUrl(u); + const nu = normalizeUrl(u, {removeTrailingSlash: true}); return getPathnameFromUrl(nu).substring(1); } @@ -109,6 +109,10 @@ function waitAllFulfilled (promises) { })); } +function normalizeUrl (u, opts) { + return normalize(u, extend({removeTrailingSlash: false}, opts)); +} + function urlsEqual (url1, url2) { return normalizeUrl(url1) === normalizeUrl(url2); } From 7ef0d4a10708bab8b6d46936f7e2cae866d4ecc5 Mon Sep 17 00:00:00 2001 From: s0ph1e Date: Wed, 27 Sep 2017 18:15:37 +0300 Subject: [PATCH 3/3] Add test --- test/unit/utils/utils-test.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/unit/utils/utils-test.js b/test/unit/utils/utils-test.js index 9024c754..d5c40520 100644 --- a/test/unit/utils/utils-test.js +++ b/test/unit/utils/utils-test.js @@ -220,4 +220,10 @@ describe('Utils', function () { should(utils.decodeHtmlEntities('?a=1&v=2')).be.eql('?a=1&v=2'); }); }); + + describe('#urlsEqual', () => { + it('should return false for /path and /path/', function() { + should(utils.urlsEqual('http://example.com/path', 'http://example.com/path/')).be.eql(false); + }); + }) });