diff --git a/lib/utils/index.js b/lib/utils/index.js
index 23e7ed44..f3cb366c 100644
--- a/lib/utils/index.js
+++ b/lib/utils/index.js
@@ -3,7 +3,7 @@
const url = require('url');
const path = require('path');
const Promise = require('bluebird');
-const normalizeUrl = require('normalize-url');
+const normalize = require('normalize-url');
const htmlEntities = require('he');
const _ = require('lodash');
const typeByMime = require('../config/resource-type-by-mime');
@@ -66,7 +66,7 @@ function getFilenameFromUrl (u) {
* @returns {string} path
*/
function getFilepathFromUrl (u) {
- var nu = normalizeUrl(u);
+ const nu = normalizeUrl(u, {removeTrailingSlash: true}); // opt back in to trailing-slash removal here; the local normalizeUrl wrapper keeps the slash by default
return getPathnameFromUrl(nu).substring(1);
}
@@ -109,6 +109,10 @@ function waitAllFulfilled (promises) {
}));
}
+function normalizeUrl (u, opts) { // thin wrapper around normalize-url; values in opts override the defaults set below
+ return normalize(u, _.extend({removeTrailingSlash: false}, opts)); // keep the trailing slash by default so /path and /path/ stay distinct in urlsEqual
+}
+
function urlsEqual (url1, url2) {
return normalizeUrl(url1) === normalizeUrl(url2);
}
diff --git a/test/functional/redirect/mocks/relative-resources-about.html b/test/functional/redirect/mocks/relative-resources-about.html
new file mode 100644
index 00000000..c1b56a9b
--- /dev/null
+++ b/test/functional/redirect/mocks/relative-resources-about.html
@@ -0,0 +1,12 @@
+
+
+
+
+ About
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/test/functional/redirect/mocks/relative-resources-index.html b/test/functional/redirect/mocks/relative-resources-index.html
new file mode 100644
index 00000000..f68fb268
--- /dev/null
+++ b/test/functional/redirect/mocks/relative-resources-index.html
@@ -0,0 +1,11 @@
+
+
+
+
+ Index
+
+
+
+ About
+
+
\ No newline at end of file
diff --git a/test/functional/redirect/redirect.test.js b/test/functional/redirect/redirect.test.js
index 2ddf81e8..ec41b219 100644
--- a/test/functional/redirect/redirect.test.js
+++ b/test/functional/redirect/redirect.test.js
@@ -1,11 +1,14 @@
+'use strict';
+
require('should');
-var nock = require('nock');
-var fs = require('fs-extra');
-var sinon = require('sinon');
-var Scraper = require('../../../lib/scraper');
+const nock = require('nock');
+const fs = require('fs-extra');
+const sinon = require('sinon');
+const Scraper = require('../../../lib/scraper');
+const scrape = require('../../../index');
-var testDirname = __dirname + '/.tmp';
-var mockDirname = __dirname + '/mocks';
+const testDirname = __dirname + '/.tmp';
+const mockDirname = __dirname + '/mocks';
describe('Functional redirects', function() {
@@ -60,4 +63,47 @@ describe('Functional redirects', function() {
fs.readFileSync(testDirname + '/true-page.html').toString().should.be.eql('true page 1');
});
});
+
+ it('should correctly handle relative source in redirected page', function() {
+ const options = {
+ urls: [
+ { url: 'http://example.com', filename: 'index.html'}
+ ],
+ directory: testDirname,
+ subdirectories: [
+ { directory: 'css', extensions: ['.css'] }
+ ],
+ maxRecursiveDepth: 1,
+ sources: [
+ {selector: 'link', attr: 'href'},
+ {selector: 'a', attr: 'href'}
+ ]
+ };
+
+ nock('http://example.com/').get('/').replyWithFile(200, mockDirname + '/relative-resources-index.html');
+ nock('http://example.com/').get('/about').reply(301, '', {'Location': 'http://example.com/about/'}); // /about permanently redirects to /about/
+ nock('http://example.com/').get('/about/').replyWithFile(200, mockDirname + '/relative-resources-about.html', {'content-type': 'text/html'});
+ nock('http://example.com/').get('/style.css').reply(200, 'style.css'); // body names the route so the saved files can be told apart
+ nock('http://example.com/').get('/about/style.css').reply(200, 'about/style.css');
+
+ return scrape(options).then(function() {
+ fs.existsSync(testDirname + '/index.html').should.be.eql(true);
+ fs.existsSync(testDirname + '/about.html').should.be.eql(true);
+ fs.existsSync(testDirname + '/css/style.css').should.be.eql(true);
+ fs.existsSync(testDirname + '/css/style_1.css').should.be.eql(true);
+
+ const style = fs.readFileSync(testDirname + '/css/style.css').toString();
+ style.should.be.eql('style.css');
+
+ const style_1 = fs.readFileSync(testDirname + '/css/style_1.css').toString();
+ style_1.should.be.eql('about/style.css');
+
+ const index = fs.readFileSync(testDirname + '/index.html').toString();
+ index.should.containEql('css/style.css'); // NOTE(review): expected string was empty (always passes); asserting the saved stylesheet path instead - confirm against the mock page
+
+ const about = fs.readFileSync(testDirname + '/about.html').toString();
+ about.should.containEql('css/style.css'); // NOTE(review): assumes the about page also links the root stylesheet - confirm against the mock page
+ about.should.containEql('css/style_1.css'); // the page-relative stylesheet (/about/style.css) is the one saved as style_1.css
+ });
+ });
});
diff --git a/test/unit/utils/utils-test.js b/test/unit/utils/utils-test.js
index 9024c754..d5c40520 100644
--- a/test/unit/utils/utils-test.js
+++ b/test/unit/utils/utils-test.js
@@ -220,4 +220,10 @@ describe('Utils', function () {
should(utils.decodeHtmlEntities('?a=1&v=2')).be.eql('?a=1&v=2');
});
});
+
+ describe('#urlsEqual', function () {
+ it('should return false for /path and /path/', function() {
+ should(utils.urlsEqual('http://example.com/path', 'http://example.com/path/')).be.eql(false); // a trailing slash makes the two URLs different
+ });
+ });
});