diff --git a/lib/scraper.js b/lib/scraper.js index bfc1bbfa..f40cea55 100644 --- a/lib/scraper.js +++ b/lib/scraper.js @@ -1,6 +1,5 @@ var Promise = require('bluebird'); var _ = require('lodash'); -var normalizeUrl = require('normalize-url'); var logger = require('./logger'); @@ -12,6 +11,7 @@ var getFilenameGenerator = require('./filename-generators'); var makeRequest = require('./request'); var getResourceHandler = require('./file-handlers'); var FSAdapter = require('./fs-adaper'); +var utils = require('./utils'); function Scraper (options) { var self = this; @@ -52,23 +52,19 @@ Scraper.prototype.getOccupiedFileNames = function getOccupiedFileNames () { }; Scraper.prototype.addRespondedResourcePromise = function addRespondedResourcePromise (url, promise) { - url = normalizeUrl(url); - this.respondedResourcePromises[url] = promise; + this.respondedResourcePromises[utils.normalizeUrl(url)] = promise; }; Scraper.prototype.getRespondedResourcePromise = function getRespondedResourcePromise (url) { - url = normalizeUrl(url); - return this.respondedResourcePromises[url]; + return this.respondedResourcePromises[utils.normalizeUrl(url)]; }; Scraper.prototype.addLoadedResourcePromise = function addLoadedResourcePromise (url, promise) { - url = normalizeUrl(url); - this.loadedResourcePromises[url] = promise; + this.loadedResourcePromises[utils.normalizeUrl(url)] = promise; }; Scraper.prototype.getLoadedResourcePromise = function getLoadedResourcePromise (url) { - url = normalizeUrl(url); - return this.loadedResourcePromises[url]; + return this.loadedResourcePromises[utils.normalizeUrl(url)]; }; Scraper.prototype.getHtmlSources = function getHtmlSources () { @@ -123,7 +119,7 @@ Scraper.prototype.requestResource = function requestResource (resource) { }).then(function requestCompleted (responseData) { logger.debug('received response for ' + url); - if (responseData.url !== url) { // Url may be changed in redirects + if (!utils.urlsEqual(responseData.url, url)) { // Url may be changed in redirects logger.debug('url changed. old url = ' + url + ', new ulr = ' + responseData.url); resource.setUrl(responseData.url); self.addRespondedResourcePromise(responseData.url, respondedResourcePromise); diff --git a/lib/utils.js b/lib/utils.js index 84099c7f..d9800de5 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -1,6 +1,7 @@ var url = require('url'); var path = require('path'); var Promise = require('bluebird'); +var normalizeUrl = require('normalize-url'); var logger = require('./logger'); @@ -74,6 +75,10 @@ function waitAllFulfilled (promises) { })); } +function urlsEqual (url1, url2) { + return normalizeUrl(url1) === normalizeUrl(url2); +} + module.exports = { isUrl: isUrl, getUrl: getUrl, @@ -84,5 +89,7 @@ module.exports = { getFilenameExtension: getFilenameExtension, getHashFromUrl: getHashFromUrl, shortenFilename: shortenFilename, - waitAllFulfilled: waitAllFulfilled + waitAllFulfilled: waitAllFulfilled, + normalizeUrl: normalizeUrl, + urlsEqual: urlsEqual };