Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions lib/scraper.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
var Promise = require('bluebird');
var _ = require('lodash');
var normalizeUrl = require('normalize-url');

var logger = require('./logger');

Expand All @@ -12,6 +11,7 @@ var getFilenameGenerator = require('./filename-generators');
var makeRequest = require('./request');
var getResourceHandler = require('./file-handlers');
var FSAdapter = require('./fs-adaper');
var utils = require('./utils');

function Scraper (options) {
var self = this;
Expand Down Expand Up @@ -52,23 +52,19 @@ Scraper.prototype.getOccupiedFileNames = function getOccupiedFileNames () {
};

/**
 * Remember the promise associated with a responded resource, keyed by the
 * normalized form of its url so that equivalent spellings of the same url
 * share a single entry.
 *
 * @param {string} url - url of the resource; normalized before being used as the key
 * @param {*} promise - value to store (presumably a bluebird promise - confirm against callers)
 */
Scraper.prototype.addRespondedResourcePromise = function addRespondedResourcePromise (url, promise) {
	// Normalize exactly once and leave the caller-supplied parameter untouched.
	this.respondedResourcePromises[utils.normalizeUrl(url)] = promise;
};

/**
 * Look up the promise previously stored for a responded resource.
 * The url is normalized first, so it matches entries stored under any
 * equivalent spelling of the same url.
 *
 * @param {string} url - url of the resource to look up
 * @returns {*} the stored value, or undefined when nothing is stored for this url
 */
Scraper.prototype.getRespondedResourcePromise = function getRespondedResourcePromise (url) {
	// Single lookup; no unreachable second return and no parameter reassignment.
	return this.respondedResourcePromises[utils.normalizeUrl(url)];
};

/**
 * Remember the promise associated with a loaded resource, keyed by the
 * normalized form of its url so that equivalent spellings of the same url
 * share a single entry.
 *
 * @param {string} url - url of the resource; normalized before being used as the key
 * @param {*} promise - value to store (presumably a bluebird promise - confirm against callers)
 */
Scraper.prototype.addLoadedResourcePromise = function addLoadedResourcePromise (url, promise) {
	// Normalize exactly once and leave the caller-supplied parameter untouched.
	this.loadedResourcePromises[utils.normalizeUrl(url)] = promise;
};

/**
 * Look up the promise previously stored for a loaded resource.
 * The url is normalized first, so it matches entries stored under any
 * equivalent spelling of the same url.
 *
 * @param {string} url - url of the resource to look up
 * @returns {*} the stored value, or undefined when nothing is stored for this url
 */
Scraper.prototype.getLoadedResourcePromise = function getLoadedResourcePromise (url) {
	// Single lookup; no unreachable second return and no parameter reassignment.
	return this.loadedResourcePromises[utils.normalizeUrl(url)];
};

Scraper.prototype.getHtmlSources = function getHtmlSources () {
Expand Down Expand Up @@ -123,7 +119,7 @@ Scraper.prototype.requestResource = function requestResource (resource) {
}).then(function requestCompleted (responseData) {
logger.debug('received response for ' + url);

if (responseData.url !== url) { // Url may be changed in redirects
if (!utils.urlsEqual(responseData.url, url)) { // Url may be changed in redirects
logger.debug('url changed. old url = ' + url + ', new ulr = ' + responseData.url);
resource.setUrl(responseData.url);
self.addRespondedResourcePromise(responseData.url, respondedResourcePromise);
Expand Down
9 changes: 8 additions & 1 deletion lib/utils.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
var url = require('url');
var path = require('path');
var Promise = require('bluebird');
var normalizeUrl = require('normalize-url');

var logger = require('./logger');

Expand Down Expand Up @@ -74,6 +75,10 @@ function waitAllFulfilled (promises) {
}));
}

/**
 * Compare two urls by their normalized forms rather than by raw string value.
 *
 * @param {string} url1 - first url
 * @param {string} url2 - second url
 * @returns {boolean} true when both urls normalize to the same string
 */
function urlsEqual (url1, url2) {
	var normalizedFirst = normalizeUrl(url1);
	var normalizedSecond = normalizeUrl(url2);

	return normalizedFirst === normalizedSecond;
}

module.exports = {
isUrl: isUrl,
getUrl: getUrl,
Expand All @@ -84,5 +89,7 @@ module.exports = {
getFilenameExtension: getFilenameExtension,
getHashFromUrl: getHashFromUrl,
shortenFilename: shortenFilename,
waitAllFulfilled: waitAllFulfilled
waitAllFulfilled: waitAllFulfilled,
normalizeUrl: normalizeUrl,
urlsEqual: urlsEqual
};