diff --git a/README.md b/README.md index b5f7a529..de6b84cf 100644 --- a/README.md +++ b/README.md @@ -22,19 +22,19 @@ npm install website-scraper ## Usage ```javascript -var scraper = require('website-scraper'); +var scrape = require('website-scraper'); var options = { urls: ['http://nodejs.org/'], directory: '/path/to/save/', }; // with callback -scraper.scrape(options, function (error, result) { +scrape(options, function (error, result) { /* some code here */ }); // or with promise -scraper.scrape(options).then(function (result) { +scrape(options).then(function (result) { /* some code here */ }); ``` @@ -98,8 +98,8 @@ and separate files into directories: - `css` for .css (full path `/path/to/save/css`) ```javascript -var scraper = require('website-scraper'); -scraper.scrape({ +var scrape = require('website-scraper'); +scrape({ urls: [ 'http://nodejs.org/', // Will be saved with default filename 'index.html' {url: 'http://nodejs.org/about', filename: 'about.html'}, @@ -132,8 +132,8 @@ scraper.scrape({ ```javascript // Links from example.com will be followed // Links from links will be ignored because theirs depth = 2 is greater than maxDepth -var scraper = require('website-scraper'); -scraper.scrape({ +var scrape = require('website-scraper'); +scrape({ urls: ['http://example.com/'], directory: '/path/to/save', recursive: true, @@ -144,8 +144,8 @@ scraper.scrape({ #### Example 3. Filtering out external resources ```javascript // Links to other websites are filtered out by the urlFilter -var scraper = require('website-scraper'); -scraper.scrape({ +var scrape = require('website-scraper'); +scrape({ urls: ['http://example.com/'], urlFilter: function(url){ return url.indexOf('http://example.com') === 0; @@ -159,8 +159,8 @@ scraper.scrape({ // Downloads all the crawlable files of example.com. // The files are saved in the same structure as the structure of the website, by using the `bySiteStructure` filenameGenerator. // Links to other websites are filtered out by the urlFilter -var scraper = require('website-scraper'); -scraper.scrape({ +var scrape = require('website-scraper'); +scrape({ urls: ['http://example.com/'], urlFilter: function(url){ return url.indexOf('http://example.com') === 0; diff --git a/index.js b/index.js index 0719fa64..70b8cd47 100644 --- a/index.js +++ b/index.js @@ -1,5 +1,5 @@ var Scraper = require('./lib/scraper.js'); -module.exports.scrape = function scrape (options, callback) { +module.exports = function scrape (options, callback) { return new Scraper(options).scrape(callback); }; diff --git a/test/e2e/e2e-test.js b/test/e2e/e2e-test.js index 5aab4836..9d52bb7a 100644 --- a/test/e2e/e2e-test.js +++ b/test/e2e/e2e-test.js @@ -1,5 +1,5 @@ var should = require('should'); -var scraper = require('../../index'); +var scrape = require('../../index'); var URL = require('url'); var fs = require('fs-extra'); var _ = require('lodash'); @@ -26,7 +26,7 @@ describe('E2E', function() { scraperOptions.directory = resultDirname + '/' + hostname + '-byType'; scraperOptions.urls = [ { url: url, filename: 'index.html' } ]; scraperOptions.filenameGenerator = 'byType'; - return scraper.scrape(scraperOptions).then(function(result) { + return scrape(scraperOptions).then(function(result) { result.should.be.ok(); }); }); @@ -37,7 +37,7 @@ describe('E2E', function() { scraperOptions.directory = resultDirname + '/' + hostname + '-bySiteStructure'; scraperOptions.urls = [ { url: url } ]; scraperOptions.filenameGenerator = 'bySiteStructure'; - return scraper.scrape(scraperOptions).then(function(result) { + return scrape(scraperOptions).then(function(result) { result.should.be.ok(); }); }); diff --git a/test/functional/base/base.test.js b/test/functional/base/base.test.js index 7ff76ee1..d75e1ed3 100644 --- a/test/functional/base/base.test.js +++ b/test/functional/base/base.test.js @@ -2,7 +2,7 @@ require('should'); var nock = require('nock'); var fs = require('fs-extra'); var cheerio = require('cheerio'); -var scraper = require('../../../index'); +var scrape = require('../../../index'); var Resource = require('../../../lib/resource'); var testDirname = __dirname + '/.tmp'; @@ -65,7 +65,7 @@ describe('Functional base', function() { // mocks for blog.html nock('http://blog.example.com/').get('/files/fail-1.png').replyWithError('something awful happened'); - return scraper.scrape(options).then(function(result) { + return scrape(options).then(function(result) { // should return right result result.should.be.instanceOf(Array).and.have.length(3); diff --git a/test/functional/circular-dependencies/circular-dependencies.test.js b/test/functional/circular-dependencies/circular-dependencies.test.js index 793eed27..f084601b 100644 --- a/test/functional/circular-dependencies/circular-dependencies.test.js +++ b/test/functional/circular-dependencies/circular-dependencies.test.js @@ -1,7 +1,7 @@ require('should'); var nock = require('nock'); var fs = require('fs-extra'); -var scraper = require('../../../index'); +var scrape = require('../../../index'); var testDirname = __dirname + '/.tmp'; var mockDirname = __dirname + '/mocks'; @@ -38,7 +38,7 @@ describe('Functional circular dependencies', function() { nock('http://example.com/').get('/style.css').replyWithFile(200, mockDirname + '/style.css'); nock('http://example.com/').get('/style2.css').replyWithFile(200, mockDirname + '/style2.css'); - return scraper.scrape(options).then(function() { + return scrape(options).then(function() { fs.existsSync(testDirname + '/index.html').should.be.eql(true); fs.existsSync(testDirname + '/about.html').should.be.eql(true); fs.existsSync(testDirname + '/style.css').should.be.eql(true); diff --git a/test/functional/css-handling/css-handling.test.js b/test/functional/css-handling/css-handling.test.js index c8a49f29..5ca7cb5b 100644 --- a/test/functional/css-handling/css-handling.test.js +++ b/test/functional/css-handling/css-handling.test.js @@ -1,7 +1,7 @@ var should = require('should'); var nock = require('nock'); var fs = require('fs-extra'); -var Scraper = require('../../../lib/scraper'); +var scrape = require('../../../index'); var testDirname = __dirname + '/.tmp'; var mockDirname = __dirname + '/mocks'; @@ -38,9 +38,8 @@ describe('Functional: css handling', function() { { directory: 'local', extensions: ['.png', '.css'] } ] }; - var scraper = new Scraper(options); - return scraper.scrape(options).then(function() { + return scrape(options).then(function() { fs.existsSync(testDirname + '/index.html').should.be.eql(true); fs.existsSync(testDirname + '/local/style.css').should.be.eql(true); fs.existsSync(testDirname + '/local/style-import-1.css').should.be.eql(true); diff --git a/test/functional/html-entities-in-url/html-entities-in-url.test.js b/test/functional/html-entities-in-url/html-entities-in-url.test.js index 153dc377..fca77030 100644 --- a/test/functional/html-entities-in-url/html-entities-in-url.test.js +++ b/test/functional/html-entities-in-url/html-entities-in-url.test.js @@ -1,7 +1,7 @@ var should = require('should'); var nock = require('nock'); var fs = require('fs-extra'); -var Scraper = require('../../../lib/scraper'); +var scrape = require('../../../index'); var testDirname = __dirname + '/.tmp'; var mockDirname = __dirname + '/mocks'; @@ -47,9 +47,8 @@ describe('Functional: html entities in url', function() { ], ignoreErrors: false }; - var scraper = new Scraper(options); - return scraper.scrape(options).then(function() { + return scrape(options).then(function() { fs.existsSync(testDirname + '/index.html').should.be.eql(true); var indexHtml = fs.readFileSync(testDirname + '/index.html').toString(); diff --git a/test/functional/html-id-href/html-id-href.test.js b/test/functional/html-id-href/html-id-href.test.js index a7651510..506357b7 100644 --- a/test/functional/html-id-href/html-id-href.test.js +++ b/test/functional/html-id-href/html-id-href.test.js @@ -1,7 +1,7 @@ var should = require('should'); var nock = require('nock'); var fs = require('fs-extra'); -var Scraper = require('../../../lib/scraper'); +var scrape = require('../../../index'); var testDirname = __dirname + '/.tmp'; var mockDirname = __dirname + '/mocks'; @@ -39,9 +39,8 @@ describe('Functional html id href', function() { { directory: 'local', extensions: ['.png', '.svg'] } ] }; - var scraper = new Scraper(options); - return scraper.scrape(options).then(function() { + return scrape(options).then(function() { fs.existsSync(testDirname + '/index.html').should.be.eql(true); fs.existsSync(testDirname + '/other.html').should.be.eql(true); fs.existsSync(testDirname + '/local/sprite.svg').should.be.eql(true); diff --git a/test/functional/recursive/recursive.test.js b/test/functional/recursive/recursive.test.js index 2e53e469..c9b1979d 100644 --- a/test/functional/recursive/recursive.test.js +++ b/test/functional/recursive/recursive.test.js @@ -1,7 +1,7 @@ require('should'); var nock = require('nock'); var fs = require('fs-extra'); -var scraper = require('../../../index'); +var scrape = require('../../../index'); var testDirname = __dirname + '/.tmp'; var mockDirname = __dirname + '/mocks'; @@ -36,7 +36,7 @@ describe('Functional recursive downloading', function() { nock('http://example.com/').get('/link2.html').reply(200, 'content 2'); nock('http://example.com/').get('/link3.html').reply(200, 'content 3'); - return scraper.scrape(options).then(function() { + return scrape(options).then(function() { fs.existsSync(testDirname + '/index.html').should.be.eql(true); // index.html anchors loaded @@ -73,7 +73,7 @@ describe('Functional recursive downloading', function() { nock('http://example.com/').get('/link1-1.html').reply(200, 'content 1-1'); nock('http://example.com/').get('/link1-2.html').reply(200, 'content 1-2'); - return scraper.scrape(options).then(function() { + return scrape(options).then(function() { fs.existsSync(testDirname + '/index.html').should.be.eql(true); // index.html anchors loaded (depth 1) @@ -106,7 +106,7 @@ describe('Functional recursive downloading', function() { nock('http://example.com/').get('/link2.html').reply(200, 'content 2'); nock('http://example.com/').get('/link3.html').reply(200, 'content 3'); - return scraper.scrape(options).then(function() { + return scrape(options).then(function() { fs.existsSync(testDirname + '/index.html').should.be.eql(true); // index.html anchors loaded diff --git a/test/functional/redirect/redirect.test.js b/test/functional/redirect/redirect.test.js index a28724fa..322e1fa4 100644 --- a/test/functional/redirect/redirect.test.js +++ b/test/functional/redirect/redirect.test.js @@ -43,7 +43,7 @@ describe('Functional redirects', function() { var scraper = new Scraper(options); var loadToFsSpy = sinon.spy(scraper.fsAdapter, 'saveResource'); - return scraper.scrape(options).then(function() { + return scraper.scrape().then(function() { loadToFsSpy.callCount.should.be.eql(2); loadToFsSpy.args[0][0].filename.should.be.eql('index.html'); loadToFsSpy.args[1][0].filename.should.be.eql('true-page.html'); diff --git a/test/functional/resource-without-ext/resource-without-ext.test.js b/test/functional/resource-without-ext/resource-without-ext.test.js index 4441b3ac..3f33fb21 100644 --- a/test/functional/resource-without-ext/resource-without-ext.test.js +++ b/test/functional/resource-without-ext/resource-without-ext.test.js @@ -1,7 +1,7 @@ require('should'); var nock = require('nock'); var fs = require('fs-extra'); -var scraper = require('../../../index'); +var scrape = require('../../../index'); var testDirname = __dirname + '/.tmp'; var mockDirname = __dirname + '/mocks'; @@ -51,7 +51,7 @@ describe('Functional resources without extensions', function() { nock('http://google.com').get('/').replyWithFile(200, mockDirname + '/google.html'); nock('http://google.com').get('/google.png').reply(200, 'OK'); - return scraper.scrape(options).then(function() { + return scrape(options).then(function() { // should load css file and fonts from css file fs.existsSync(testDirname + '/css.css').should.be.eql(true); // http://fonts.googleapis.com/css?family=Lato fs.existsSync(testDirname + '/UyBMtLsHKBKXelqf4x7VRQ.woff2').should.be.eql(true);