Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ node_js:
- '4'
- '5'
- '6'
- '7'
after_success:
- codeclimate-test-reporter < coverage/lcov.info
- coveralls < coverage/lcov.info
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ scrape(options, (error, result) => {
* [urlFilter](#urlfilter) - skip some urls
* [filenameGenerator](#filenamegenerator) - generate filename for downloaded resource
* [httpResponseHandler](#httpresponsehandler) - customize http response handling
* [resourceSaver](#resourcesaver) - customize resources saving
* [onResourceSaved](#onresourcesaved) - callback called when resource is saved
* [onResourceError](#onresourceerror) - callback called when downloading of a resource fails

Expand Down Expand Up @@ -211,6 +212,19 @@ scrape({
```
Scrape function resolves with array of [Resource](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource.js) objects which contain `metadata` property from `httpResponseHandler`.

#### resourceSaver
Class which saves [Resources](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource.js). It should have the methods `saveResource` and `errorCleanup`, both of which return Promises. Use it to save files wherever you need: to Dropbox, Amazon S3, an existing directory, etc. By default all files are saved to the local file system, in a new directory passed in the `directory` option (see [lib/resource-saver/index.js](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource-saver/index.js)).
```javascript
scrape({
urls: ['http://example.com/'],
directory: '/path/to/save',
resourceSaver: class MyResourceSaver {
saveResource (resource) {/* code to save file where you need */}
errorCleanup (err) {/* code to remove all previously saved files in case of error */}
}
}).then(console.log).catch(console.log);
```

#### onResourceSaved
Function called each time a resource is saved to the file system. The callback is called with a [Resource](https://github.com/s0ph1e/node-website-scraper/blob/master/lib/resource.js) object. Defaults to `null` - no callback will be called.
```javascript
Expand Down
1 change: 1 addition & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ environment:
- nodejs_version: "4"
- nodejs_version: "5"
- nodejs_version: "6"
- nodejs_version: "7"

install:
- ps: Install-Product node $env:nodejs_version
Expand Down
11 changes: 8 additions & 3 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
var Scraper = require('./lib/scraper.js');
'use strict';

module.exports = function scrape (options, callback) {
return new Scraper(options).scrape(callback);
const Promise = require('bluebird');
const Scraper = require('./lib/scraper.js');

module.exports = (options, callback) => {
return Promise.try(() => {
return new Scraper(options).scrape(callback);
});
};

module.exports.defaults = Scraper.defaults;
6 changes: 4 additions & 2 deletions lib/config/defaults.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
var config = {
'use strict';
const config = {
filenameGenerator: 'byType',
defaultFilename: 'index.html',
prettifyUrls: false,
Expand Down Expand Up @@ -51,7 +52,8 @@ var config = {
ignoreErrors: true,
httpResponseHandler: null,
onResourceSaved: null,
onResourceError: null
onResourceError: null,
resourceSaver: null
};

module.exports = config;
59 changes: 0 additions & 59 deletions lib/fs-adaper.js

This file was deleted.

72 changes: 72 additions & 0 deletions lib/resource-saver/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
'use strict';

const path = require('path');
const _ = require('lodash');
const Promise = require('bluebird');

const fs = require('fs-extra');
const outputFileAsync = Promise.promisify(fs.outputFile);
const removeAsync = Promise.promisify(fs.remove);

const supportedOptions = [ 'directory' ];

/**
 * Resource saver that writes resources to the local file system,
 * into a target directory that must not exist when the saver is created.
 */
class ResourceSaver {
	/**
	 * @param {Object} options - only the keys listed in `supportedOptions` are kept
	 * @throws {Error} if `directory` is empty or not a string, or if the resolved directory already exists
	 */
	constructor (options) {
		this.options = _.pick(options, supportedOptions);

		const directory = this.options.directory;
		const isUsableDirectory = typeof directory === 'string' && directory !== '';
		if (!isUsableDirectory) {
			throw new Error('Incorrect directory ' + directory);
		}

		// Relative directories are resolved against the current working directory
		const resolvedDirectory = path.resolve(process.cwd(), directory);
		this.absoluteDirectoryPath = resolvedDirectory;

		if (exists(resolvedDirectory)) {
			throw new Error('Directory ' + resolvedDirectory + ' exists');
		}

		// Resources written so far; consulted by errorCleanup
		this.loadedResources = [];
	}

	/**
	 * Write the resource's text to its filename inside the target directory
	 * and remember the resource once the write has succeeded
	 * @param {Resource} resource - must provide getFilename() and getText()
	 * @returns {Promise}
	 */
	saveResource (resource) {
		const targetFile = path.join(this.absoluteDirectoryPath, resource.getFilename());
		const contents = resource.getText();
		const rememberResource = () => {
			this.loadedResources.push(resource);
		};

		return outputFileAsync(targetFile, contents, { encoding: 'binary' }).then(rememberResource);
	}

	/**
	 * Remove the target directory, but only if at least one resource was saved into it
	 * @returns {Promise}
	 */
	errorCleanup () {
		if (_.isEmpty(this.loadedResources)) {
			return Promise.resolve();
		}
		return removeAsync(this.absoluteDirectoryPath);
	}
}

/**
 * Synchronously check whether something exists at the given path
 * @param {string} targetPath - path to stat
 * @returns {boolean} true when stat succeeds, false when it fails with ENOENT
 * @throws any stat error whose code is not ENOENT
 */
function exists (targetPath) {
	try {
		// statSync either returns a Stats object or throws
		return Boolean(fs.statSync(targetPath)) || undefined;
	} catch (statError) {
		if (statError.code !== 'ENOENT') {
			throw statError;
		}
		return false;
	}
}

module.exports = ResourceSaver;
Loading