Skip to content

Commit

Permalink
feat: ignore css and js
Browse files: browse the repository at this point in the history
  • Loading branch information
popstas committed Mar 5, 2020
1 parent de18f1e commit a203275
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions src/scrap-site.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,6 +6,11 @@ const url = require('url');
const DEBUG = true; // set to false if you don't need a console.log for every request (progress will then not be visible)
const docs = ['doc', 'docx', 'xls', 'xlsx', 'pdf', 'rar', 'zip']; // this list can be extended

// Tell the browser not to load static assets; this speeds things up.
// Each flag is checked against the request's resource type in the page
// 'request' handler, and matching requests are aborted.
const IGNORE_IMAGES = true;
const IGNORE_CSS = true;
const IGNORE_JS = true;

// the fields are described in the API at the link above
const fields = ['response.url', 'depth']; // standard set
// const fields = ['response.url', 'depth', 'response.status', 'result.title', 'result.description', 'result.keywords', 'result.canonical', 'result.og_title', 'result.og_image']; // full set
Expand Down Expand Up @@ -65,8 +70,12 @@ module.exports = async (baseUrl, options) => {
page.on('request', request => {
//console.log('request.url(): ', request.url());

// don't request image
if (request.resourceType() == 'image') {
// don't request static
if (IGNORE_IMAGES && request.resourceType() == 'image') {
request.abort();
} else if (IGNORE_CSS && request.resourceType() == 'stylesheet') {
request.abort();
} else if (IGNORE_JS && request.resourceType() == 'script') {
request.abort();
} else {
request.continue();
Expand Down

0 comments on commit a203275

Please sign in to comment.