Skip to content

Commit

Permalink
fix: suppress headless-chrome-crawler exceptions after max requests reached
Browse files Browse the repository at this point in the history
  • Loading branch information
popstas committed Mar 11, 2021
1 parent e5fef0b commit ac374b3
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 11 deletions.
14 changes: 7 additions & 7 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"author": "Stanislav Popov",
"license": "ISC",
"dependencies": {
"@popstas/headless-chrome-crawler": "^1.8.2",
"@popstas/headless-chrome-crawler": "^1.8.3",
"@popstas/xlsx-style": "^0.8.20",
"axios": "^0.20.0",
"body-parser": "^1.19.0",
Expand Down
17 changes: 14 additions & 3 deletions src/scrap-site.js
Original file line number Diff line number Diff line change
Expand Up @@ -498,14 +498,18 @@ module.exports = async (baseUrl, options = {}) => {
}

// Progress handler for the crawler's 'requeststarted' event.
// Logs one line per request: running request number, decoded URL and queue size.
crawler.on('requeststarted', async opts => {
// Skip everything once the browser connection is flagged closed, so no crawler
// call below runs after the scan has ended.
// NOTE(review): reads underscore-prefixed internals (_browser._connection._closed);
// confirm they exist in the pinned crawler version.
if (crawler._browser._connection._closed) return; // catch error after scan

const queueCount = await crawler.queueSize();
// requestedCount is declared outside this handler and is also reported as `pages`
// in the returned stats. The +1 presumably counts the request that is just
// starting -- TODO confirm against the crawler's requestedCount() semantics.
requestedCount = crawler.requestedCount() + 1;
if (options.cancel) {
// Sets the request limit to the number already requested; presumably this makes
// the crawler stop scheduling new requests -- verify against the crawler docs.
crawler.setMaxRequest(requestedCount); // cancel command
}
// console.log('crawler: ', crawler);
log(`${requestedCount} ${decodeURI(opts.url)} (${queueCount})`);
});
// Handler for the crawler's 'requestfailed' event.
// Writes "Failed: <decoded url>" to stderr, wrapped in color.red / color.reset.
crawler.on('requestfailed', error => {
// Stay silent when the browser connection is already flagged closed; failures
// reported at that point are treated as noise from the finished scan.
if (crawler._browser._connection._closed) return; // catch error after scan
console.error(
`${color.red}Failed: ${decodeURI(error.options.url)}${color.reset}`);
});
Expand All @@ -517,8 +521,9 @@ module.exports = async (baseUrl, options = {}) => {
if (options.maxDepth > 1) console.log(`${color.yellow}Max depth reached${color.reset}`);
});
// Handler for the crawler's 'maxrequestreached' event: prints a yellow
// "Max requests reached" notice to stdout.
// NOTE(review): this text is a rendered diff without +/- markers. Judging by the
// hunk header (-517,8 +521,9), the first console.log call (two lines, containing
// "Please, ignore this error:") is the removed version, and the closed-connection
// guard, the shorter console.log and the commented-out line are the added version.
// NOTE(review): the `options` parameter shadows the outer `options` and is not
// used inside this handler.
crawler.on('maxrequestreached', options => {
console.log(
`\n\n${color.yellow}Max requests reached\nPlease, ignore this error:${color.reset}`);
if (crawler._browser._connection._closed) return; // catch error after scan
console.log(`\n\n${color.yellow}Max requests reached${color.reset}`);
// console.log(`${color.yellow}Please, ignore this error:${color.reset}`);
});

if (options.urlList) {
Expand Down Expand Up @@ -591,8 +596,14 @@ module.exports = async (baseUrl, options = {}) => {
webPath = await uploadJson(jsonPath);
socketSend(options.socket, 'result', {json: webPath});
}

const mins = Number(t / 60).toFixed(1);
log(`Finish: ${mins} mins (${perPage} sec per page)`, options.socket);

// return stats
return {
time: t,
perPage: perPage,
pages: requestedCount,
}
}
Expand All @@ -603,7 +614,7 @@ module.exports = async (baseUrl, options = {}) => {
if (options.removeCsv) fs.unlinkSync(csvPath);

const mins = Number(t / 60).toFixed(1);
console.log(`Finish: ${mins} mins (${perPage} sec per page)`);
log(`Finish: ${mins} mins (${perPage} sec per page)`, options.socket);
};

const tryFinish = async (tries) => {
Expand Down

0 comments on commit ac374b3

Please sign in to comment.