Skip to content

Commit

Permalink
fix: add failed requests to table, like http 204
Browse files: browse the repository at this point in the history
  • Loading branch information
popstas committed Aug 25, 2022
1 parent d697ff6 commit 61844de
Showing 1 changed file with 37 additions and 3 deletions.
40 changes: 37 additions & 3 deletions src/scrap-site.js
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ module.exports = async (baseUrl, options = {}) => {
separator: ';',
});

const failedUrls = [];

let crawler;
const defaultOptions = {
allowedDomains: options.limitDomain ? [domain] : undefined,
Expand Down Expand Up @@ -345,12 +347,44 @@ module.exports = async (baseUrl, options = {}) => {
});

// Handler for requests that fail while the crawled page is loading.
// It ignores advertising hosts, reports mixed-content requests, and for any
// other non-static failure logs the error and writes one row per crawled URL
// to the result table via `exporter.writeLine`.
page.on('requestfailed', request => {
  const requestUrl = request.url();

  // skip adv errors
  const advHosts = ['googlesyndication', 'googleads', 'adfox', 'an.yandex.ru', 'nativeroll.tv'];
  if (advHosts.some(host => requestUrl.includes(host))) {
    return;
  }

  // NOTE(review): `notHTTPS` is not set anywhere in the visible code; presumably
  // it is flagged on the request by another handler — confirm.
  if (request.notHTTPS) {
    console.error(
      `${color.red}${crawler._options.url}: mixed content: ${requestUrl}${color.reset}`);
    return;
  }

  // Failures of images, scripts and stylesheets are neither logged nor exported.
  const isStatic = ['image', 'script', 'stylesheet'].includes(request.resourceType());
  if (isStatic) {
    return;
  }

  // `?.` keeps the handler from throwing when there is no response or no
  // failure record; the corresponding value is then logged as `undefined`.
  const status = request.response()?.status();
  const errorText = request.failure()?.errorText;
  console.error(`Request failed: ${status}, ${requestUrl}, ${errorText}`);

  // add to result table when first error
  const pageUrl = crawler._options.url;
  if (failedUrls.includes(pageUrl)) {
    return;
  }
  failedUrls.push(pageUrl);

  // Minimal result record: only the page URL and the status are filled in,
  // every other field is an empty placeholder.
  const result = {
    options: {},
    depth: 0,
    previousUrl: '',
    response: {
      url: pageUrl,
      status,
    },
    redirectChain: [],
    screenshot: null,
    cookies: [],
    links: [],
  };
  exporter.writeLine(result);
});

Expand Down

0 comments on commit 61844de

Please sign in to comment.