Skip to content

Commit

Permalink
feat: incremental saving of scan progress, add result.error, better p…
Browse files Browse the repository at this point in the history
…age load error detection, scan document exist
  • Loading branch information
popstas committed Mar 15, 2024
1 parent 902ba18 commit 69894c0
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 48 deletions.
45 changes: 30 additions & 15 deletions src/actions/saveAsJson.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,38 @@ export default async ({csvPath, jsonPath, lang, preset, defaultFilter, url, args

// filter empty redirected items
console.log("data.items before filter:", data.items.length);
data.items = data.items.filter((item, i, self) => {
const hasBetter = self.filter((t, i2) => {
if (t.redirected_from === item.url) return true;
data.items = filterItems(data.items); // TODO: uncomment

if (i < i2) return false; // exclude previous items
// write
const raw = JSON.stringify(data);
fs.writeFileSync(jsonPath, raw);

const msg = `Saved ${data.items.length} items` + (itemsPartial.length > 0 ? `, including ${itemsPartial.length} previous items` : '');
console.log("saveAsJson:", msg);

return data;
}

/**
 * Drops scan items that are superseded by a better entry for the same page.
 *
 * An item is removed when either:
 *  - any item names it as a redirect source (`redirected_from === item.url`),
 *    i.e. it is the empty "redirected" placeholder; or
 *  - a later item in the list has the same `url` (the last occurrence wins,
 *    so freshly scanned items replace previously saved ones).
 *
 * The input array is not modified; surviving items keep their relative order.
 *
 * @param {Array<{url: string, redirected_from?: string}>} items - scanned items
 * @returns {Array<{url: string, redirected_from?: string}>} filtered copy
 */
function filterItems(items) {
  // Every URL that some item was redirected from.
  const redirectSources = new Set(items.map((item) => item.redirected_from));

  // Index of the last item seen for each URL; later entries overwrite earlier ones.
  const lastIndexByUrl = new Map();
  items.forEach((item, index) => {
    lastIndexByUrl.set(item.url, index);
  });

  return items.filter((item, index) => {
    if (redirectSources.has(item.url)) return false; // superseded by its redirect target
    return lastIndexByUrl.get(item.url) === index; // keep only the newest duplicate
  });
}

function flattenItems(items) {
// flatten items
Expand Down
17 changes: 10 additions & 7 deletions src/config.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import fs from 'fs';
import { homedir } from 'os';
import path from 'path';
import { pathToFileURL } from 'url';

/**
 * Converts the raw MAX_CONCURRENCY environment value to a number.
 *
 * Environment variables are always strings; returning a real integer keeps
 * later numeric comparisons and assignments of the limit type-safe.
 *
 * @param {string|undefined} value - raw environment value
 * @returns {number} positive integer limit, or 0 when unset/invalid (no limit)
 */
function parseMaxConcurrency(value) {
  const parsed = Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : 0;
}

// Optional per-user config file located in the home directory.
const configPath = path.join(homedir(), '.site-audit-seo.conf.js');

let config = {};
if (fs.existsSync(configPath)) {
  // Dynamic import() of an absolute path requires a file:// URL on Windows.
  const fileData = await import(pathToFileURL(configPath).href);
  // Fall back to an empty object when the file has no default export.
  config = fileData?.default ?? {};
}

// The environment-derived limit always overrides the value from the config file.
config = {
  ...config,
  maxConcurrency: parseMaxConcurrency(process.env.MAX_CONCURRENCY),
};

export default config;
2 changes: 2 additions & 0 deletions src/presets/columns.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export default {
'og_image',
'request_time',
'status',
'error',
'dom_size',
'html_size',
'is_canonical',
Expand Down Expand Up @@ -48,6 +49,7 @@ export default {
'url',
'depth',
'status',
'error',
'mixed_content_url',
'is_canonical',
'request_time',
Expand Down
5 changes: 5 additions & 0 deletions src/presets/fields.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ export const fields = [
comment: 'URL',
groups: ['info'],
},
{
name: 'error',
comment: 'Error',
groups: ['info'],
},
{
name: 'screenshot',
comment: 'Screenshot',
Expand Down
6 changes: 6 additions & 0 deletions src/presets/scraperFields.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export default {
"seo-minimal": [
'response.url',
'depth',
'result.error',
'response.status',
'result.request_time',
'result.redirected_from',
Expand All @@ -20,6 +21,7 @@ export default {
'result.is_canonical',
'previousUrl',
'depth',
'result.error',
'response.status',
'result.request_time',
'result.redirected_from',
Expand Down Expand Up @@ -67,6 +69,7 @@ export default {
headers: [
'response.url',
'depth',
'result.error',
'response.headers.content-type',
'response.headers.',
'response.headers.x-bitrix-composite',
Expand All @@ -76,6 +79,7 @@ export default {
],
parse: [
'response.url',
'result.error',
'result.title',
'result.h1',
'result.description',
Expand All @@ -96,6 +100,7 @@ export default {
lighthouse: [
'response.url',
'result.title',
'result.error',
'lighthouse.scores.performance',
'lighthouse.scores.pwa',
'lighthouse.scores.accessibility',
Expand All @@ -112,6 +117,7 @@ export default {
'lighthouse-all': [
'response.url',
'result.title',
'result.error',
'lighthouse.scores.performance',
'lighthouse.scores.pwa',
'lighthouse.scores.accessibility',
Expand Down
7 changes: 6 additions & 1 deletion src/program.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ program.postParse = async () => {
// c = 2, when lighthouse or screenshot -> c = 1
if (program.concurrency === undefined) {
program.concurrency = getConfigVal('concurrency', os.cpus().length);
if (config.maxConcurrency && program.concurrency > config.maxConcurrency) {
program.concurrency = config.maxConcurrency;
}
}
if (program.lighthouse) {
program.concurrency = 1;
Expand Down Expand Up @@ -219,6 +222,7 @@ program.option('-u --urls <urls>', 'Comma separated url list for scan', list).
option('--upload', `Upload JSON to public web`,
getConfigVal('upload', false)).
option('--no-color', `No console colors`).
option('--partial-report <partialReport', ``).
option('--lang <lang>', `Language (en, ru, default: system language)`,
getConfigVal('lang', undefined)).
option('--open-file',
Expand Down Expand Up @@ -267,7 +271,8 @@ program.getOptions = () => {
removeSelectors: program.removeSelectors, // удалить селекторы перед скриншотом
urls: program.urls, // адреса для одиночного сканирования
timeout: program.timeout, // таймаут запроса одной страницы
disablePlugins: program.disablePlugins
disablePlugins: program.disablePlugins,
partialReport: program.partialReport,
};
return opts;
}
Expand Down

0 comments on commit 69894c0

Please sign in to comment.