Skip to content

Commit

Permalink
feat: --influxdb-max-send, working in CLI, config influxdb.maxSendCount
Browse files Browse the repository at this point in the history
  • Loading branch information
popstas committed Dec 26, 2020
1 parent f9f729e commit b09b54e
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 5 deletions.
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,36 @@ site-audit-seo -u https://example.com --lighthouse
## Config file
You can copy [.site-audit-seo.conf.js](.site-audit-seo.conf.js) to your home directory and tune options.

## Send to InfluxDB
This is a beta feature. To configure it:

1. Add this to `~/.site-audit-seo.conf.js`:

``` js
module.exports = {
influxdb: {
host: 'influxdb.host',
port: 8086,
database: 'telegraf',
measurement: 'site_audit_seo', // optional
username: 'user',
password: 'password',
maxSendCount: 5, // optional, max number of pages to send (default: 5)
}
};
```

2. Use `--influxdb-max-send <num>` in the terminal to override `maxSendCount`.

3. Create a command to scan your URLs:

```
site-audit-seo -u https://page-with-url-list.txt --url-list --lighthouse --upload --influxdb-max-send 100 >> ~/log/site-audit-seo.log
```

4. Add the command to cron.


## Credentials
Based on [headless-chrome-crawler](https://github.com/yujiosaka/headless-chrome-crawler) (puppeteer). Used forked version [@popstas/headless-chrome-crawler](https://github.com/popstas/headless-chrome-crawler).

Expand Down
5 changes: 5 additions & 0 deletions src/actions/sendToInfluxDB.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,16 @@ module.exports = async (jsonPath, options) => {
const influx = init(options);

// points list for influx
const sendFirstCount = options.influxdb.maxSendCount || 5;
let sent = 0;
const points = [];
for (let item of data.items) {
// console.log('item: ', item);
const point = await getPoint(item, schema);
points.push(point);

sent++;
if (sent >= sendFirstCount) break;
}

// console.log('writePoints');
Expand Down
4 changes: 2 additions & 2 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ const {saveAsXlsx, saveAsJson, uploadJson, publishGoogleDrive, startViewer} = re
'./actions');
const {exec} = require('child_process');

program.parse(process.argv);

async function start() {
program.parse(process.argv);

if (!program.urls) {
console.log(`${program.name()} ${program.version()}`);
console.log(`Usage: ${program.name()} ${program.usage()}`);
Expand Down
37 changes: 35 additions & 2 deletions src/program.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,23 @@ const fieldsCustomCollect = (value, previous) => {

function getConfigVal(name, def) {
let val = undefined;
if (config[name] === undefined) val = def;
// objects like 'influxdb.maxSendCount'
if (name.includes('.')) {
const parts = name.split('.');
let conf = config;
for (let part of parts) {
conf = conf[part];
if (conf === undefined) {
conf = def;
break;
}
}

if (typeof conf === 'object') val = def;
else val = conf;
}

else if (config[name] === undefined) val = def;
else val = config[name];
// console.log(`config: ${name}: `, val);
return val;
Expand Down Expand Up @@ -105,6 +121,8 @@ program.option('-u --urls <urls>', 'Comma separated url list for scan', list).
getConfigVal('ignoreRobotsTxt', false)).
option('-m, --max-requests <num>', `Limit max pages scan`,
getConfigVal('maxRequests', 0)).
option('--influxdb-max-send <num>', `Limit send to InfluxDB`,
getConfigVal('influxdb.maxSendCount', 5)).
option('--no-headless', `Show browser GUI while scan`,
!getConfigVal('headless', true)).
option('--remove-csv', `No delete csv after xlsx generate`,
Expand Down Expand Up @@ -178,7 +196,9 @@ program.postParse = async () => {
program.concurrency = os.cpus().length;
}

if (program.urls && program.urls.length > 1) program.urlList = true;
if (program.urls) {
if (program.urls.length > 1) program.urlList = true;
}
else program.urls = [];

if (program.urlList) {
Expand All @@ -190,6 +210,9 @@ program.postParse = async () => {

// influxdb config from ~/.site-audit-seo.conf.js
program.influxdb = getConfigVal('influxdb', false);
if (program.influxdb && program.influxdbMaxSend) {
program.influxdb.maxSendCount = program.influxdbMaxSend;
}

program.outDir = expandHomedir(program.outDir);
createDirIfNotExists(program.outDir);
Expand Down Expand Up @@ -283,6 +306,16 @@ program.outBrief = (options) => {
},
];

if (options.influxdb) {
brief = [...brief, ...[
{
name: 'InfluxDB max send',
value: program.influxdbMaxSend,
comment: '--influxdb-max-send 10',
},
]];
}

// only for command
if (!options.webService) {
brief = [...brief, ...[
Expand Down
9 changes: 8 additions & 1 deletion src/scrap-site.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ module.exports = async (baseUrl, options = {}) => {

let urls = [];
if (options.urlList) {
if (options.urls && options.urls.length > 0) urls = options.urls;
if (options.urls && options.urls.length > 1) urls = options.urls;
else urls = await parseUrls(baseUrl);
}

Expand Down Expand Up @@ -527,7 +527,14 @@ module.exports = async (baseUrl, options = {}) => {
if (options.upload) webPath = await uploadJson(jsonPath, options);
// if (options.gdrive) webPath = await publishGoogleDrive(jsonPath);

if (options.influxdb) {
log('send to InfluxDB...');
const points = await sendToInfluxDB(jsonPath, options);
log(`sent ${points.length} points`);
}

await startViewer(jsonPath, webPath);

if (options.removeJson) fs.unlinkSync(jsonPath);
}

Expand Down

0 comments on commit b09b54e

Please sign in to comment.