Skip to content

Commit

Permalink
feat: --upload report to persistent public store
Browse files Browse the repository at this point in the history
  • Loading branch information
popstas committed Aug 21, 2020
1 parent e49edee commit 192665e
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 11 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ Usage: site-audit-seo -u https://example.com
Options:
-u --urls <urls> Comma separated url list for scan
-p, --preset <preset> Table preset (minimal, seo, headers, parse, lighthouse) (default: "seo")
-p, --preset <preset> Table preset (minimal, seo, headers, parse, lighthouse, lighthouse-all) (default: "seo")
-e, --exclude <fields> Comma separated fields to exclude from results
-d, --max-depth <depth> Max scan depth (default: 10)
-c, --concurrency <threads> Threads number (default: 2)
--lighthouse Do lighthouse
--lighthouse Appends base Lighthouse fields to preset
--delay <ms> Delay between requests (default: 0)
-f, --fields <json> Field in format --field 'title=$("title").text()' (default: [])
--no-skip-static Scan static files
Expand All @@ -55,8 +55,11 @@ Options:
--out-dir <dir> Output directory (default: ".")
--csv <path> Skip scan, only convert csv to xlsx
--web Publish sheet to google docs
--json Output results in JSON
--upload Upload JSON to public web
--no-color No console colors
--open-file Open file after scan (default: yes on Windows and MacOS)
--no-open-file Don't open file after scan
--no-console-validate Don't output validate messages in console
-V, --version output the version number
-h, --help display help for command
Expand Down
13 changes: 13 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"dependencies": {
"@popstas/headless-chrome-crawler": "^1.8.2",
"@popstas/xlsx-style": "^0.8.20",
"axios": "^0.20.0",
"chrome-launcher": "^0.13.4",
"commander": "^5.0.0",
"csvtojson": "^2.0.10",
Expand Down
10 changes: 8 additions & 2 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const packageJson = require('../package.json');
const scrapSite = require('./scrap-site');
const saveAsXlsx = require('./save-as-xlsx');
const saveAsJson = require('./save-as-json');
const uploadJson = require('./upload-json');
const publishGoogleSheets = require('./publish-google-sheets');
const startViewer = require('./start-viewer');
const { exec } = require('child_process');
Expand Down Expand Up @@ -56,6 +57,7 @@ program
.option('--csv <path>', `Skip scan, only convert csv to xlsx`)
.option('--web', `Publish sheet to google docs`)
.option('--json', `Output results in JSON`)
.option('--upload', `Upload JSON to public web`)
.option('--no-color', `No console colors`)
.option('--open-file', `Open file after scan (default: yes on Windows and MacOS)`)
.option('--no-open-file', `Don't open file after scan`)
Expand All @@ -73,12 +75,14 @@ async function start() {
if(program.csv) {
const csvPath = program.csv
const xlsxPath = csvPath.replace(/\.csv$/, '.xlsx');
const jsonPath = csvPath.replace(/\.csv$/, '.json');
let jsonPath = csvPath.replace(/\.csv$/, '.json');
let webPath;
try {
saveAsXlsx(csvPath, xlsxPath);
if (program.json) await saveAsJson(csvPath, jsonPath);
if (program.upload) webPath = await uploadJson(jsonPath, program);
if (program.web) await publishGoogleSheets(xlsxPath);
if (program.json) await startViewer(jsonPath);
if (program.json) await startViewer(jsonPath, webPath);
console.log(`${xlsxPath} saved`);
if(program.openFile) exec(`"${xlsxPath}"`);
} catch(e) {
Expand Down Expand Up @@ -193,6 +197,8 @@ async function start() {
fields: program.fields, // additional fields, --fields 'title=$("title").text()'
removeCsv: program.removeCsv, // remove csv after xlsx is generated
web: program.web, // publish to google docs
json: program.json, // save json file
upload: program.upload, // upload json to the server
consoleValidate: program.consoleValidate, // print validation data to the console
obeyRobotsTxt: !program.ignoreRobotsTxt, // obey robots.txt unless program.ignoreRobotsTxt is set
});
Expand Down
8 changes: 8 additions & 0 deletions src/presets/columns.js
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ module.exports = {
'schema_types',
]
},
title: {
name: 'title',
groups: ['seo'],
columns: [
'url',
'title',
]
},
mixed_content: {
name: 'mixed_content',
groups: ['info', 'seo'],
Expand Down
11 changes: 9 additions & 2 deletions src/scrap-site.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
const fs = require('fs');
const saveAsXlsx = require('./save-as-xlsx');
const saveAsJson = require('./save-as-json');
const uploadJson = require('./upload-json');
const publishGoogleSheets = require('./publish-google-sheets');
const startViewer = require('./start-viewer');
const HCCrawler = require('@popstas/headless-chrome-crawler');
const CSVExporter = require('@popstas/headless-chrome-crawler/exporter/csv');
const url = require('url');
Expand Down Expand Up @@ -255,6 +257,7 @@ module.exports = async (baseUrl, options = {}) => {
const csvPath = `${options.outDir}/${domain}.csv`;
const xlsxPath = `${options.outDir}/${domain}.xlsx`;
const jsonPath = `${options.outDir}/${domain}.json`;
let webPath;

if(!options.color) color.white = color.red = color.reset = color.yellow = '';

Expand Down Expand Up @@ -674,16 +677,20 @@ module.exports = async (baseUrl, options = {}) => {
let isSuccess = true;
try {
saveAsXlsx(csvPath, xlsxPath);
if (options.web) await publishGoogleSheets(xlsxPath);
if (options.json) await saveAsJson(csvPath, jsonPath);
if (options.upload) webPath = await uploadJson(jsonPath, options);
if (options.web) await publishGoogleSheets(xlsxPath);
if (options.json) await startViewer(jsonPath, webPath);
} catch (e) {
if(e.code == 'EBUSY'){
isSuccess = false;
console.error(`${color.red}${xlsxPath} is busy, please close file in 10 seconds!`);
setTimeout(async () => {
saveAsXlsx(csvPath, xlsxPath);
if (options.web) await publishGoogleSheets(xlsxPath);
if (options.json) await saveAsJson(csvPath, jsonPath);
if (options.upload) webPath = await uploadJson(jsonPath, options);
if (options.web) await publishGoogleSheets(xlsxPath);
if (options.json) await startViewer(jsonPath, webPath);
finishScan();
}, 10000)
}
Expand Down
9 changes: 4 additions & 5 deletions src/start-viewer.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
const http = require('http')
const fs = require('fs')
const express = require('express')

module.exports = async (jsonPath) => {
module.exports = async (jsonPath, webPath=false) => {
const app = express();
const port = 3001;
app.use('/', express.static('./web'));
Expand All @@ -18,10 +17,10 @@ module.exports = async (jsonPath) => {
});

app.listen(port, () => {
const dataJson = `http://localhost:${port}/data.json`;
const dataJson = webPath || `http://localhost:${port}/data.json`;
// console.log(`Open for view report: http://localhost:${port}/`);
console.log(`Online viewer: https://viasite.github.io/site-audit-seo-viewer/?url=${dataJson}`);
console.log(`Dev viewer: http://localhost:3000/?url=${dataJson}`);
console.log(`JSON file: ${dataJson}`);
console.log(`Dev viewer: http://localhost:3000/?url=${dataJson}`);
console.log(`Online viewer: https://viasite.github.io/site-audit-seo-viewer/?url=${dataJson}`);
});
}
24 changes: 24 additions & 0 deletions src/upload-json.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const fs = require('fs');
const path = require('path');
const axios = require('axios');

module.exports = async (jsonPath, options) => {
const data = fs.readFileSync(jsonPath, 'utf8');
// const raw = JSON.stringify(data);

const date = new Date().toISOString().replace('T', '-').replace('Z', '');
// const dateStr = date.slice(0,10);
const name = jsonPath.replace(/[^0-9a-zа-я_.]/ig, "");
const uploadName = date + '-' + name;

console.log('Uploading to https://site-audit.viasite.ru...');
const res = await axios.post('https://site-audit.viasite.ru/upload/', {
name: uploadName,
data: data
});

if (res.status !== 200 || !res.data.url) {
console.error('Failed to upload file!');
return jsonPath;
}
return res.data.url;
}

0 comments on commit 192665e

Please sign in to comment.