Skip to content

Commit

Permalink
feat: --delay between requests
Browse files Browse the repository at this point in the history
  • Loading branch information
popstas committed Apr 24, 2020
1 parent 33535b2 commit cc15ea8
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Options:
-p, --preset <preset> Table preset (minimal, seo, headers, parse) (default: "seo")
-d, --max-depth <depth> Max scan depth (default: 10)
-c, --concurrency <threads> Threads number (default: 2)
--delay <ms> Delay between requests (default: 0)
-f, --fields <json> JSON with custom fields
--no-skip-static Scan static files
--no-limit-domain Scan not only current domain
Expand Down Expand Up @@ -185,3 +186,4 @@ sites-scraper -d 1 -u https://example -f '{ "title": "$(`title`).text()" }'

## TODO:
- Unique links
- Does the scraper freeze after a timeout?
9 changes: 8 additions & 1 deletion src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ program
.option('-p, --preset <preset>', 'Table preset (minimal, seo, headers, parse)', 'seo')
.option('-d, --max-depth <depth>', 'Max scan depth', 10)
.option('-c, --concurrency <threads>', 'Threads number', 2)
.option('--delay <ms>', 'Delay between requests', 0)
.option('-f, --fields <json>', 'JSON with custom fields', JSON.parse)
.option('--no-skip-static', `Scan static files`)
.option('--no-limit-domain', `Scan not only current domain`)
Expand Down Expand Up @@ -54,6 +55,11 @@ async function start() {

const sites = program.urls;

// When a positive --delay is requested, force concurrency down to 1 and tell
// the user; the log message below states concurrency must be 1 when delay is set.
// NOTE(review): both values presumably arrive as strings from the CLI, so the
// `>` and loose `!=` comparisons rely on numeric coercion — confirm before
// tightening them to strict comparisons.
if(program.delay > 0 && program.concurrency != 1) {
console.log('Force set concurrency to 1, must be 1 when delay is set');
program.concurrency = 1;
}

if(program.docsExtensions === undefined) {
program.docsExtensions = ['doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'pdf', 'rar', 'zip'];
}
Expand All @@ -63,7 +69,8 @@ async function start() {
await scrapSite(site, {
fields_preset: program.preset, // варианты: default, seo, headers, minimal
maxDepth: program.maxDepth, // глубина сканирования
maxConcurrency: program.concurrency, // параллельно открываемые вкладки
maxConcurrency: parseInt(program.concurrency), // параллельно открываемые вкладки
delay: parseInt(program.delay), // задержка между запросами
skipStatic: program.skipStatic, // не пропускать подгрузку браузером статики (картинки, css, js)
followSitemapXml: program.followXmlSitemap, // чтобы найти больше страниц
limitDomain: program.limitDomain, // не пропускать подгрузку браузером статики (картинки, css, js)
Expand Down

0 comments on commit cc15ea8

Please sign in to comment.