Skip to content

Commit

Permalink
feat: change user agent and defaults for parallelism and wait time
Browse files Browse the repository at this point in the history
  • Loading branch information
wintermi committed Dec 9, 2023
1 parent 7428a09 commit 9dc2414
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func NewCrawler(elementSelector string, jqSelector string, waitTime int, paralle
// Initialise New Crawler
c := new(Crawler)
c.Collector = colly.NewCollector(
- colly.UserAgent("Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/120.0"),
+ colly.UserAgent("Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/120.0"),
colly.MaxDepth(1),
colly.Async(true),
)
Expand Down
4 changes: 2 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ func main() {
var outputCsvFile = flag.String("o", "", "Output Scraped Data CSV File (Required)")
var errorCsvFile = flag.String("e", "", "Failed Request URLs Output CSV File (Required)")
var fieldDelimiter = flag.String("d", ",", "Field Delimiter (Required)")
- var parallelism = flag.Int("p", 10, "Parallelism or Maximum allowed Concurrent Requests")
- var waitTime = flag.Int("w", 500, "Random Wait Time in Milliseconds between Requests")
+ var parallelism = flag.Int("p", 100, "Parallelism or Maximum allowed Concurrent Requests")
+ var waitTime = flag.Int("w", 2000, "Random Wait Time in Milliseconds between Requests")
var scrapeXML = flag.Bool("x", false, "Scrape XML not HTML")
var verbose = flag.Bool("v", false, "Output Verbose Detail")

Expand Down

0 comments on commit 9dc2414

Please sign in to comment.