-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
53 lines (47 loc) · 2.74 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
// ** If you wish to customize this package, please navigate to the utils folder and check the documentation. Depending on the customization you wish to implement, you likely do not need to modify the shinySpider() async function. **
// Add puppeteer, progress and chalk packages from npm to build main operation shinySpider().
const puppeteer = require("puppeteer");
const ProgressBar = require('progress');
const chalk = require('chalk');
const util = require('util');
const { tickerArray, spiderLogic } = require('./utils')
// shinySpider() uses all logic from the package to compile the finished scraper this is the function users call to start the scraping process.
// Customization should be handled first in the .js files in the /utils directory.
/**
 * Runs the full scrape: launches a headless browser, iterates over every
 * ticker in `tickerArray` (from ./utils), and collects the result of
 * `spiderLogic.scrapeData(ticker, page)` for each one.
 *
 * Customization should be handled first in the .js files in the /utils
 * directory — this entry point normally does not need to change.
 *
 * @returns {Promise<Array|string>} The array of scrape results, or a
 *   chalk-formatted rate-limit message string if the six-hour rate limit
 *   is hit mid-run (see package docs on rate limiting).
 */
async function shinySpider() {
  // Launch a new headless browser instance using puppeteer.
  const browser = await puppeteer.launch({ headless: "new" });
  const page = await browser.newPage();
  // Accumulator for scraped data, plus a console progress bar.
  let scrapeResults = [];
  const log = console.log;
  // process.stdout.columns is undefined when stdout is not a TTY (piped
  // output, CI, etc.) — fall back to a sane width so the bar math never
  // produces NaN. Cap the bar at 40 columns as before.
  const terminalColumns = process.stdout.columns ?? 70;
  const progressBarWidth = Math.min(terminalColumns - 30, 40);
  const scrapeProgress = new ProgressBar(':bar :percent :etas', {
    total: tickerArray.length,
    width: progressBarWidth,
    incomplete: chalk.red('-'),
    complete: chalk.green('*'),
  });
  // Announce the start before entering the scrape loop.
  log(chalk.bold.cyan('Starting scrape, this process could take up to a half hour. Progress will be logged to the server console.'));
  // Scrape each ticker; one ticker failing must not abort the whole run,
  // so errors are caught and logged per iteration.
  for (const ticker of tickerArray) {
    try {
      if (spiderLogic.rateLimitCheck()) {
        const scrape = await spiderLogic.scrapeData(ticker, page);
        scrapeResults.push(scrape);
        spiderLogic.lastScrape = Date.now();
        log(chalk.bold.cyan(`Scrape finished for ${ticker} 🗃️`));
        scrapeProgress.tick();
      } else {
        // Rate limit hit: close the browser before the early return so we
        // don't leak a headless Chromium process. See docs for more
        // information on rate limiting.
        await browser.close();
        return chalk.bold.red('\n' + `🛑 Six hour rate limit hit. Please review the packages documentation regarding rate limiting and customization for help.`);
      }
    } catch (error) {
      console.error(chalk.bold.red('\n' + `❕ Failed to run shinySpider function with error code: ${error}. ❕`));
    }
  }
  // Close the browser instance and return the results.
  await browser.close();
  return scrapeResults;
}
module.exports = shinySpider