Asynchronous control-flow wrapper for crawling websites
npm install jcrawler
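The usage example below also drives puppeteer directly; if it is not already present in your project (and assuming jcrawler does not bundle it as a dependency), install it alongside:

npm install puppeteer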
const jcrawler = require('jcrawler')
const puppeteer = require('puppeteer')

;(async () => {
  const crawler = jcrawler({
    puppeteer,
    concurrency: 2, // crawl up to 2 items in parallel
    rateLimit: 1000, // 1 second
    retries: 5,
    retryInterval: 1000, // 1 second
    backoff: 2, // multiplies the retryInterval for each retry
    log: true
  })

  crawler
    .on('data', data => console.log(data)) // events: data, error and end
    .on('error', err => console.error(err))
    .on('end', (data, results) => console.log(results.timer.time))

  const fruits = ['apple', 'banana', 'orange']

  await crawler.each(fruits, async (browser, page, fruit) => {
    // using puppeteer
    await page.goto('http://google.com')
    await page.type("input[title='Search']", fruit)
    await page.click("input[value=\"I'm Feeling Lucky\"]")
    await page.screenshot({ path: `${fruit}.png` })
  })
})()
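
A note on the retry settings: assuming the backoff compounds (each retry multiplies the previous wait, as the comment above suggests), the delay before retry n is retryInterval * backoff^n. A hypothetical sketch of the resulting schedule, not part of jcrawler's API:

// Sketch only: the retry delays implied by the config above,
// assuming the interval doubles on every attempt
const retryInterval = 1000
const backoff = 2
const delays = Array.from({ length: 5 }, (_, n) => retryInterval * backoff ** n)
console.log(delays) // [1000, 2000, 4000, 8000, 16000] => up to ~31s of waiting per failing item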