Website crawler (Webcrawler) JavaScript integration
Install dependency
npm i webcrawlerapi-js
How to get an access key?
Read the Access Key section of the docs to obtain a key.
Usage
Synchronous way (waits until all items are done):
import webcrawlerapi from "webcrawlerapi-js";
/**
 * Runs a blocking crawl: awaits `client.crawl(...)` and prints whatever it
 * resolves with.
 *
 * @returns {Promise<void>} Resolves once the response has been logged.
 */
async function main() {
  const apiKey = "YOUR API ACCESS KEY HERE";
  const crawler = new webcrawlerapi.WebcrawlerClient(apiKey);

  // Crawl request: start URL, cap on the number of items, and output format.
  const crawlOptions = {
    items_limit: 3,
    url: "https://books.toscrape.com/",
    scrape_type: "markdown",
  };

  const result = await crawler.crawl(crawlOptions);
  console.log(result);
}
// Run the example; a rejected promise is reported through console.error.
main().catch((error) => console.error(error));
Asynchronous way (starts the job, then polls until all items are done):
import webcrawlerapi from "webcrawlerapi-js";
/**
 * Starts an asynchronous crawl job and polls it until every requested item
 * is done.
 *
 * Prints the job id and dashboard link, then fetches the job every
 * 2 seconds (at most 100 times). Once the number of items whose status is
 * "done" equals the job's `items_limit`, prints each item's original URL
 * and markdown content URL and returns.
 *
 * @returns {Promise<void>} Resolves after the finished items have been printed.
 * @throws {Error} If the job is still unfinished after the last polling attempt.
 */
async function main() {
  const maxAttempts = 100;
  const pollIntervalMs = 2000;

  const client = new webcrawlerapi.WebcrawlerClient(
    "YOUR API ACCESS KEY HERE"
  );
  const response = await client.crawlAsync({
    "items_limit": 20,
    "url": "https://books.toscrape.com/",
    "scrape_type": "markdown"
  });
  console.log(`Job id: ${response.id}`);
  console.log(`Job Dashboard link: https://dash.webcrawlerapi.com/jobs/job/${response.id}`);

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const job = await client.getJob(response.id);
    const doneItemsCount = job.job_items.filter((item) => item.status === "done").length;
    const limitItemsCount = job.items_limit;

    // NOTE(review): this assumes the job eventually yields exactly
    // `items_limit` done items; if a site has fewer pages than the limit the
    // condition may never hold — confirm against the API's job status field.
    if (doneItemsCount === limitItemsCount) {
      console.log("All items are done");
      // The job fetched above already contains the finished items, so print
      // from it directly instead of issuing a second, un-awaited request.
      for (const item of job.job_items) {
        console.log(item.original_url);
        console.log("\t", item.markdown_content_url);
      }
      return;
    }

    console.log(`Crawled ${doneItemsCount} out of ${limitItemsCount} items`);
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }

  // Make an unfinished job visible instead of ending silently.
  throw new Error(
    `Job ${response.id} did not finish after ${maxAttempts} polling attempts`
  );
}
// Run the example; a rejected promise is reported through console.error.
main().catch((error) => console.error(error));