#!/usr/bin/env node
const puppeteer = require('puppeteer-core');
// Runtime settings, each overridable through an environment variable of the
// same name. AUTH must be replaced with your Browser API zone credentials
// (either via the environment or by editing the default below).
const { AUTH = 'USER:PASS', TARGET_URL = 'https://example.com' } = process.env;
/**
 * Connects to the remote browser, loads a page and prints its HTML.
 *
 * Progress messages, the session inspection URL and the page's serialized
 * HTML are written to stdout. Once the connection has been established the
 * browser is always closed, whether or not the scrape succeeds.
 *
 * @param {string} [url=TARGET_URL] - Address of the page to load.
 * @returns {Promise<void>} Settles after the browser has been closed.
 * @throws {Error} If AUTH still holds the 'USER:PASS' placeholder. Errors
 *   raised while connecting, navigating or reading the page propagate as-is.
 */
async function scrape(url = TARGET_URL) {
  // Fail fast with an actionable message instead of a confusing
  // connection error when the placeholder was never replaced.
  if (AUTH === 'USER:PASS') {
    throw new Error(
      'Provide Browser API credentials in AUTH'
      + ' environment variable or update the script.',
    );
  }

  // Navigation timeout handed to page.goto, in milliseconds (two minutes).
  const navigationTimeoutMs = 2 * 60 * 1000;

  console.log(`Connecting to Browser...`);
  // Credentials travel in the userinfo part of the WebSocket URL.
  const browserWSEndpoint = `wss://${AUTH}@brd.superproxy.io:9222`;
  const browser = await puppeteer.connect({ browserWSEndpoint });

  try {
    console.log(`Connected! Navigating to ${url}...`);
    const page = await browser.newPage();

    // Ask the remote side for a URL at which this session can be inspected.
    // NOTE(review): 'Page.inspect' is not part of the standard CDP Page
    // domain; presumably an extension of the remote Browser API - confirm.
    const client = await page.createCDPSession();
    const { frameTree } = await client.send('Page.getFrameTree');
    const { url: inspectUrl } = await client.send('Page.inspect', {
      frameId: frameTree.frame.id,
    });
    console.log(`You can inspect this session at: ${inspectUrl}.`);

    await page.goto(url, { timeout: navigationTimeoutMs });
    console.log(`Navigated! Scraping page content...`);
    const data = await page.content();
    console.log(`Scraped! Data: ${data}`);
  } finally {
    // Release the remote session even when navigation or scraping fails.
    await browser.close();
  }
}
/**
 * Builds a short description of an unexpected server response attached to
 * an error, when one is available.
 *
 * The response is looked up at `error.target._req.res`.
 * NOTE(review): this looks like the HTTP response of a rejected WebSocket
 * handshake exposed through the client's private fields - confirm against
 * the WebSocket implementation in use.
 *
 * @param {*} error - Rejection reason to inspect; may be any value,
 *   including null or undefined.
 * @returns {string|undefined} `Unexpected Server Status <code>: <message>`
 *   when a response object is attached, otherwise undefined.
 */
function getErrorDetails(error) {
  // Guard `error` itself as well: a promise can reject with a nullish
  // reason, and throwing here would mask the original failure.
  const response = error?.target?._req?.res;
  if (!response) {
    return undefined;
  }
  const { statusCode, statusMessage } = response;
  return `Unexpected Server Status ${statusCode}: ${statusMessage}`;
}
// Run the scrape only when this file is executed directly (not when it is
// loaded via require from another module).
if (require.main === module) {
  scrape().catch((error) => {
    // Prefer the server status summary, then the stack trace, then the
    // message, and finally the raw rejection value. Optional chaining keeps
    // a nullish rejection reason from raising a second error here.
    console.error(getErrorDetails(error)
      || error?.stack
      || error?.message
      || error);
    // Signal failure to the calling shell.
    process.exit(1);
  });
}