using PuppeteerSharp;
using System;
using System.Collections.Generic;
using System.Net.WebSockets;
using System.Text;
using System.Threading.Tasks;

class Scraper
{
    private readonly string _auth;

    public Scraper(string auth)
    {
        _auth = auth;
    }

    // Connects to the remote Browser API endpoint over a WebSocket
    // authenticated with an HTTP Basic Authorization header.
    private async Task<IBrowser> Connect()
    {
        if (_auth == "USER:PASS")
        {
            throw new Exception("Provide Browser API credentials in AUTH"
                + " environment variable or update the script.");
        }
        var options = new ConnectOptions()
        {
            BrowserWSEndpoint = "wss://brd.superproxy.io:9222",
            // The second lambda parameter is named connectionOptions rather
            // than options: shadowing the enclosing local of the same name
            // is a compile error (CS0136).
            WebSocketFactory = async (uri, connectionOptions, cToken) =>
            {
                var socket = new ClientWebSocket();
                // Base64-encode the "user:pass" pair for Basic auth,
                // e.g. "user:pass" -> "Basic dXNlcjpwYXNz".
                var authBytes = Encoding.UTF8.GetBytes(_auth);
                var authHeader = "Basic " + Convert.ToBase64String(authBytes);
                socket.Options.SetRequestHeader("Authorization", authHeader);
                // TimeSpan.Zero disables client-side keep-alive pings.
                socket.Options.KeepAliveInterval = TimeSpan.Zero;
                await socket.ConnectAsync(uri, cToken);
                return socket;
            },
        };
        return await Puppeteer.ConnectAsync(options);
    }

    public async Task Scrape(string url)
    {
        Console.WriteLine("Connecting to Browser...");
        var browser = await Connect();
        try
        {
            Console.WriteLine($"Connected! Navigating to {url}...");
            var page = await browser.NewPageAsync();
            // Open a raw CDP session for Browser-API-specific commands.
            var client = await page.Target.CreateCDPSessionAsync();
            // Fetch the top frame's id, then call Page.inspect (a Browser
            // API extension to CDP) to get a link for viewing this session.
            var frames = await client.SendAsync("Page.getFrameTree");
            var frameId = frames!.Value.GetProperty("frameTree").GetProperty("frame")
                .GetProperty("id").GetString();
            var parameters = new Dictionary<string, object> { { "frameId", frameId } };
            var inspect = await client.SendAsync("Page.inspect", parameters);
            var inspectUrl = inspect!.Value.GetProperty("url").GetString();
            Console.WriteLine($"You can inspect this session at: {inspectUrl}");
            // Allow up to 2 minutes; remote navigation can be slow.
            await page.GoToAsync(url, /* timeout= */ 2 * 60 * 1000);
            Console.WriteLine("Navigated! Scraping page content...");
            var data = await page.GetContentAsync();
            Console.WriteLine($"Scraped! Data: {data}");
        }
        finally
        {
            // Always release the remote browser session.
            await browser.CloseAsync();
        }
    }

    // Reads an environment variable, falling back to a default value.
    private static string Env(string name, string defaultValue)
    {
        return Environment.GetEnvironmentVariable(name) ?? defaultValue;
    }

    public static async Task Main()
    {
        // Replace with your Browser API zone credentials
        var auth = Env("AUTH", "USER:PASS");
        var url = Env("TARGET_URL", "https://example.com");
        var scraper = new Scraper(auth);
        await scraper.Scrape(url);
    }
}
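
// Usage sketch (assumes .NET 6+ with the PuppeteerSharp NuGet package;
// the env-var syntax below is for a POSIX shell):
//   dotnet add package PuppeteerSharp
//   AUTH=<user>:<pass> TARGET_URL=https://example.com dotnet run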