with one click
vercel-browser-automation
Build multi-step browser automation workflows with AI agent loops, error recovery, and state management
Menu
Build multi-step browser automation workflows with AI agent loops, error recovery, and state management
Production patterns, API key security, cost optimization, performance tuning, and monitoring
Reduce costs and latency with context caching - implicit and explicit cache management with TTL configuration
Execute Python code in Gemini's secure sandbox for data analysis, visualization, and file processing
Generate text embeddings for semantic search, RAG, and vector database integration
Implement robust error handling with retry logic, rate limiting, and circuit breaker patterns
Implement tool use with Gemini - function declarations, tool modes, parallel/compositional calling, and MCP integration
| name | vercel-browser-automation |
| description | Build multi-step browser automation workflows with AI agent loops, error recovery, and state management |
| argument-hint | <workflow description or automation scenario> |
| allowed-tools | Read, Write, Edit, Bash(agent-browser, npx, npm install, node), Glob, Grep |
Build multi-step automation workflow: $ARGUMENTS
You are a browser automation specialist with expertise in:
import { AgentBrowser } from '@anthropic-ai/agent-browser';
/**
 * Logs in to the example app and downloads the monthly report.
 *
 * Credentials are read from the `EMAIL` and `PASSWORD` environment variables.
 * Failures in any workflow step (including missing credentials) are reported
 * through the returned object rather than thrown; the browser is always closed.
 *
 * @returns `{ success: true }` when every step succeeded, otherwise
 *          `{ success: false, error }` with the failure message.
 */
async function loginAndDownload() {
  const agent = new AgentBrowser({ cloud: true, stealth: true });
  try {
    // Validate credentials up front instead of asserting them with `!`,
    // so a missing variable yields a clear message rather than typing "undefined".
    const email = process.env.EMAIL;
    const password = process.env.PASSWORD;
    if (!email || !password) {
      throw new Error('EMAIL and PASSWORD environment variables must be set');
    }

    // Step 1: Navigate to login
    await agent.navigate('https://app.example.com/login');

    // Step 2: Fill credentials
    await agent.fill('Email', email);
    await agent.fill('Password', password);

    // Step 3: Submit login
    await agent.click('Sign In');
    await agent.waitForNavigation();

    // Step 4: Navigate to downloads
    await agent.navigate('https://app.example.com/reports');

    // Step 5: Download report
    await agent.click('Download Monthly Report');
    await agent.waitForDownload();

    return { success: true };
  } catch (error) {
    // Catch variables are `unknown` under strict mode; narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    return { success: false, error: message };
  } finally {
    await agent.close();
  }
}
import { AgentBrowser } from '@anthropic-ai/agent-browser';
import Anthropic from '@anthropic-ai/sdk';
/** Mutable state carried across iterations of the AI agent loop. */
interface AgentState {
  /** Last URL passed to a `navigate` action ('' until the first navigation). */
  url: string;
  /** Most recent page snapshot returned by `agent.snapshot()`. */
  snapshot: string;
  /** One `action: reasoning` entry per executed action, oldest first. */
  history: string[];
  /** Set to true once the model issues the `complete` action. */
  complete: boolean;
}

/**
 * Drives a browser with an LLM: on every iteration the current page snapshot
 * and the action history are sent to the model, which answers with the next
 * action as JSON; the action is executed and recorded.
 *
 * The browser session is closed on every exit path, including errors.
 *
 * @param task Natural-language description of what the agent should achieve.
 * @returns The final loop state once the model reports `complete`.
 * @throws Error when the model reports `error`, answers with something that is
 *         not a JSON object, or 20 actions ran without completing the task.
 */
async function aiAgentLoop(task: string) {
  // Safety: max 20 iterations
  const maxIterations = 20;
  const agent = new AgentBrowser({ cloud: true });
  const anthropic = new Anthropic();
  const state: AgentState = {
    url: '',
    snapshot: '',
    history: [],
    complete: false,
  };

  try {
    while (!state.complete) {
      // Check the budget before spending another model call. Checking here
      // (rather than after recording the action) means a `complete` issued on
      // the last allowed iteration still counts as success.
      if (state.history.length >= maxIterations) {
        throw new Error('Max iterations reached');
      }

      // Get current page state
      state.snapshot = await agent.snapshot();

      // Ask AI for next action
      const response = await anthropic.messages.create({
        model: 'claude-sonnet-4-20250514',
        max_tokens: 1024,
        system: `You are a browser automation agent. Based on the current page state, decide the next action to complete the task.
Available actions:
- navigate(url): Go to a URL
- click(text): Click element with text
- fill(label, value): Fill input field
- select(label, value): Select dropdown option
- scroll(direction): Scroll up/down
- wait(ms): Wait for specified time
- complete(): Mark task as done
- error(message): Report an error
Respond with JSON: { "action": "...", "params": {...}, "reasoning": "..." }`,
        messages: [
          {
            role: 'user',
            content: `Task: ${task}
Current URL: ${state.url || 'not yet navigated'}
Page Snapshot:
${state.snapshot}
Action History:
${state.history.join('\n') || 'No actions yet'}
What should I do next?`,
          },
        ],
      });

      // Parse AI response: take the first text block (the response may be empty
      // or start with a non-text block).
      let text = '';
      for (const block of response.content) {
        if (block.type === 'text') {
          text = block.text;
          break;
        }
      }
      let parsed: unknown;
      try {
        parsed = JSON.parse(text);
      } catch {
        throw new Error(`AI response was not valid JSON: ${text.slice(0, 200)}`);
      }
      if (typeof parsed !== 'object' || parsed === null) {
        throw new Error('AI response was not a JSON object');
      }
      const decision = parsed as {
        action?: string;
        params?: Record<string, any>;
        reasoning?: string;
      };
      // Actions such as complete() may legitimately omit params.
      const params = decision.params ?? {};

      console.log(`AI decided: ${decision.action}(${JSON.stringify(decision.params)})`);
      console.log(`Reasoning: ${decision.reasoning}`);

      // Execute action
      // NOTE(review): state.url is only refreshed by `navigate`; a click that
      // changes the page leaves it stale — confirm whether the agent exposes the
      // current URL.
      switch (decision.action) {
        case 'navigate':
          await agent.navigate(params.url);
          state.url = params.url;
          break;
        case 'click':
          await agent.click(params.text);
          break;
        case 'fill':
          await agent.fill(params.label, params.value);
          break;
        case 'select':
          await agent.select(params.label, params.value);
          break;
        case 'scroll':
          await agent.scroll(params.direction);
          break;
        case 'wait':
          await new Promise((r) => setTimeout(r, params.ms));
          break;
        case 'complete':
          state.complete = true;
          break;
        case 'error':
          throw new Error(params.message);
        default:
          // Unknown actions are still skipped, but no longer silently.
          console.warn(`Ignoring unsupported action: ${decision.action}`);
      }

      // Record history
      state.history.push(`${decision.action}: ${decision.reasoning}`);
    }
    return state;
  } finally {
    // Release the browser session even when the loop throws.
    await agent.close();
  }
}
import { Stagehand } from '@browserbase/stagehand';
/**
 * Example Stagehand workflow: drives a shopping flow with natural-language
 * instructions, then extracts structured product details and logs them.
 *
 * The session is closed even if one of the steps fails.
 */
async function stagehandWorkflow() {
  const stagehand = new Stagehand({
    env: 'BROWSERBASE',
    apiKey: process.env.BROWSERBASE_API_KEY,
  });
  await stagehand.init();

  try {
    // Natural language navigation
    await stagehand.act('Go to amazon.com');
    await stagehand.act('Search for "wireless headphones"');
    await stagehand.act('Click on the first result');
    await stagehand.act('Add to cart');

    // Extract structured data
    const product = await stagehand.extract({
      instruction: 'Extract the product details',
      schema: {
        name: 'string',
        price: 'number',
        rating: 'number',
        reviews: 'number',
      },
    });
    console.log(product);
  } finally {
    // Without this, any failing step above would leave the remote session open.
    await stagehand.close();
  }
}
/** Tuning knobs for `withRetry`. */
interface RetryOptions {
  /** Total number of attempts; values below 1 are treated as 1. */
  maxRetries: number;
  /** Delay before the second attempt, in milliseconds. */
  initialDelay: number;
  /** Upper bound applied to the delay after each backoff step, in milliseconds. */
  maxDelay: number;
  /** Multiplier applied to the delay after each failed attempt. */
  backoffFactor: number;
}

/**
 * Runs `fn`, retrying with exponential backoff when it rejects.
 *
 * `fn` is always invoked at least once. After the final failed attempt the
 * last thrown value is rethrown unchanged.
 *
 * @param fn Operation to attempt.
 * @param options Attempt count and backoff configuration.
 * @returns The value of the first successful attempt.
 * @throws Whatever the last attempt threw.
 */
async function withRetry<T>(
  fn: () => Promise<T>,
  options: RetryOptions
): Promise<T> {
  // Guarantee one attempt: with maxRetries <= 0 (or NaN) the loop used to be
  // skipped entirely and the function threw `undefined`.
  const attempts = Math.max(1, Math.floor(options.maxRetries) || 1);
  let lastError: unknown;
  let delay = options.initialDelay;

  for (let attempt = 1; attempt <= attempts; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      // Thrown values are not necessarily Error instances (could be null or a string).
      const message = error instanceof Error ? error.message : String(error);
      console.log(`Attempt ${attempt} failed: ${message}`);
      if (attempt < attempts) {
        console.log(`Retrying in ${delay}ms...`);
        await new Promise((r) => setTimeout(r, delay));
        delay = Math.min(delay * options.backoffFactor, options.maxDelay);
      }
    }
  }
  throw lastError;
}
// Usage: wrap a single flaky click in withRetry.
// NOTE: `agent` is assumed to be an AgentBrowser instance created elsewhere.
const result = await withRetry(
() => agent.click('Flaky Button'),
{
// Up to 3 attempts in total.
maxRetries: 3,
// Wait 1s before the second attempt...
initialDelay: 1000,
// ...never waiting longer than 10s between attempts...
maxDelay: 10000,
// ...and doubling the wait after every failure.
backoffFactor: 2,
}
);
/** A named snapshot of workflow progress. */
interface Checkpoint {
  /** Caller-chosen name of the step that just completed. */
  step: string;
  /** Arbitrary data needed to resume after this step. */
  state: any;
  /** Creation time in milliseconds since the Unix epoch (`Date.now()`). */
  timestamp: number;
}

/**
 * Records workflow progress as an ordered list of checkpoints persisted under
 * the key `workflow-<workflowId>`.
 *
 * Previously persisted checkpoints are loaded lazily before the first
 * operation, so a new instance created after a restart can resume from, and
 * append to, the earlier history instead of overwriting it.
 */
class CheckpointedWorkflow {
  private checkpoints: Checkpoint[] = [];
  private storage: Storage;
  /** Memoized load of persisted checkpoints; undefined until first needed. */
  private hydration?: Promise<void>;

  constructor(workflowId: string) {
    this.storage = new Storage(`workflow-${workflowId}`);
  }

  /** Loads persisted checkpoints once and places them ahead of in-memory ones. */
  private hydrate(): Promise<void> {
    if (!this.hydration) {
      this.hydration = (async () => {
        const saved = await this.storage.load();
        if (Array.isArray(saved) && saved.length > 0) {
          this.checkpoints = [...saved, ...this.checkpoints];
        }
      })().catch((error) => {
        // Do not cache a failed load; let the next call try again.
        this.hydration = undefined;
        throw error;
      });
    }
    return this.hydration;
  }

  /**
   * Appends a checkpoint and persists the full history.
   *
   * @param step Name of the completed step.
   * @param state Data to store with the checkpoint.
   */
  async checkpoint(step: string, state: any) {
    // Load earlier history first; saving only the new entries would erase it.
    await this.hydrate();
    const cp: Checkpoint = {
      step,
      state,
      timestamp: Date.now(),
    };
    this.checkpoints.push(cp);
    await this.storage.save(this.checkpoints);
  }

  /** @returns The most recent checkpoint, or `null` when none exist. */
  async getLastCheckpoint(): Promise<Checkpoint | null> {
    await this.hydrate();
    return this.checkpoints[this.checkpoints.length - 1] ?? null;
  }

  /**
   * Looks up the state stored for a step.
   *
   * @param step Step name to look for.
   * @returns State of the first checkpoint with that name, or `undefined`.
   */
  async resumeFrom(step: string): Promise<any> {
    // Needed after a restart, when nothing has been checkpointed in memory yet.
    await this.hydrate();
    const cp = this.checkpoints.find((c) => c.step === step);
    return cp?.state;
  }
}
// Usage: checkpoint after each completed step of an order workflow.
// NOTE: `agent`, `url` and `email` are assumed to be defined by surrounding code.
const workflow = new CheckpointedWorkflow('order-123');
// Check for existing progress
const lastCheckpoint = await workflow.getLastCheckpoint();
if (lastCheckpoint) {
// This sample only reports the last completed step; the steps below still
// run unconditionally. A real workflow would skip steps already done.
console.log(`Resuming from step: ${lastCheckpoint.step}`);
}
// Execute with checkpoints: record each step only after it has succeeded.
await agent.navigate(url);
await workflow.checkpoint('navigated', { url });
await agent.fill('Email', email);
await workflow.checkpoint('email-filled', { email });
// ... continue with checkpoints
/**
 * Clicks the first selector that works, trying `primarySelector` first and
 * then each entry of `fallbacks` in order.
 *
 * @param agent Browser agent whose `click` is attempted.
 * @param primarySelector Preferred selector.
 * @param fallbacks Alternatives to try when the preferred one fails.
 * @throws Error('All selectors failed') when every attempt rejects.
 */
async function clickWithFallback(
  agent: AgentBrowser,
  primarySelector: string,
  fallbacks: string[]
) {
  const candidates = [primarySelector, ...fallbacks];

  for (const [position, selector] of candidates.entries()) {
    const isPrimary = position === 0;

    try {
      await agent.click(selector);
    } catch {
      // A failed click is expected here; log it and move on to the next candidate.
      console.log(
        isPrimary
          ? `Primary selector failed: ${selector}`
          : `Fallback failed: ${selector}`
      );
      continue;
    }

    if (!isPrimary) {
      console.log(`Fallback succeeded: ${selector}`);
    }
    return;
  }

  throw new Error('All selectors failed');
}
// Usage: try the visible text 'Submit' first, then each alternative selector
// in the order listed until one click succeeds.
// NOTE: `agent` is assumed to be an AgentBrowser instance created elsewhere.
await clickWithFallback(agent, 'Submit', [
'button:Submit',
'[type="submit"]',
'.submit-btn',
'button:contains("Submit")',
]);
/** Phases a workflow can be in. */
type WorkflowState = 'idle' | 'navigating' | 'filling_form' | 'submitting' | 'waiting' | 'complete' | 'error';

/** Data collected while a workflow runs. */
interface WorkflowContext {
  url?: string;
  formData?: Record<string, string>;
  result?: any;
  /** The error thrown by the action of the most recent failed transition. */
  error?: Error;
}

/**
 * Tracks the current phase of a workflow together with its collected context.
 */
class WorkflowStateMachine {
  state: WorkflowState = 'idle';
  context: WorkflowContext = {};

  /**
   * Runs `action` and, when it resolves, moves the machine to `to`.
   *
   * If `action` fails, the error is stored on `context.error`, the state
   * becomes `'error'`, and the same error is rethrown.
   *
   * @param to State to enter once `action` has succeeded.
   * @param action Work to perform for this transition.
   */
  async transition(to: WorkflowState, action: () => Promise<void>) {
    const from = this.state;
    console.log(`${from} -> ${to}`);

    try {
      await action();
    } catch (failure) {
      this.context.error = failure as Error;
      this.state = 'error';
      throw failure;
    }

    // Only reached when the action completed without throwing.
    this.state = to;
  }
}
// Usage: each transition runs its action first and only then enters the named
// state; if an action throws, the machine enters 'error' and the error
// propagates, so the later transitions are not reached.
// NOTE: `agent` is assumed to be an AgentBrowser instance created elsewhere.
const machine = new WorkflowStateMachine();
await machine.transition('navigating', async () => {
await agent.navigate('https://example.com');
machine.context.url = 'https://example.com';
});
await machine.transition('filling_form', async () => {
await agent.fill('Email', 'user@example.com');
machine.context.formData = { email: 'user@example.com' };
});
await machine.transition('submitting', async () => {
await agent.click('Submit');
});
// Keep whatever the success element yields as the workflow result.
await machine.transition('complete', async () => {
machine.context.result = await agent.extract('.success-message');
});
import readline from 'readline';
/**
 * Prompts on stdout with `"<message> (y/n): "` and reads one line from stdin.
 *
 * @param message Question shown to the user.
 * @returns `true` when the answer is `y` or `yes` (case-insensitive,
 *          surrounding whitespace ignored); `false` for any other answer or
 *          when stdin ends before an answer is given.
 */
async function askForConfirmation(message: string): Promise<boolean> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  return new Promise((resolve) => {
    // If input closes (EOF, Ctrl+D, piped stdin exhausted) the question
    // callback never fires; treat that as "no" instead of hanging forever.
    rl.once('close', () => resolve(false));

    rl.question(`${message} (y/n): `, (answer) => {
      const normalized = answer.trim().toLowerCase();
      // Resolve before closing: close() emits 'close' synchronously and the
      // first resolve() call is the one that wins.
      resolve(normalized === 'y' || normalized === 'yes');
      rl.close();
    });
  });
}
/**
 * Adds a product to the cart, prints the cart summary, and proceeds to
 * checkout only after the user confirms on the terminal.
 *
 * The browser is closed on every exit path, including failed steps.
 */
async function purchaseWorkflow() {
  const agent = new AgentBrowser({ cloud: true });

  try {
    // Add to cart
    await agent.navigate('https://shop.example.com/product');
    await agent.click('Add to Cart');

    // Show cart summary
    const summary = await agent.extract('.cart-summary');
    console.log('Cart Summary:', summary);

    // Confirm before checkout
    const confirmed = await askForConfirmation('Proceed to checkout?');
    if (confirmed) {
      await agent.click('Checkout');
      // ... continue checkout
    } else {
      console.log('Checkout cancelled');
    }
  } finally {
    // Previously skipped whenever a step above threw, leaking the session.
    await agent.close();
  }
}
/**
 * Pauses for a human when the current page looks like a CAPTCHA challenge.
 *
 * The page snapshot is searched case-insensitively for "captcha" (which also
 * matches "CAPTCHA" and "reCAPTCHA") and for the "I am / I'm not a robot"
 * checkbox wording. On a match the live-view URL is printed and the function
 * waits up to two minutes for the solved marker to appear.
 *
 * @param agent Browser agent positioned on the page to inspect.
 */
async function handleCaptcha(agent: AgentBrowser) {
  const snapshot = await agent.snapshot();

  // Case-insensitive: the previous exact-case substring checks missed the
  // usual upper-case spelling and the contracted "I'm not a robot" label.
  const captchaMarkers = /captcha|i am not a robot|i['\u2019]m not a robot/i;
  if (!captchaMarkers.test(snapshot)) {
    return;
  }

  console.log('CAPTCHA detected!');

  // Open live view for human
  const liveUrl = await agent.getLiveViewUrl();
  console.log(`Please solve the CAPTCHA: ${liveUrl}`);

  // Wait for human to solve
  await agent.waitForElement('[data-captcha-solved="true"]', {
    timeout: 120000, // 2 minutes
  });
  console.log('CAPTCHA solved, continuing...');
}
#!/bin/bash
# workflow.sh - Complete order workflow
#
# Logs in, adds a product to the cart, checks out and prints the order number.
# Required environment variables: USER_EMAIL, USER_PASSWORD, SHIPPING_ADDRESS,
# SHIPPING_CITY, SHIPPING_ZIP, SHIPPING_STATE.
set -euo pipefail # Exit on error, on unset variables, and on failed pipeline stages

BASE_URL="https://shop.example.com"
SESSION_NAME="order-workflow-$$"

# Fail fast, before any browser session exists, if an input is missing.
for var in USER_EMAIL USER_PASSWORD SHIPPING_ADDRESS SHIPPING_CITY SHIPPING_ZIP SHIPPING_STATE; do
  if [[ -z "${!var:-}" ]]; then
    echo "Missing required environment variable: $var" >&2
    exit 1
  fi
done

# Best-effort removal of the persistent session when the script aborts early.
cleanup() {
  agent-browser session delete --name "$SESSION_NAME" || true
}

# Create session
echo "Creating browser session..."
agent-browser session create --name "$SESSION_NAME" --persist --cloud --stealth
# From here on, any failing step would otherwise leave the session behind.
trap cleanup EXIT

# Function to run command with session
run() {
  agent-browser --session "$SESSION_NAME" "$@"
}

# Step 1: Login
echo "Step 1: Logging in..."
run navigate "$BASE_URL/login"
run fill "Email" "$USER_EMAIL"
run fill "Password" "$USER_PASSWORD"
run click "Sign In"
run wait --navigation

# Step 2: Add to cart
echo "Step 2: Adding product to cart..."
run navigate "$BASE_URL/products/widget-123"
run click "Add to Cart"
run wait "Added to cart"

# Step 3: Checkout
echo "Step 3: Starting checkout..."
run navigate "$BASE_URL/cart"
run click "Checkout"

# Step 4: Fill shipping
echo "Step 4: Filling shipping info..."
run fill "Address" "$SHIPPING_ADDRESS"
run fill "City" "$SHIPPING_CITY"
run fill "ZIP" "$SHIPPING_ZIP"
run select "State" "$SHIPPING_STATE"
run click "Continue"

# Step 5: Confirm order
echo "Step 5: Confirming order..."
run click "Place Order"
run wait "Order confirmed"

# Extract order number
ORDER=$(run extract ".order-number")
echo "Order placed: $ORDER"

# Cleanup: delete explicitly on the success path (a failure here is still
# reported), and disarm the trap so the session is not deleted twice.
trap - EXIT
agent-browser session delete --name "$SESSION_NAME"
echo "Workflow complete!"
#!/bin/bash
# parallel-scrape.sh - Scrape multiple URLs in parallel
#
# Writes one <basename>.json snapshot per URL into the current directory.
# Exits non-zero (without printing the final success line) if any scrape fails.
set -euo pipefail

URLS=(
  "https://example.com/page1"
  "https://example.com/page2"
  "https://example.com/page3"
  "https://example.com/page4"
)
MAX_PARALLEL=4

# Snapshot a single URL to "<last path segment>.json".
# Returns non-zero and removes the partial file when the snapshot fails.
scrape_url() {
  local url=$1
  # Declare and assign separately so a failing command substitution is not
  # masked by the exit status of `local`.
  local output
  output="$(basename "$url").json"

  if ! agent-browser --cloud snapshot "$url" --format json > "$output"; then
    echo "Failed: $url" >&2
    rm -f "$output"
    return 1
  fi
  echo "Scraped: $url -> $output"
}
# Exported so the child bash processes started by xargs can call it.
export -f scrape_url

# Run in parallel
printf '%s\n' "${URLS[@]}" | xargs -P "$MAX_PARALLEL" -I {} bash -c 'scrape_url "$@"' _ {}
echo "All URLs scraped!"
For: $ARGUMENTS
Provide: