Skip to content

Browser Automation

cbintel provides two browser automation systems: Ferret/SWRM for distributed browser control with adaptive learning, and Playwright for screenshots and DOM extraction.

Overview

graph TB
    subgraph "Ferret/SWRM"
        CLI[cbintel-ferret CLI]
        CLIENT[SWRMClient]
        EXECUTOR[ScriptExecutor]
        LEARNING[AdaptiveLearningAgent]
    end

    subgraph "Playwright"
        SCREENSHOT[ScreenshotService]
        PDF[PDFService]
        DOM[DOMService]
    end

    subgraph "External"
        SWRM[SWRM Server]
        BROWSER[Browser Pool]
    end

    CLI --> CLIENT --> SWRM --> BROWSER
    EXECUTOR --> CLIENT
    LEARNING --> EXECUTOR
    SCREENSHOT & PDF & DOM --> BROWSER

Ferret/SWRM

Distributed browser automation over a WebSocket JSON-RPC protocol.

Module Structure

src/cbintel/ferret/
├── __init__.py       # Public exports
├── config.py         # Settings
├── exceptions.py     # Error hierarchy
├── models.py         # Pydantic models
├── actions.py        # AST action nodes
├── client.py         # SWRMClient
├── executor.py       # Script execution
├── learning.py       # Adaptive learning
└── cli.py            # CLI interface

SWRMClient

Core async WebSocket client for browser control.

from cbintel.ferret import SWRMClient

async with SWRMClient() as client:
    # Create browser session
    session = await client.create_session()

    # Navigate
    await client.navigate("https://example.com")

    # Interact
    await client.click("button#submit")
    await client.fill("input[name='query']", "search term")

    # Extract
    text = await client.extract_text("h1")
    screenshot = await client.screenshot()

    # Get DOM
    dom = await client.get_dom()

API Methods

| Category | Methods |
|----------|---------|
| Session | create_session(), get_current_url(), get_title() |
| Navigation | navigate(), reload(), back(), forward() |
| Interaction | click(), fill(), select(), hover(), scroll() |
| Waiting | wait(), wait_for_element(), wait_for_text() |
| Extraction | extract_text(), screenshot(), get_dom() |
| Execution | execute_script(), execute_sequence() |

Script Execution

Execute complex automation scripts:

from cbintel.ferret import ScriptExecutor, SWRMClient

async with SWRMClient() as client:
    executor = ScriptExecutor(client)

    # Execute from JSON definition
    result = await executor.execute({
        "type": "sequence",
        "actions": [
            {"type": "navigate", "url": "https://example.com"},
            {"type": "fill", "selector": "input[name='q']", "value": "test"},
            {"type": "click", "selector": "button[type='submit']"},
            {"type": "wait_for_element", "selector": ".results"},
            {"type": "extract_text", "selector": ".results"}
        ]
    })

Adaptive Learning

AI-powered goal achievement:

from cbintel.ferret import AdaptiveLearningAgent, GoalRequest

async with AdaptiveLearningAgent() as agent:
    # Describe the goal in natural language
    result = await agent.achieve_goal(GoalRequest(
        url="https://google.com",
        goal="Search for 'python tutorials' and extract the first 5 results"
    ))

    if result.success:
        print(f"Goal achieved!")
        print(f"Results: {result.data}")
    else:
        print(f"Best attempt: {result.best_score}")

Learning Modes

| Mode | Description | Confidence |
|------|-------------|------------|
| MEMORY | Past experiences only | 0.9 × original |
| LLM | LLM generation only | 0.7 |
| HYBRID | Memory + LLM combined | 0.75 |
| EXPLORATION | Random exploration | 0.3 |

CLI Commands

# Navigation
cbintel-ferret navigate https://example.com

# Interaction
cbintel-ferret click "button#submit"
cbintel-ferret fill "input[name='q']" "search term"

# Extraction
cbintel-ferret screenshot --full-page -o page.png
cbintel-ferret dom --json

# Script execution
cbintel-ferret run script.json

# Adaptive learning
cbintel-ferret achieve "Search for 'python tutorials' on google.com"

Playwright (Screenshots)

Direct browser automation for screenshots, PDFs, and DOM extraction.

ScreenshotService

from cbintel.screenshots import ScreenshotService, CaptureConfig

config = CaptureConfig(
    browser="chromium",
    viewport_width=1920,
    viewport_height=1080,
)

async with ScreenshotService(config) as service:
    # Full page screenshot
    capture = await service.screenshot(
        "https://example.com",
        full_page=True,
    )
    capture.save("screenshot.png")

    # Element screenshot
    capture = await service.screenshot(
        "https://example.com",
        selector="#header",
    )

    # With DOM capture
    capture = await service.screenshot(
        "https://example.com",
        capture_dom=True,
    )
    print(capture.dom)

PDFService

from cbintel.screenshots import PDFService, PDFConfig

config = PDFConfig(
    format="A4",
    landscape=False,
    scale=1.0,
    margin_top="1cm",
    margin_bottom="1cm",
)

async with PDFService(config) as service:
    result = await service.generate(
        "https://example.com",
        output=Path("document.pdf"),
    )
    print(f"Generated: {len(result.pdf)} bytes")

DOMService

from cbintel.screenshots import DOMService, DOMConfig

config = DOMConfig(
    browser="chromium",
    viewport_width=1920,
    viewport_height=1080,
)

async with DOMService(config) as service:
    # Extract elements
    elements = await service.extract(
        "https://example.com",
        selector="a, button",
        include_positions=True,
    )
    for el in elements:
        print(f"{el.tag}: {el.text} at ({el.x}, {el.y})")

    # Extract links
    links = await service.extract_links("https://example.com")

    # Get full HTML
    html = await service.get_html("https://example.com")

CLI Commands

# Screenshot
cbintel-screenshots screenshot https://example.com -o screenshot.png
cbintel-screenshots screenshot https://example.com --full-page -o full.png

# PDF
cbintel-screenshots pdf https://example.com -o document.pdf --format A4

# DOM extraction
cbintel-screenshots dom https://example.com --selector "a" -o links.json
cbintel-screenshots links https://example.com

Graph Operations

screenshot Operation

- op: screenshot
  params:
    url: "https://example.com"
    full_page: true
    viewport_width: 1920
    viewport_height: 1080
  output: image

tor_screenshot Operation

Anonymous screenshots through Tor:

- op: tor_screenshot
  params:
    url: "http://example.onion"
    full_page: true
    timeout: 90000
    tor_proxy: "socks5://127.0.0.1:9050"
  output: image

browser Operation

Ferret automation in graphs:

- op: browser
  params:
    url: "https://example.com"
    actions:
      - type: fill
        selector: "input[name='q']"
        value: "{{ query }}"
      - type: click
        selector: "button[type='submit']"
      - type: wait_for_element
        selector: ".results"
      - type: extract_text
        selector: ".results"
  output: results

Configuration

Ferret Environment Variables

FERRET_SWRM_URL=ws://localhost:8000/ws
FERRET_HTTP_URL=http://localhost:8000
FERRET_API_KEY=your-api-key
FERRET_TIMEOUT=30.0
FERRET_MAX_RETRIES=3

Playwright Configuration

from cbintel.screenshots import CaptureConfig

config = CaptureConfig(
    browser="chromium",      # chromium, firefox, webkit
    viewport_width=1920,
    viewport_height=1080,
    device_scale_factor=1.0,
    is_mobile=False,
    user_agent="Mozilla/5.0...",
)

Error Handling

Ferret Exceptions

from cbintel.ferret import (
    FerretError,
    SWRMConnectionError,
    SWRMTimeoutError,
    SWRMAuthenticationError,
    ScriptExecutionError,
    SelectorNotFoundError,
    GoalNotAchievedError,
)

try:
    await client.click("button#submit")
except SelectorNotFoundError as e:
    print(f"Selector not found: {e.selector}")
    print(f"Tried alternatives: {e.alternatives}")
except SWRMTimeoutError:
    print("Operation timed out")
except FerretError as e:
    print(f"Ferret error: {e}")

Playwright Exceptions

from playwright.async_api import TimeoutError

try:
    await service.screenshot(url)
except TimeoutError:
    print("Page load timed out")
except Exception as e:
    print(f"Screenshot failed: {e}")

Best Practices

Selector Strategy

# Prefer specific selectors
await client.click("button#submit-form")

# Use data attributes for stability
await client.click("[data-testid='submit']")

# Avoid fragile selectors
# Bad: div.container > div:nth-child(3) > button
# Good: button[data-action='submit']

Wait Strategy

# Wait for element before interacting
await client.wait_for_element(".results", timeout=10000)
text = await client.extract_text(".results")

# Wait for navigation
await client.navigate(url)
await client.wait_for_element("body")

Error Recovery

import asyncio


async def click_with_retry(client, selector, max_retries=3):
    """Click ``selector``, retrying while the selector cannot be found.

    Args:
        client: Object exposing an awaitable ``click(selector)`` method.
        selector: Selector passed through unchanged to ``client.click``.
        max_retries: Maximum number of click attempts.

    Raises:
        RuntimeError: If every attempt raised ``SelectorNotFoundError``, or
            if ``max_retries`` is not positive. The last
            ``SelectorNotFoundError`` (if any) is attached as ``__cause__``.
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            await client.click(selector)
            return
        except SelectorNotFoundError as exc:
            last_error = exc
            # Pause only when another attempt follows; sleeping after the
            # final failure would just delay the error.
            if attempt < max_retries - 1:
                await asyncio.sleep(1)
    # Chain the last failure so the selector details are not lost
    raise RuntimeError(f"Failed to click {selector}") from last_error

Requirements

# Install Playwright browsers
playwright install

# For Ferret, ensure SWRM server is running
docker run -p 8000:8000 swrm/server