Version 1.2.2. Added usage overview. Shows token used and cost in $.
This commit is contained in:
@@ -1,9 +1,12 @@
|
||||
"""
|
||||
tools/browser_tool.py — Playwright headless browser tool.
|
||||
|
||||
For JS-heavy pages that httpx can't render. Enforces the same Tier 1/2
|
||||
web whitelist as WebTool. Browser instance is lazy-initialized and shared
|
||||
across calls.
|
||||
Read operations (fetch_page, screenshot) never require confirmation.
|
||||
Interactive operations (click, fill, select, press) require confirmation
|
||||
unless the target domain is in the user's browser_approved_domains list.
|
||||
|
||||
Sessions are stateful within a session_id: navigate with fetch_page first,
|
||||
then use interactive ops without a url to act on the current page.
|
||||
|
||||
Requires: playwright package + `playwright install chromium`
|
||||
"""
|
||||
@@ -13,7 +16,7 @@ import asyncio
|
||||
import logging
|
||||
from typing import ClassVar
|
||||
|
||||
from ..context_vars import current_task_id, web_tier2_enabled
|
||||
from ..context_vars import current_task_id, current_session_id, web_tier2_enabled
|
||||
from ..security import assert_domain_tier1, sanitize_external_content
|
||||
from .base import BaseTool, ToolResult
|
||||
|
||||
@@ -21,14 +24,39 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
_MAX_TEXT_CHARS = 25_000
|
||||
_TIMEOUT_MS = 30_000
|
||||
_USER_AGENT = (
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
|
||||
_INTERACTIVE_OPS = {"click", "fill", "select", "press"}
|
||||
|
||||
|
||||
async def _is_domain_approved(user_id: str, hostname: str) -> bool:
    """Return True if hostname (or a parent domain) is in the user's approved list.

    The approved domains are read from the ``browser_approved_domains`` rows
    whose ``owner_user_id`` equals ``user_id``. Matching is case-insensitive.
    Leading ``*`` / ``.`` characters on a stored entry are ignored, so
    ``*.example.com`` and ``example.com`` both approve ``example.com`` and
    every subdomain of it.

    Args:
        user_id: Owner id used to select the approved-domain rows.
        hostname: Host name of the page the interactive operation targets.

    Returns:
        True when ``hostname`` equals an approved domain or is a subdomain of
        one; False otherwise (including for an empty hostname).
    """
    from ..database import get_pool

    pool = await get_pool()
    rows = await pool.fetch(
        "SELECT domain FROM browser_approved_domains WHERE owner_user_id = $1",
        user_id,
    )

    # "example.com." (trailing root dot) names the same host as "example.com".
    host = hostname.lower().rstrip(".")
    if not host:
        return False

    for row in rows:
        # lstrip removes any run of leading '*' and '.' characters (it is a
        # character set, not a prefix), which turns "*.example.com" into
        # "example.com".
        domain = row["domain"].lower().lstrip("*.").rstrip(".")
        if not domain:
            # Entries such as "*" or "." reduce to nothing. Skip them instead
            # of letting an empty suffix match arbitrary hostnames.
            continue
        if host == domain or host.endswith("." + domain):
            return True
    return False
|
||||
|
||||
|
||||
class BrowserTool(BaseTool):
|
||||
name = "browser"
|
||||
description = (
|
||||
"Fetch web pages using a real headless browser (Chromium). "
|
||||
"Use this for JS-heavy pages or single-page apps that the regular 'web' tool cannot read. "
|
||||
"Operations: fetch_page (extract text content), screenshot (base64 PNG). "
|
||||
"Headless Chromium browser for JS-heavy pages and web interactions. "
|
||||
"Read ops: fetch_page (extract text), screenshot (PNG). "
|
||||
"Interactive ops: click, fill (type into field), select (dropdown), press (keyboard key). "
|
||||
"Interactive ops require confirmation unless the domain is in your Browser Trusted Domains list. "
|
||||
"Page state is kept across calls within the same session — navigate with fetch_page first, "
|
||||
"then use interactive ops (omit url to stay on the current page). "
|
||||
"Follows the same domain whitelist rules as the web tool."
|
||||
)
|
||||
input_schema = {
|
||||
@@ -36,87 +64,129 @@ class BrowserTool(BaseTool):
|
||||
"properties": {
|
||||
"operation": {
|
||||
"type": "string",
|
||||
"enum": ["fetch_page", "screenshot"],
|
||||
"description": "fetch_page extracts text; screenshot returns a base64 PNG.",
|
||||
"enum": ["fetch_page", "screenshot", "click", "fill", "select", "press"],
|
||||
"description": (
|
||||
"fetch_page: extract page text. "
|
||||
"screenshot: capture PNG. "
|
||||
"click: click an element (selector required). "
|
||||
"fill: type into a field (selector + value required). "
|
||||
"select: choose a <select> option (selector + value required). "
|
||||
"press: press a keyboard key (key required; selector optional)."
|
||||
),
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "URL to navigate to.",
|
||||
"description": (
|
||||
"URL to navigate to. Required for fetch_page and screenshot. "
|
||||
"For interactive ops, omit to act on the current page."
|
||||
),
|
||||
},
|
||||
"selector": {
|
||||
"type": "string",
|
||||
"description": "CSS selector for click / fill / select / press operations.",
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "Text to type (fill) or option value to select (select).",
|
||||
},
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "Key name for press (e.g. 'Enter', 'Tab', 'Escape', 'ArrowDown').",
|
||||
},
|
||||
"wait_for": {
|
||||
"type": "string",
|
||||
"description": "CSS selector to wait for before extracting content (optional).",
|
||||
"description": "CSS selector to wait for before acting (optional).",
|
||||
},
|
||||
"extract_selector": {
|
||||
"type": "string",
|
||||
"description": "CSS selector to extract text from (optional; defaults to full page).",
|
||||
"description": "CSS selector for text extraction in fetch_page (optional; defaults to full page body).",
|
||||
},
|
||||
},
|
||||
"required": ["operation", "url"],
|
||||
"required": ["operation"],
|
||||
}
|
||||
requires_confirmation = False
|
||||
allowed_in_scheduled_tasks = False # Too resource-heavy for scheduled agents
|
||||
requires_confirmation = True # default; read ops override via should_confirm()
|
||||
allowed_in_scheduled_tasks = False
|
||||
|
||||
# Module-level shared browser/playwright (lazy-init, reused)
|
||||
# Shared Playwright/browser instance (lazy-init)
|
||||
_playwright = None
|
||||
_browser = None
|
||||
_lock: ClassVar[asyncio.Lock] = asyncio.Lock()
|
||||
|
||||
async def execute(self, operation: str, url: str = "", wait_for: str = "", extract_selector: str = "", **_) -> ToolResult:
|
||||
if not url:
|
||||
return ToolResult(success=False, error="'url' is required")
|
||||
# Per-session pages: session_id → (context, page)
|
||||
_sessions: ClassVar[dict] = {}
|
||||
|
||||
# Whitelist check (same Tier 1/2 rules as WebTool)
|
||||
denied = await self._check_tier(url)
|
||||
if denied:
|
||||
return denied
|
||||
# ── Confirmation logic ────────────────────────────────────────────────────
|
||||
|
||||
try:
|
||||
from playwright.async_api import async_playwright
|
||||
except ImportError:
|
||||
return ToolResult(
|
||||
success=False,
|
||||
error="Playwright is not installed. Run: pip install playwright && playwright install chromium",
|
||||
)
|
||||
async def should_confirm(self, operation: str = "", url: str = "", **_) -> bool:
    """Decide whether this call needs user confirmation before running.

    Operations outside ``_INTERACTIVE_OPS`` never need confirmation. An
    interactive operation needs confirmation unless the host it targets is
    approved for the current user. The target is ``url`` when given,
    otherwise the URL of the page stored for the current session. Whenever
    the target host or the current user cannot be determined, the answer is
    True.
    """
    # Anything that is not an interactive op is read-only: no confirmation.
    if operation not in _INTERACTIVE_OPS:
        return False

    destination = url
    if not destination:
        # No explicit URL: the action applies to the session's current page.
        session_key = current_session_id.get() or "default"
        entry = BrowserTool._sessions.get(session_key)
        if entry:
            try:
                destination = entry[1].url
            except Exception:
                pass

    # Unknown target: ask, to be safe.
    if not destination:
        return True

    from urllib.parse import urlparse
    host = urlparse(destination).hostname
    if not host:
        return True

    from ..context_vars import current_user as _cu
    account = _cu.get()
    if not account:
        return True

    approved = await _is_domain_approved(account.id, host)
    return not approved
|
||||
|
||||
def confirmation_description(self, operation: str = "", url: str = "",
                             selector: str = "", value: str = "", key: str = "", **_) -> str:
    """Build the one-line text shown to the user when confirming an action.

    The location is ``url`` when given, otherwise the literal "current page".
    For ``fill``, values longer than 40 characters are cut to 40 and suffixed
    with an ellipsis. Operations other than click / fill / select / press are
    delegated to the base class description.
    """
    where = url if url else "current page"

    if operation == "click":
        text = f"Click '{selector}' on {where}"
    elif operation == "fill":
        if len(value) > 40:
            shown = value[:40] + "…"
        else:
            shown = value
        text = f"Type \"{shown}\" into '{selector}' on {where}"
    elif operation == "select":
        text = f"Select '{value}' in '{selector}' on {where}"
    elif operation == "press":
        text = f"Press '{key}' on {where}"
    else:
        # Not an interactive op: use the generic description.
        text = super().confirmation_description(operation=operation, url=url)
    return text
|
||||
|
||||
# ── Session management ────────────────────────────────────────────────────
|
||||
|
||||
async def _get_page(self, session_id: str, url: str | None = None):
|
||||
"""Get or create a page for this session; navigate to url if given."""
|
||||
data = BrowserTool._sessions.get(session_id)
|
||||
page = None
|
||||
if data:
|
||||
context, page = data
|
||||
if page.is_closed():
|
||||
try:
|
||||
await context.close()
|
||||
except Exception:
|
||||
pass
|
||||
page = None
|
||||
|
||||
if page is None:
|
||||
browser = await self._get_browser()
|
||||
context = await browser.new_context(
|
||||
user_agent=(
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
)
|
||||
context = await browser.new_context(user_agent=_USER_AGENT)
|
||||
page = await context.new_page()
|
||||
try:
|
||||
await page.goto(url, timeout=_TIMEOUT_MS, wait_until="domcontentloaded")
|
||||
BrowserTool._sessions[session_id] = (context, page)
|
||||
|
||||
if wait_for:
|
||||
try:
|
||||
await page.wait_for_selector(wait_for, timeout=10_000)
|
||||
except Exception:
|
||||
pass # continue even if selector doesn't appear
|
||||
if url:
|
||||
await page.goto(url, timeout=_TIMEOUT_MS, wait_until="domcontentloaded")
|
||||
|
||||
if operation == "screenshot":
|
||||
data = await page.screenshot(type="png")
|
||||
import base64
|
||||
return ToolResult(success=True, data={"screenshot_base64": base64.b64encode(data).decode()})
|
||||
|
||||
# fetch_page
|
||||
if extract_selector:
|
||||
elements = await page.query_selector_all(extract_selector)
|
||||
text_parts = [await el.inner_text() for el in elements]
|
||||
text = "\n".join(text_parts)
|
||||
else:
|
||||
text = await page.inner_text("body")
|
||||
|
||||
text = text[:_MAX_TEXT_CHARS]
|
||||
text = await sanitize_external_content(text, source="browser")
|
||||
return ToolResult(success=True, data={"url": url, "text": text, "length": len(text)})
|
||||
finally:
|
||||
await context.close()
|
||||
except Exception as e:
|
||||
return ToolResult(success=False, error=f"Browser error: {e}")
|
||||
return page
|
||||
|
||||
async def _get_browser(self):
|
||||
async with BrowserTool._lock:
|
||||
@@ -129,9 +199,9 @@ class BrowserTool(BaseTool):
|
||||
logger.info("[browser] Chromium launched")
|
||||
return BrowserTool._browser
|
||||
|
||||
# ── Domain access check ───────────────────────────────────────────────────
|
||||
|
||||
async def _check_tier(self, url: str) -> ToolResult | None:
|
||||
"""Returns ToolResult(success=False) if denied, None if allowed."""
|
||||
from urllib.parse import urlparse
|
||||
if await assert_domain_tier1(url):
|
||||
return None
|
||||
task_id = current_task_id.get()
|
||||
@@ -139,11 +209,123 @@ class BrowserTool(BaseTool):
|
||||
return None
|
||||
if web_tier2_enabled.get():
|
||||
return None
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(url)
|
||||
return ToolResult(
|
||||
success=False,
|
||||
error=(
|
||||
f"Domain '{parsed.hostname}' is not in the Tier 1 whitelist. "
|
||||
"Ask me to fetch a specific external page to enable Tier 2 access."
|
||||
"Ask me to access a specific external page to enable Tier 2 access."
|
||||
),
|
||||
)
|
||||
|
||||
# ── Execute ───────────────────────────────────────────────────────────────
|
||||
|
||||
async def execute(
    self,
    operation: str,
    url: str = "",
    selector: str = "",
    value: str = "",
    key: str = "",
    wait_for: str = "",
    extract_selector: str = "",
    **_,
) -> ToolResult:
    """Run one browser operation on the current session's page.

    Args:
        operation: One of fetch_page, screenshot, click, fill, select, press.
        url: URL to navigate to first. Required for fetch_page and
            screenshot; when omitted for interactive ops the session's
            current page is used.
        selector: CSS selector; required for click, fill and select,
            optional for press (defaults to "body").
        value: Text to type (fill) or option value to choose (select).
        key: Key name for press.
        wait_for: Optional CSS selector to wait for (best effort, 10 s).
        extract_selector: Optional CSS selector limiting fetch_page text.

    Returns:
        A ToolResult. Argument errors, a missing Playwright install, a
        denied domain and any browser exception are all reported as
        ``success=False`` results rather than raised. Page text returned
        to the caller (fetch_page text and click/press previews) is passed
        through ``sanitize_external_content``.
    """
    # Validate arguments before touching the browser, so a malformed call
    # never navigates or changes the session's page.
    if operation in ("fetch_page", "screenshot") and not url:
        return ToolResult(success=False, error=f"'url' is required for {operation}")
    if operation in ("click", "fill", "select") and not selector:
        return ToolResult(success=False, error=f"'selector' is required for {operation}")
    if operation == "press" and not key:
        return ToolResult(success=False, error="'key' is required for press")
    if operation not in ("fetch_page", "screenshot", "click", "fill", "select", "press"):
        return ToolResult(success=False, error=f"Unknown operation: {operation}")

    try:
        from playwright.async_api import async_playwright  # noqa: F401
    except ImportError:
        return ToolResult(
            success=False,
            error="Playwright is not installed. Run: pip install playwright && playwright install chromium",
        )

    session_id = current_session_id.get() or "default"

    # Whitelist check: against the explicit url, or else against the URL of
    # the page this session already has open.
    target_url = url
    if not target_url:
        session = BrowserTool._sessions.get(session_id)
        if session:
            try:
                target_url = session[1].url
            except Exception:
                logger.debug("[browser] could not read current page URL", exc_info=True)
    if target_url:
        denied = await self._check_tier(target_url)
        if denied:
            return denied

    try:
        page = await self._get_page(session_id, url or None)

        if wait_for:
            try:
                await page.wait_for_selector(wait_for, timeout=10_000)
            except Exception:
                # Best effort: carry on even if the selector never appears.
                logger.debug("[browser] wait_for selector %r did not appear", wait_for)

        # ── Read operations ──────────────────────────────────────────────

        if operation == "fetch_page":
            if extract_selector:
                elements = await page.query_selector_all(extract_selector)
                text = "\n".join([await el.inner_text() for el in elements])
            else:
                text = await page.inner_text("body")
            text = text[:_MAX_TEXT_CHARS]
            text = await sanitize_external_content(text, source="browser")
            return ToolResult(success=True, data={"url": page.url, "text": text, "length": len(text)})

        if operation == "screenshot":
            import base64

            png_bytes = await page.screenshot(type="png")
            return ToolResult(success=True, data={"screenshot_base64": base64.b64encode(png_bytes).decode()})

        # ── Interactive operations ───────────────────────────────────────

        if operation in ("click", "press"):
            if operation == "click":
                await page.click(selector, timeout=10_000)
            else:
                await page.press(selector if selector else "body", key, timeout=10_000)
            try:
                # The action may have triggered a navigation; give it a moment.
                await page.wait_for_load_state("domcontentloaded", timeout=5_000)
            except Exception:
                logger.debug("[browser] load state wait after %s timed out", operation)
            preview = (await page.inner_text("body"))[:2000]
            # The preview is external page content just like fetch_page text,
            # so it goes through the same sanitizer before being returned.
            preview = await sanitize_external_content(preview, source="browser")
            return ToolResult(success=True, data={"url": page.url, "page_preview": preview})

        if operation == "fill":
            await page.fill(selector, value, timeout=10_000)
            return ToolResult(success=True, data={"url": page.url, "filled": value})

        if operation == "select":
            await page.select_option(selector, value=value, timeout=10_000)
            return ToolResult(success=True, data={"url": page.url, "selected": value})

        # Defensive fallback; the up-front validation should make this unreachable.
        return ToolResult(success=False, error=f"Unknown operation: {operation}")

    except Exception as e:
        return ToolResult(success=False, error=f"Browser error: {e}")
|
||||
|
||||
Reference in New Issue
Block a user