Files
oai-web/server/tools/browser_tool.py
Rune Olsen 7b0a9ccc2b Settings: add dedicated DAV/Pushover tabs, fix CalDAV/CardDAV bugs
- Add admin DAV tab (rename from CalDAV/CardDAV) and Pushover tab
  - Add per-user Pushover tab (User Key only; App Token stays admin-managed)
  - Remove system-wide CalDAV/CardDAV fallback — per-user config only
  - Rewrite contacts_tool.py using httpx directly (caldav 2.x dropped AddressBook)
  - Fix CardDAV REPORT/PROPFIND using SOGo URL pattern
  - Fix CalDAV/CardDAV test endpoints (POST method, URL scheme normalization)
  - Fix Show Password button — API now returns actual credential values
  - Convert Credentials tab to generic key-value store; dedicated keys
    (CalDAV, Pushover, trusted_proxy) excluded via _DEDICATED_CRED_KEYS
2026-04-10 12:06:23 +02:00

150 lines
5.7 KiB
Python

"""
tools/browser_tool.py — Playwright headless browser tool.
For JS-heavy pages that httpx can't render. Enforces the same Tier 1/2
web whitelist as WebTool. Browser instance is lazy-initialized and shared
across calls.
Requires: playwright package + `playwright install chromium`
"""
from __future__ import annotations
import asyncio
import logging
from typing import ClassVar
from ..context_vars import current_task_id, web_tier2_enabled
from ..security import assert_domain_tier1, sanitize_external_content
from .base import BaseTool, ToolResult
logger = logging.getLogger(__name__)
# Hard cap on extracted page text (characters) returned by a fetch_page call.
_MAX_TEXT_CHARS = 25_000
# Navigation timeout passed to page.goto(), in milliseconds.
_TIMEOUT_MS = 30_000
class BrowserTool(BaseTool):
    """Headless-Chromium page fetcher backed by Playwright.

    Intended for JS-heavy pages and single-page apps that a plain HTTP
    client cannot render. Domain access is gated by the same Tier 1/2
    whitelist logic as the regular web tool (see ``_check_tier``). The
    Chromium process and Playwright driver are lazily started on first
    use and shared by all instances via class attributes.

    Requires the ``playwright`` package plus ``playwright install chromium``.
    """

    name = "browser"
    description = (
        "Fetch web pages using a real headless browser (Chromium). "
        "Use this for JS-heavy pages or single-page apps that the regular 'web' tool cannot read. "
        "Operations: fetch_page (extract text content), screenshot (base64 PNG). "
        "Follows the same domain whitelist rules as the web tool."
    )
    input_schema = {
        "type": "object",
        "properties": {
            "operation": {
                "type": "string",
                "enum": ["fetch_page", "screenshot"],
                "description": "fetch_page extracts text; screenshot returns a base64 PNG.",
            },
            "url": {
                "type": "string",
                "description": "URL to navigate to.",
            },
            "wait_for": {
                "type": "string",
                "description": "CSS selector to wait for before extracting content (optional).",
            },
            "extract_selector": {
                "type": "string",
                "description": "CSS selector to extract text from (optional; defaults to full page).",
            },
        },
        "required": ["operation", "url"],
    }
    requires_confirmation = False
    allowed_in_scheduled_tasks = False  # Too resource-heavy for scheduled agents

    # Shared, lazily-created Playwright driver + Chromium process (class-wide).
    _playwright = None
    _browser = None
    _lock: ClassVar[asyncio.Lock] = asyncio.Lock()

    async def execute(self, operation: str, url: str = "", wait_for: str = "", extract_selector: str = "", **_) -> ToolResult:
        """Navigate to *url* and either extract text or take a screenshot.

        Args:
            operation: "fetch_page" for text extraction; "screenshot" for a
                base64-encoded PNG. Anything else falls through to the
                fetch_page path (schema normally prevents this).
            url: Target URL; required.
            wait_for: Optional CSS selector to wait up to 10 s for before
                extracting — failures to appear are ignored (best-effort).
            extract_selector: Optional CSS selector; when given, text is the
                newline-joined inner text of all matches, otherwise the
                whole <body>.

        Returns:
            ToolResult with extracted data on success, or success=False with
            an error string (whitelist denial, missing Playwright, or any
            browser-level failure).
        """
        if not url:
            return ToolResult(success=False, error="'url' is required")

        # Enforce the same Tier 1/2 whitelist as WebTool before touching the network.
        refusal = await self._check_tier(url)
        if refusal:
            return refusal

        # Probe for the optional playwright dependency up front.
        try:
            from playwright.async_api import async_playwright  # noqa: F401
        except ImportError:
            return ToolResult(
                success=False,
                error="Playwright is not installed. Run: pip install playwright && playwright install chromium",
            )

        try:
            chromium = await self._get_browser()
            # Fresh context per call: isolated cookies/storage, desktop UA.
            ctx = await chromium.new_context(
                user_agent=(
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
                )
            )
            page = await ctx.new_page()
            try:
                await page.goto(url, timeout=_TIMEOUT_MS, wait_until="domcontentloaded")

                if wait_for:
                    try:
                        await page.wait_for_selector(wait_for, timeout=10_000)
                    except Exception:
                        # Best-effort: proceed with whatever has rendered so far.
                        pass

                if operation == "screenshot":
                    import base64
                    png = await page.screenshot(type="png")
                    return ToolResult(success=True, data={"screenshot_base64": base64.b64encode(png).decode()})

                # fetch_page path: scoped extraction if a selector was given,
                # otherwise the full document body.
                if extract_selector:
                    nodes = await page.query_selector_all(extract_selector)
                    pieces = []
                    for node in nodes:
                        pieces.append(await node.inner_text())
                    text = "\n".join(pieces)
                else:
                    text = await page.inner_text("body")

                # Truncate, then strip/neutralize untrusted markup before returning.
                text = text[:_MAX_TEXT_CHARS]
                text = await sanitize_external_content(text, source="browser")
                return ToolResult(success=True, data={"url": url, "text": text, "length": len(text)})
            finally:
                # Always tear down the per-call context (closes its pages too).
                await ctx.close()
        except Exception as e:
            return ToolResult(success=False, error=f"Browser error: {e}")

    async def _get_browser(self):
        """Return the shared Chromium instance, launching it on first use.

        Serialized by the class lock so concurrent calls never race to start
        two browsers; relaunches if a previous instance has disconnected.
        """
        cls = BrowserTool
        async with cls._lock:
            if cls._browser is None or not cls._browser.is_connected():
                from playwright.async_api import async_playwright
                cls._playwright = await async_playwright().start()
                cls._browser = await cls._playwright.chromium.launch(
                    # Flags for containerized/rootless environments.
                    args=["--no-sandbox", "--disable-dev-shm-usage"],
                )
                logger.info("[browser] Chromium launched")
            return cls._browser

    async def _check_tier(self, url: str) -> ToolResult | None:
        """Returns ToolResult(success=False) if denied, None if allowed."""
        from urllib.parse import urlparse

        # Tier 1: domain is on the always-allowed whitelist.
        if await assert_domain_tier1(url):
            return None
        # Inside a task context — access already granted upstream (presumably;
        # NOTE(review): confirm scheduled/task runs are meant to bypass the check).
        if current_task_id.get() is not None:
            return None
        # Tier 2 explicitly enabled for this conversation.
        if web_tier2_enabled.get():
            return None

        parsed = urlparse(url)
        return ToolResult(
            success=False,
            error=(
                f"Domain '{parsed.hostname}' is not in the Tier 1 whitelist. "
                "Ask me to fetch a specific external page to enable Tier 2 access."
            ),
        )