Files
oai-web/server/monitors/page_monitor.py
Rune Olsen 7b0a9ccc2b Settings: add dedicated DAV/Pushover tabs, fix CalDAV/CardDAV bugs
- Add admin DAV tab (rename from CalDAV/CardDAV) and Pushover tab
  - Add per-user Pushover tab (User Key only; App Token stays admin-managed)
  - Remove system-wide CalDAV/CardDAV fallback — per-user config only
  - Rewrite contacts_tool.py using httpx directly (caldav 2.x dropped AddressBook)
  - Fix CardDAV REPORT/PROPFIND using SOGo URL pattern
  - Fix CalDAV/CardDAV test endpoints (POST method, URL scheme normalization)
  - Fix Show Password button — API now returns actual credential values
  - Convert Credentials tab to generic key-value store; dedicated keys
    (CalDAV, Pushover, trusted_proxy) excluded via _DEDICATED_CRED_KEYS
2026-04-10 12:06:23 +02:00

176 lines
6.0 KiB
Python

"""
monitors/page_monitor.py — Page change monitor.
Polls watched URLs on a cron schedule, hashes the content, and dispatches
an agent (or Pushover notification) when the page content changes.
"""
from __future__ import annotations
import hashlib
import logging
import httpx
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from ..config import settings
from . import store
logger = logging.getLogger(__name__)
# Headers sent with every poll request. A recognizable, bot-style User-Agent
# is supplied because some sites reject requests with no User-Agent at all.
_DEFAULT_HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; oAI-Web page-monitor/1.0)",
}
def _extract_text(html: str, css_selector: str | None) -> str:
    """Reduce raw HTML to plain text.

    If *css_selector* is given, return the joined text of all matching
    elements (an empty string when nothing matches). If the selector raises
    (bad selector, bs4 missing), log a warning and fall through to whole-page
    extraction, which strips boilerplate tags before extracting text. If even
    that fails, return the raw HTML unchanged so hashing still works.
    """
    if css_selector:
        try:
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(html, "html.parser")
            parts = (
                el.get_text(separator=" ", strip=True)
                for el in soup.select(css_selector)
            )
            return "\n".join(parts)
        except Exception as e:
            logger.warning("[page-monitor] CSS selector '%s' failed: %s", css_selector, e)
    try:
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(html, "html.parser")
        # Drop boilerplate that changes without the content actually changing.
        for tag in soup(["script", "style", "nav", "footer", "header"]):
            tag.decompose()
        return soup.get_text(separator="\n", strip=True)
    except Exception:
        return html


async def _fetch_page_content(url: str, css_selector: str | None = None) -> str:
    """Fetch *url* and return its plain-text content.

    Follows redirects, uses a 30s timeout, and raises
    ``httpx.HTTPStatusError`` for non-2xx responses. The response body is
    reduced to text by :func:`_extract_text`; parsing happens after the
    client is closed so the connection is not held during CPU work.
    """
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
        headers=_DEFAULT_HEADERS,
    ) as client:
        resp = await client.get(url)
        resp.raise_for_status()
        html = resp.text
    return _extract_text(html, css_selector)
def _content_hash(text: str) -> str:
return hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest()
class PageMonitorManager:
    """Manages APScheduler jobs for all watched_pages entries.

    One instance is shared at module level (``page_monitor`` below). The app
    wires in the AgentRunner's scheduler via :meth:`init`, loads persisted
    pages with :meth:`start_all`, and keeps jobs in sync through
    :meth:`reschedule` / :meth:`remove`. All scheduling methods are no-ops
    until :meth:`init` has been called.
    """

    def __init__(self) -> None:
        # Set by init(); None means "not wired to a scheduler yet".
        self._scheduler: AsyncIOScheduler | None = None

    def init(self, scheduler: AsyncIOScheduler) -> None:
        """Share the AgentRunner's scheduler."""
        self._scheduler = scheduler

    async def start_all(self) -> None:
        """Load all enabled watched pages and register APScheduler jobs."""
        pages = await store.list_watched_pages()
        # .get() treats a missing flag as disabled (consistent with
        # reschedule()) instead of raising KeyError on a partial row.
        enabled_pages = [p for p in pages if p.get("enabled")]
        for page in enabled_pages:
            self._add_job(page)
        logger.info("[page-monitor] Registered %d page monitor jobs", len(enabled_pages))

    def _add_job(self, page: dict) -> None:
        """Register (or replace) the cron job for one page.

        Scheduling errors — typically a malformed crontab expression — are
        logged and swallowed so one bad page cannot break the others.
        """
        if not self._scheduler:
            return
        try:
            self._scheduler.add_job(
                self._check_page,
                trigger=CronTrigger.from_crontab(page["schedule"], timezone=settings.timezone),
                id=f"page:{page['id']}",
                args=[str(page["id"])],
                replace_existing=True,
                misfire_grace_time=300,  # seconds: tolerate brief scheduler stalls
            )
        except Exception as e:
            logger.error("[page-monitor] Failed to schedule page '%s': %s", page["name"], e)

    def reschedule(self, page: dict) -> None:
        """Drop any existing job for *page*, then re-add it if still enabled."""
        if not self._scheduler:
            return
        try:
            self._scheduler.remove_job(f"page:{page['id']}")
        except Exception:
            pass  # job may not exist yet — that's fine
        if page.get("enabled"):
            self._add_job(page)

    def remove(self, page_id: str) -> None:
        """Unschedule a deleted page; unknown ids are silently ignored."""
        if not self._scheduler:
            return
        try:
            self._scheduler.remove_job(f"page:{page_id}")
        except Exception:
            pass

    async def check_now(self, page_id: str) -> dict:
        """Force-check a page immediately (UI-triggered). Returns status dict."""
        return await self._check_page(page_id)

    async def _check_page(self, page_id: str) -> dict:
        """Fetch, hash, persist the result, and dispatch if content changed.

        Returns either ``{"error": msg}`` (unknown page / fetch failure) or
        ``{"changed": bool, "hash": str, "first_check": bool}``.
        """
        page = await store.get_watched_page(page_id)
        if not page:
            return {"error": "Page not found"}
        logger.info("[page-monitor] Checking '%s' (%s)", page["name"], page["url"])
        try:
            content = await _fetch_page_content(page["url"], page.get("css_selector"))
        except Exception as e:
            error_msg = str(e)[:200]  # cap stored/returned error text
            logger.warning("[page-monitor] Failed to fetch '%s': %s", page["url"], error_msg)
            await store.update_page_check_result(page_id, None, False, error=error_msg)
            return {"error": error_msg}
        new_hash = _content_hash(content)
        old_hash = page.get("last_content_hash")
        # The very first check (no stored hash) records a baseline, not a change.
        changed = old_hash is not None and new_hash != old_hash
        await store.update_page_check_result(page_id, new_hash, changed)
        if changed:
            logger.info("[page-monitor] Change detected on '%s'", page["name"])
            await self._dispatch_change(page, content)
        return {"changed": changed, "hash": new_hash, "first_check": old_hash is None}

    async def _dispatch_change(self, page: dict, content: str) -> None:
        """Notify about a change via Pushover, an agent run, or both.

        ``notification_mode`` is "agent" (default), "pushover", or "both".
        Each channel is best-effort: failures are logged, never raised, so one
        broken channel does not block the other.
        """
        mode = page.get("notification_mode", "agent")
        message = (
            f"Page change detected: {page['name']}\n"
            f"URL: {page['url']}\n\n"
            f"Current content (first 2000 chars):\n{content[:2000]}"
        )
        if mode in ("pushover", "both"):
            try:
                from ..tools.pushover_tool import PushoverTool
                await PushoverTool().execute(
                    title=f"Page changed: {page['name']}",
                    message=f"{page['url']} has new content.",
                    priority=0,
                )
            except Exception as e:
                logger.warning("[page-monitor] Pushover notify failed: %s", e)
        if mode in ("agent", "both"):
            agent_id = page.get("agent_id")
            if agent_id:
                try:
                    from ..agents.runner import agent_runner
                    await agent_runner.run_agent_now(
                        agent_id=agent_id,
                        override_message=message,
                    )
                except Exception as e:
                    logger.warning("[page-monitor] Agent dispatch failed: %s", e)
page_monitor = PageMonitorManager()