- Add admin DAV tab (rename from CalDAV/CardDAV) and Pushover tab
- Add per-user Pushover tab (User Key only; App Token stays admin-managed)
- Remove system-wide CalDAV/CardDAV fallback — per-user config only
- Rewrite contacts_tool.py using httpx directly (caldav 2.x dropped AddressBook)
- Fix CardDAV REPORT/PROPFIND using SOGo URL pattern
- Fix CalDAV/CardDAV test endpoints (POST method, URL scheme normalization)
- Fix Show Password button — API now returns actual credential values
- Convert Credentials tab to generic key-value store; dedicated keys
(CalDAV, Pushover, trusted_proxy) excluded via _DEDICATED_CRED_KEYS
176 lines
6.0 KiB
Python
176 lines
6.0 KiB
Python
"""
|
|
monitors/page_monitor.py — Page change monitor.

Polls watched URLs on a cron schedule, hashes the extracted page text, and
dispatches an agent and/or a Pushover notification when the content changes.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import logging
|
|
|
|
import httpx
|
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
from apscheduler.triggers.cron import CronTrigger
|
|
|
|
from ..config import settings
|
|
from . import store
|
|
|
|
# Module-scoped logger; messages emitted here carry a "[page-monitor]" prefix.
logger = logging.getLogger(__name__)

# Headers attached to every outgoing page fetch. The User-Agent identifies
# the monitor to the remote server.
_DEFAULT_HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; oAI-Web page-monitor/1.0)"}
|
|
|
|
|
|
async def _fetch_page_content(url: str, css_selector: str | None = None) -> str:
    """Fetch *url* and return the text used for change detection.

    The page is downloaded with a 30 second timeout, following redirects and
    sending ``_DEFAULT_HEADERS``. The HTML is parsed once with BeautifulSoup
    (imported lazily) and reduced to plain text.

    Args:
        url: Address of the page to download.
        css_selector: Optional CSS selector. When given and usable, only the
            text of the matching elements is returned, one element per line.

    Returns:
        * the selected elements' text when ``css_selector`` is given and the
          selection succeeds (an empty string if nothing matches);
        * otherwise the whole page's text with ``script``, ``style``, ``nav``,
          ``footer`` and ``header`` elements removed;
        * the raw HTML when BeautifulSoup is unavailable or parsing fails.

    Raises:
        httpx.HTTPError: On transport failures or a non-success status code
            (via ``raise_for_status``).
    """
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
        headers=_DEFAULT_HEADERS,
    ) as client:
        resp = await client.get(url)
        resp.raise_for_status()
        html = resp.text

    # bs4 is an optional dependency: without it the raw HTML is hashed instead.
    try:
        from bs4 import BeautifulSoup
    except ImportError as e:
        logger.warning(
            "[page-monitor] BeautifulSoup unavailable, using raw HTML for '%s': %s", url, e
        )
        return html

    # Parse once; the same tree serves both the selector and the fallback path.
    try:
        soup = BeautifulSoup(html, "html.parser")
    except Exception as e:
        logger.warning("[page-monitor] HTML parsing failed for '%s': %s", url, e)
        return html

    if css_selector:
        try:
            elements = soup.select(css_selector)
            selected_text = "\n".join(
                el.get_text(separator=" ", strip=True) for el in elements
            )
        except Exception as e:
            # Invalid selector or extraction error: fall through to full-page text.
            logger.warning("[page-monitor] CSS selector '%s' failed: %s", css_selector, e)
        else:
            if not elements:
                # Still return "" (unchanged behaviour), but make the likely
                # misconfiguration visible: an empty result never changes.
                logger.warning(
                    "[page-monitor] CSS selector '%s' matched nothing on '%s'",
                    css_selector,
                    url,
                )
            return selected_text

    try:
        # Drop boilerplate and non-content elements before extracting text.
        for tag in soup(["script", "style", "nav", "footer", "header"]):
            tag.decompose()
        return soup.get_text(separator="\n", strip=True)
    except Exception as e:
        logger.warning("[page-monitor] Text extraction failed for '%s': %s", url, e)
        return html
|
|
|
|
|
|
def _content_hash(text: str) -> str:
    """Return the hexadecimal SHA-256 digest of *text*.

    The text is encoded as UTF-8 with ``errors="replace"``, so characters that
    cannot be encoded are substituted instead of raising.
    """
    encoded = text.encode("utf-8", errors="replace")
    hasher = hashlib.sha256()
    hasher.update(encoded)
    return hasher.hexdigest()
|
|
|
|
|
|
class PageMonitorManager:
    """Manages APScheduler jobs for all watched_pages entries.

    Each watched page gets one cron-triggered job with the id ``page:<id>``.
    A job fetches the page, hashes its text, stores the result through
    ``store`` and dispatches a notification when the hash differs from the
    previously stored one. Until ``init()`` has supplied a scheduler, every
    scheduling method is a no-op.
    """

    def __init__(self) -> None:
        # Set by init(); None means "no scheduler attached yet".
        self._scheduler: AsyncIOScheduler | None = None

    def init(self, scheduler: AsyncIOScheduler) -> None:
        """Share the AgentRunner's scheduler."""
        self._scheduler = scheduler

    @staticmethod
    def _job_id(page_id) -> str:
        """Return the scheduler job id used for the page with *page_id*."""
        return f"page:{page_id}"

    async def start_all(self) -> None:
        """Load all enabled watched pages and register APScheduler jobs."""
        pages = await store.list_watched_pages()
        # Count only jobs that were actually registered, so the log line is
        # not misleading when scheduling fails or no scheduler is attached.
        registered = sum(1 for page in pages if page["enabled"] and self._add_job(page))
        logger.info("[page-monitor] Registered %d page monitor jobs", registered)

    def _add_job(self, page: dict) -> bool:
        """Register (or replace) the cron job for *page*.

        Returns:
            True when the job was handed to the scheduler, False when no
            scheduler is attached or scheduling failed (the failure is logged).
        """
        if not self._scheduler:
            return False
        try:
            self._scheduler.add_job(
                self._check_page,
                trigger=CronTrigger.from_crontab(page["schedule"], timezone=settings.timezone),
                id=self._job_id(page["id"]),
                args=[str(page["id"])],
                replace_existing=True,
                misfire_grace_time=300,
            )
        except Exception as e:
            logger.error("[page-monitor] Failed to schedule page '%s': %s", page["name"], e)
            return False
        return True

    def _remove_job(self, job_id: str) -> None:
        """Best-effort removal of *job_id*; failures are logged, never raised."""
        try:
            self._scheduler.remove_job(job_id)
        except Exception as e:
            # Typically the job simply does not exist (e.g. the page was
            # disabled); keep this non-fatal but leave a trace for debugging.
            logger.debug("[page-monitor] Could not remove job '%s': %s", job_id, e)

    def reschedule(self, page: dict) -> None:
        """Drop the existing job for *page* and re-add it if the page is enabled."""
        if not self._scheduler:
            return
        self._remove_job(self._job_id(page["id"]))
        if page.get("enabled"):
            self._add_job(page)

    def remove(self, page_id: str) -> None:
        """Remove the job for *page_id*, ignoring a job that is not scheduled."""
        if not self._scheduler:
            return
        self._remove_job(self._job_id(page_id))

    async def check_now(self, page_id: str) -> dict:
        """Force-check a page immediately (UI-triggered). Returns status dict."""
        return await self._check_page(page_id)

    async def _check_page(self, page_id: str) -> dict:
        """Fetch one watched page, record the result and dispatch on change.

        Returns:
            ``{"error": <message>}`` when the page is unknown or the fetch
            failed; otherwise ``{"changed", "hash", "first_check"}``. The first
            successful check only stores a baseline hash and never counts as a
            change.
        """
        page = await store.get_watched_page(page_id)
        if not page:
            return {"error": "Page not found"}

        logger.info("[page-monitor] Checking '%s' (%s)", page["name"], page["url"])

        try:
            content = await _fetch_page_content(page["url"], page.get("css_selector"))
        except Exception as e:
            # Exceptions raised without a message stringify to "", which would
            # store and return an empty, falsy error; fall back to the class name.
            error_msg = (str(e) or type(e).__name__)[:200]
            logger.warning("[page-monitor] Failed to fetch '%s': %s", page["url"], error_msg)
            await store.update_page_check_result(page_id, None, False, error=error_msg)
            return {"error": error_msg}

        new_hash = _content_hash(content)
        old_hash = page.get("last_content_hash")
        # No previous hash means this is the baseline check, not a change.
        changed = old_hash is not None and new_hash != old_hash

        await store.update_page_check_result(page_id, new_hash, changed)

        if changed:
            logger.info("[page-monitor] Change detected on '%s'", page["name"])
            await self._dispatch_change(page, content)

        return {"changed": changed, "hash": new_hash, "first_check": old_hash is None}

    async def _dispatch_change(self, page: dict, content: str) -> None:
        """Notify about a changed page according to its ``notification_mode``.

        Modes: ``"pushover"`` sends a Pushover message, ``"agent"`` runs the
        page's agent with a summary of the change, ``"both"`` does both. A
        missing or empty mode is treated as ``"agent"``. Failures of either
        channel are logged and do not propagate.
        """
        # `or` (rather than a .get default) also covers a key stored as None.
        mode = page.get("notification_mode") or "agent"
        message = (
            f"Page change detected: {page['name']}\n"
            f"URL: {page['url']}\n\n"
            f"Current content (first 2000 chars):\n{content[:2000]}"
        )

        if mode in ("pushover", "both"):
            try:
                # Imported lazily, as in the rest of this module's dispatch code.
                from ..tools.pushover_tool import PushoverTool

                await PushoverTool().execute(
                    title=f"Page changed: {page['name']}",
                    message=f"{page['url']} has new content.",
                    priority=0,
                )
            except Exception as e:
                logger.warning("[page-monitor] Pushover notify failed: %s", e)

        if mode in ("agent", "both"):
            agent_id = page.get("agent_id")
            if not agent_id:
                # Previously the change was dropped without any trace.
                logger.warning(
                    "[page-monitor] No agent configured for '%s'; change not dispatched to an agent",
                    page["name"],
                )
                return
            try:
                from ..agents.runner import agent_runner

                await agent_runner.run_agent_now(
                    agent_id=agent_id,
                    override_message=message,
                )
            except Exception as e:
                logger.warning("[page-monitor] Agent dispatch failed: %s", e)
|
|
|
|
|
|
# Module-level PageMonitorManager instance, created at import time. It has no
# scheduler until init() is called, so scheduling calls before that are no-ops.
page_monitor = PageMonitorManager()
|