Files
oai-web/server/agents/runner.py
Rune Olsen 7b0a9ccc2b Settings: add dedicated DAV/Pushover tabs, fix CalDAV/CardDAV bugs
- Add admin DAV tab (rename from CalDAV/CardDAV) and Pushover tab
  - Add per-user Pushover tab (User Key only; App Token stays admin-managed)
  - Remove system-wide CalDAV/CardDAV fallback — per-user config only
  - Rewrite contacts_tool.py using httpx directly (caldav 2.x dropped AddressBook)
  - Fix CardDAV REPORT/PROPFIND using SOGo URL pattern
  - Fix CalDAV/CardDAV test endpoints (POST method, URL scheme normalization)
  - Fix Show Password button — API now returns actual credential values
  - Convert Credentials tab to generic key-value store; dedicated keys
    (CalDAV, Pushover, trusted_proxy) excluded via _DEDICATED_CRED_KEYS
2026-04-10 12:06:23 +02:00

333 lines
13 KiB
Python

"""
agents/runner.py — Agent execution and APScheduler integration (async).
Owns the AsyncIOScheduler — schedules and runs all agents (cron + manual).
Each run is tracked in the agent_runs table with token counts.
"""
from __future__ import annotations
import asyncio
import logging
from datetime import datetime, timezone
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from ..agent.agent import Agent, DoneEvent, ErrorEvent
from ..config import settings
from ..database import credential_store
from . import tasks as agent_store
logger = logging.getLogger(__name__)

# Run-queue priority levels — a smaller number means a higher priority:
#   PRIORITY_HIGH   (0)  user-initiated chat runs
#   PRIORITY_NORMAL (1)  webhook / inbox / Telegram triggers
#   PRIORITY_LOW    (2)  background monitors
PRIORITY_HIGH, PRIORITY_NORMAL, PRIORITY_LOW = range(3)

# Concurrency cap used when no valid "system:max_concurrent_runs" value is stored
_DEFAULT_MAX_CONCURRENT = 3
class AgentRunner:
    """Schedule and execute agents, recording every run through agent_store.

    Owns the AsyncIOScheduler that fires cron-scheduled agents and keeps an
    in-memory map of in-flight asyncio tasks (run_id → task) so runs can be
    inspected and stopped. The number of simultaneously executing runs is
    capped by a semaphore that is created in start().
    """

    # Upper bound (seconds) that run_agent_and_wait() blocks on a single run
    _WAIT_TIMEOUT_SECONDS = 300

    def __init__(self) -> None:
        self._agent: Agent | None = None
        self._scheduler = AsyncIOScheduler(timezone=settings.timezone)
        self._running: dict[str, asyncio.Task] = {}  # run_id → asyncio.Task
        # Concurrency semaphore — initialised in start() once event loop is running
        self._semaphore: asyncio.Semaphore | None = None
        self._max_concurrent: int = _DEFAULT_MAX_CONCURRENT

    @property
    def scheduler(self) -> AsyncIOScheduler:
        """The underlying APScheduler instance."""
        return self._scheduler

    @property
    def queue_status(self) -> dict:
        """Return ``running`` / ``queued`` counts and the ``max_concurrent`` cap."""
        active = sum(1 for task in self._running.values() if not task.done())
        # Tasks waiting for the semaphore also live in _running, so whatever
        # exceeds the cap is reported as "queued".
        return {
            "running": min(active, self._max_concurrent),
            "queued": max(0, active - self._max_concurrent),
            "max_concurrent": self._max_concurrent,
        }

    def init(self, agent: Agent) -> None:
        """Attach the Agent instance that executes runs."""
        self._agent = agent

    async def _load_max_concurrent(self) -> int:
        """Read the concurrency cap from the credential store.

        Returns at least 1; falls back to _DEFAULT_MAX_CONCURRENT when the
        value is missing or not an integer.
        """
        val = await credential_store.get("system:max_concurrent_runs")
        try:
            return max(1, int(val)) if val else _DEFAULT_MAX_CONCURRENT
        except (ValueError, TypeError):
            return _DEFAULT_MAX_CONCURRENT

    async def start(self) -> None:
        """Load all enabled agents with schedules into APScheduler and start it."""
        # Initialise concurrency semaphore (must happen inside a running event loop)
        self._max_concurrent = await self._load_max_concurrent()
        self._semaphore = asyncio.Semaphore(self._max_concurrent)

        for agent in await agent_store.list_agents():
            if agent["enabled"] and agent["schedule"]:
                self._add_job(agent)

        # Daily audit log rotation at 03:00
        self._scheduler.add_job(
            self._rotate_audit_log,
            trigger=CronTrigger(hour=3, minute=0, timezone=settings.timezone),
            id="system:audit-rotation",
            replace_existing=True,
            misfire_grace_time=3600,
        )
        self._scheduler.start()
        logger.info(
            "[agent-runner] Scheduler started, max_concurrent=%d", self._max_concurrent
        )

    def shutdown(self) -> None:
        """Stop the scheduler (without waiting for jobs) if it is running."""
        if self._scheduler.running:
            self._scheduler.shutdown(wait=False)
            logger.info("[agent-runner] Scheduler stopped")

    def _add_job(self, agent: dict) -> None:
        """Register (or replace) the cron job for *agent*; failures are logged, not raised."""
        try:
            self._scheduler.add_job(
                self._run_agent_scheduled,
                trigger=CronTrigger.from_crontab(
                    agent["schedule"], timezone=settings.timezone
                ),
                id=f"agent:{agent['id']}",
                args=[agent["id"]],
                replace_existing=True,
                misfire_grace_time=300,
            )
            logger.info(
                "[agent-runner] Scheduled agent '%s' (%s)",
                agent["name"],
                agent["schedule"],
            )
        except Exception as e:
            # Best effort: one bad schedule must not prevent the others from loading
            logger.error(
                "[agent-runner] Failed to schedule agent '%s': %s", agent["name"], e
            )

    def _remove_job_quietly(self, job_id: str) -> None:
        """Remove a scheduler job, tolerating any failure (e.g. the job does not exist)."""
        try:
            self._scheduler.remove_job(job_id)
        except Exception as e:
            # Deliberately best-effort; keep a trace instead of swallowing silently
            logger.debug("[agent-runner] remove_job(%s) ignored: %s", job_id, e)

    def reschedule(self, agent: dict) -> None:
        """Drop the agent's existing job and re-add it if it is enabled and has a schedule."""
        self._remove_job_quietly(f"agent:{agent['id']}")
        if agent["enabled"] and agent["schedule"]:
            self._add_job(agent)

    def remove(self, agent_id: str) -> None:
        """Unschedule the agent's cron job, if any."""
        self._remove_job_quietly(f"agent:{agent_id}")

    # ── Execution ─────────────────────────────────────────────────────────────

    async def run_agent_now(self, agent_id: str, override_message: str | None = None) -> dict:
        """UI-triggered run — bypasses schedule, returns run dict."""
        return await self._run_agent(
            agent_id, ignore_rate_limit=True, override_message=override_message
        )

    async def run_agent_and_wait(
        self,
        agent_id: str,
        override_message: str,
        session_id: str | None = None,
        extra_tools: list | None = None,
        force_only_extra_tools: bool = False,
    ) -> str:
        """Run an agent, wait for it to finish, and return the final response text.

        Returns a "Could not run agent: …" string when the run could not be
        started, and "(no response)" when no result is stored for the run
        (including when the wait times out before a result is written).
        """
        run = await self._run_agent(
            agent_id,
            ignore_rate_limit=True,
            override_message=override_message,
            session_id=session_id,
            extra_tools=extra_tools,
            force_only_extra_tools=force_only_extra_tools,
        )
        if "id" not in run:
            logger.warning(
                "[agent-runner] run_agent_and_wait failed for agent %s: %s",
                agent_id,
                run.get("error"),
            )
            return f"Could not run agent: {run.get('error', 'unknown error')}"

        run_id = run["id"]
        task = self._running.get(run_id)
        if task:
            try:
                # shield() keeps the run alive if this wait times out
                await asyncio.wait_for(
                    asyncio.shield(task), timeout=self._WAIT_TIMEOUT_SECONDS
                )
            except asyncio.TimeoutError:
                pass  # report whatever the run has stored so far
            except asyncio.CancelledError:
                # Only absorb the cancellation of the run itself; if it is the
                # caller that was cancelled, the cancellation must propagate.
                if not task.cancelled():
                    raise

        row = await agent_store.get_run(run_id)
        return (row.get("result") or "(no response)") if row else "(no response)"

    async def _rotate_audit_log(self) -> None:
        """Called daily by APScheduler. Purges audit entries older than the configured retention."""
        from ..audit import audit_log

        days_str = await credential_store.get("system:audit_retention_days")
        try:
            days = int(days_str) if days_str else 0
        except (ValueError, TypeError):
            # A malformed setting should not make the scheduled job raise
            logger.warning(
                "[agent-runner] Invalid system:audit_retention_days value %r — skipping audit rotation",
                days_str,
            )
            return
        if days <= 0:
            return
        deleted = await audit_log.purge(older_than_days=days)
        logger.info(
            "[agent-runner] Audit rotation: deleted %d entries older than %d days",
            deleted,
            days,
        )

    async def _run_agent_scheduled(self, agent_id: str) -> None:
        """Called by APScheduler — fire and forget."""
        await self._run_agent(agent_id, ignore_rate_limit=False)

    async def _run_agent(
        self,
        agent_id: str,
        ignore_rate_limit: bool = False,
        override_message: str | None = None,
        session_id: str | None = None,
        extra_tools: list | None = None,
        force_only_extra_tools: bool = False,
    ) -> dict:
        """Validate the agent, create a run record and launch it as a background task.

        Returns ``{"error": …}`` when the run is refused (unknown or disabled
        agent, system paused, runner not initialised, deactivated owner);
        otherwise returns the run row as read back from agent_store right
        after the task has been created. The run itself continues in the
        background, gated by the concurrency semaphore.
        """
        agent_data = await agent_store.get_agent(agent_id)
        if not agent_data:
            logger.warning("[agent-runner] _run_agent: agent %s not found", agent_id)
            return {"error": "Agent not found"}
        # ignore_rate_limit also lets manual runs execute a disabled agent
        if not agent_data["enabled"] and not ignore_rate_limit:
            logger.warning("[agent-runner] _run_agent: agent %s is disabled", agent_id)
            return {"error": "Agent is disabled"}

        # Kill switch
        if await credential_store.get("system:paused") == "1":
            logger.warning("[agent-runner] _run_agent: system is paused")
            return {"error": "Agent is paused"}

        if self._agent is None:
            logger.warning("[agent-runner] _run_agent: agent runner not initialized")
            return {"error": "Agent not initialized"}

        # allowed_tools is JSONB, normalised to list|None in _agent_row()
        raw = agent_data.get("allowed_tools")
        allowed_tools: list[str] | None = raw if raw else None

        # Resolve agent owner's admin status — bash is never available to non-admin owners
        # Also block execution if the owner account has been deactivated.
        owner_is_admin = True
        owner_id = agent_data.get("owner_user_id")
        if owner_id:
            from ..users import get_user_by_id as _get_user

            owner = await _get_user(owner_id)
            if owner and not owner.get("is_active", True):
                logger.warning(
                    "[agent-runner] Skipping agent '%s' — owner account is deactivated",
                    agent_data["name"],
                )
                return {"error": "Owner account is deactivated"}
            # NOTE(review): an owner id with no matching user record is treated
            # as admin (fail-open) — confirm this is intended.
            owner_is_admin = (owner["role"] == "admin") if owner else True

        if not owner_is_admin:
            if allowed_tools is None:
                # No explicit allow-list: start from every registered tool
                allowed_tools = [t.name for t in self._agent._registry.all_tools()]
            allowed_tools = [name for name in allowed_tools if name != "bash"]

        # Create run record
        run = await agent_store.create_run(agent_id)
        run_id = run["id"]
        logger.info(
            "[agent-runner] Running agent '%s' run=%s", agent_data["name"], run_id[:8]
        )

        # Per-agent max_tool_calls override (None = use system default)
        max_tool_calls: int | None = agent_data.get("max_tool_calls") or None

        async def _execute() -> None:
            """Drive the agent stream to completion and persist the outcome."""
            input_tokens = 0
            output_tokens = 0
            final_text = ""
            try:
                from ..agent.agent import _build_system_prompt

                prompt_mode = agent_data.get("prompt_mode") or "combined"
                agent_prompt = agent_data["prompt"]

                # With an override message the agent prompt is only used as the
                # system prompt; without one it is also sent as the message.
                run_message = override_message or agent_prompt
                system_override: str | None = None
                if prompt_mode == "agent_only":
                    system_override = agent_prompt
                elif prompt_mode == "combined":
                    system_override = (
                        agent_prompt
                        + "\n\n---\n\n"
                        + await _build_system_prompt(user_id=owner_id)
                    )

                stream = await self._agent.run(
                    message=run_message,
                    session_id=session_id or f"agent:{run_id}",
                    task_id=run_id,
                    allowed_tools=allowed_tools,
                    model=agent_data.get("model") or None,
                    max_tool_calls=max_tool_calls,
                    system_override=system_override,
                    user_id=owner_id,
                    extra_tools=extra_tools,
                    force_only_extra_tools=force_only_extra_tools,
                )
                async for event in stream:
                    if isinstance(event, DoneEvent):
                        final_text = event.text or "Done"
                        input_tokens = event.usage.input_tokens
                        output_tokens = event.usage.output_tokens
                    elif isinstance(event, ErrorEvent):
                        # NOTE(review): an ErrorEvent is stored as the result
                        # text but the run is still finished as "success".
                        final_text = f"Error: {event.message}"

                await agent_store.finish_run(
                    run_id,
                    status="success",
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                    result=final_text,
                )
                logger.info(
                    "[agent-runner] Agent '%s' run=%s completed OK",
                    agent_data["name"],
                    run_id[:8],
                )
            except asyncio.CancelledError:
                await agent_store.finish_run(run_id, status="stopped")
                logger.info("[agent-runner] Run %s stopped", run_id[:8])
            except Exception as e:
                logger.error("[agent-runner] Run %s failed: %s", run_id[:8], e)
                await agent_store.finish_run(run_id, status="error", error=str(e))
            finally:
                self._running.pop(run_id, None)

        async def _execute_with_semaphore() -> None:
            """Wait for a concurrency slot, then execute the run."""
            semaphore = self._semaphore
            if semaphore is None:
                # start() has not created the semaphore, so no cap is enforced
                await _execute()
                return
            try:
                await semaphore.acquire()
            except asyncio.CancelledError:
                # Stopped while still queued: _execute() never started, so its
                # own cleanup cannot run — close the run record here instead.
                try:
                    await agent_store.finish_run(run_id, status="stopped")
                    logger.info("[agent-runner] Run %s stopped", run_id[:8])
                finally:
                    self._running.pop(run_id, None)
                return
            try:
                await _execute()
            finally:
                semaphore.release()

        task = asyncio.create_task(_execute_with_semaphore())
        self._running[run_id] = task
        return await agent_store.get_run(run_id)

    def stop_run(self, run_id: str) -> bool:
        """Cancel the task of an in-flight run; return True if a cancel was requested."""
        task = self._running.get(run_id)
        if task and not task.done():
            task.cancel()
            return True
        return False

    def is_running(self, run_id: str) -> bool:
        """Return True while the run has a tracked task that has not finished."""
        task = self._running.get(run_id)
        return task is not None and not task.done()

    async def find_active_run(self, agent_id: str) -> str | None:
        """Return run_id of an in-progress run for this agent, or None."""
        # Iterate a snapshot: finishing runs pop themselves from _running while
        # we are suspended in get_run(), which would otherwise invalidate the
        # dict iterator.
        for run_id, task in list(self._running.items()):
            if task.done():
                continue
            run = await agent_store.get_run(run_id)
            if run and run["agent_id"] == agent_id:
                return run_id
        return None
# Module-level singleton — one shared AgentRunner instance, created when this module is imported
agent_runner = AgentRunner()