""" agents/runner.py — Agent execution and APScheduler integration (async). Owns the AsyncIOScheduler — schedules and runs all agents (cron + manual). Each run is tracked in the agent_runs table with token counts. """ from __future__ import annotations import asyncio import logging from datetime import datetime, timezone from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.triggers.cron import CronTrigger from ..agent.agent import Agent, DoneEvent, ErrorEvent from ..config import settings from ..database import credential_store from . import tasks as agent_store logger = logging.getLogger(__name__) class AgentRunner: def __init__(self) -> None: self._agent: Agent | None = None self._scheduler = AsyncIOScheduler(timezone=settings.timezone) self._running: dict[str, asyncio.Task] = {} # run_id → asyncio.Task def init(self, agent: Agent) -> None: self._agent = agent async def start(self) -> None: """Load all enabled agents with schedules into APScheduler and start it.""" for agent in await agent_store.list_agents(): if agent["enabled"] and agent["schedule"]: self._add_job(agent) # Daily audit log rotation at 03:00 self._scheduler.add_job( self._rotate_audit_log, trigger=CronTrigger(hour=3, minute=0, timezone=settings.timezone), id="system:audit-rotation", replace_existing=True, misfire_grace_time=3600, ) self._scheduler.start() logger.info("[agent-runner] Scheduler started, loaded scheduled agents") def shutdown(self) -> None: if self._scheduler.running: self._scheduler.shutdown(wait=False) logger.info("[agent-runner] Scheduler stopped") def _add_job(self, agent: dict) -> None: try: self._scheduler.add_job( self._run_agent_scheduled, trigger=CronTrigger.from_crontab( agent["schedule"], timezone=settings.timezone ), id=f"agent:{agent['id']}", args=[agent["id"]], replace_existing=True, misfire_grace_time=300, ) logger.info( f"[agent-runner] Scheduled agent '{agent['name']}' ({agent['schedule']})" ) except Exception as e: logger.error( f"[agent-runner] Failed to schedule agent '{agent['name']}': {e}" ) def reschedule(self, agent: dict) -> None: job_id = f"agent:{agent['id']}" try: self._scheduler.remove_job(job_id) except Exception: pass if agent["enabled"] and agent["schedule"]: self._add_job(agent) def remove(self, agent_id: str) -> None: try: self._scheduler.remove_job(f"agent:{agent_id}") except Exception: pass # ── Execution ───────────────────────────────────────────────────────────── async def run_agent_now(self, agent_id: str, override_message: str | None = None) -> dict: """UI-triggered run — bypasses schedule, returns run dict.""" return await self._run_agent(agent_id, ignore_rate_limit=True, override_message=override_message) async def run_agent_and_wait( self, agent_id: str, override_message: str, session_id: str | None = None, extra_tools: list | None = None, force_only_extra_tools: bool = False, ) -> str: """Run an agent, wait for it to finish, and return the final response text.""" run = await self._run_agent( agent_id, ignore_rate_limit=True, override_message=override_message, session_id=session_id, extra_tools=extra_tools, force_only_extra_tools=force_only_extra_tools, ) if "id" not in run: logger.warning("[agent-runner] run_agent_and_wait failed for agent %s: %s", agent_id, run.get("error")) return f"Could not run agent: {run.get('error', 'unknown error')}" run_id = run["id"] task = self._running.get(run_id) if task: try: await asyncio.wait_for(asyncio.shield(task), timeout=300) except (asyncio.TimeoutError, asyncio.CancelledError): pass row = await agent_store.get_run(run_id) return (row.get("result") or "(no response)") if row else "(no response)" async def _rotate_audit_log(self) -> None: """Called daily by APScheduler. Purges audit entries older than the configured retention.""" from ..audit import audit_log days_str = await credential_store.get("system:audit_retention_days") days = int(days_str) if days_str else 0 if days <= 0: return deleted = await audit_log.purge(older_than_days=days) logger.info("[agent-runner] Audit rotation: deleted %d entries older than %d days", deleted, days) async def _run_agent_scheduled(self, agent_id: str) -> None: """Called by APScheduler — fire and forget.""" await self._run_agent(agent_id, ignore_rate_limit=False) async def _run_agent( self, agent_id: str, ignore_rate_limit: bool = False, override_message: str | None = None, session_id: str | None = None, extra_tools: list | None = None, force_only_extra_tools: bool = False, ) -> dict: agent_data = await agent_store.get_agent(agent_id) if not agent_data: logger.warning("[agent-runner] _run_agent: agent %s not found", agent_id) return {"error": "Agent not found"} if not agent_data["enabled"] and not ignore_rate_limit: logger.warning("[agent-runner] _run_agent: agent %s is disabled", agent_id) return {"error": "Agent is disabled"} # Kill switch if await credential_store.get("system:paused") == "1": logger.warning("[agent-runner] _run_agent: system is paused") return {"error": "Agent is paused"} if self._agent is None: logger.warning("[agent-runner] _run_agent: agent runner not initialized") return {"error": "Agent not initialized"} # allowed_tools is JSONB, normalised to list|None in _agent_row() raw = agent_data.get("allowed_tools") allowed_tools: list[str] | None = raw if raw else None # Resolve agent owner's admin status — bash is never available to non-admin owners # Also block execution if the owner account has been deactivated. owner_is_admin = True owner_id = agent_data.get("owner_user_id") if owner_id: from ..users import get_user_by_id as _get_user owner = await _get_user(owner_id) if owner and not owner.get("is_active", True): logger.warning( "[agent-runner] Skipping agent '%s' — owner account is deactivated", agent_data["name"], ) return {"error": "Owner account is deactivated"} owner_is_admin = (owner["role"] == "admin") if owner else True if not owner_is_admin: if allowed_tools is None: all_names = [t.name for t in self._agent._registry.all_tools()] allowed_tools = [t for t in all_names if t != "bash"] else: allowed_tools = [t for t in allowed_tools if t != "bash"] # Create run record run = await agent_store.create_run(agent_id) run_id = run["id"] logger.info( f"[agent-runner] Running agent '{agent_data['name']}' run={run_id[:8]}" ) # Per-agent max_tool_calls override (None = use system default) max_tool_calls: int | None = agent_data.get("max_tool_calls") or None async def _execute(): input_tokens = 0 output_tokens = 0 final_text = "" try: from ..agent.agent import _build_system_prompt prompt_mode = agent_data.get("prompt_mode") or "combined" agent_prompt = agent_data["prompt"] system_override: str | None = None if override_message: run_message = override_message if prompt_mode == "agent_only": system_override = agent_prompt elif prompt_mode == "combined": system_override = agent_prompt + "\n\n---\n\n" + await _build_system_prompt(user_id=owner_id) else: run_message = agent_prompt if prompt_mode == "agent_only": system_override = agent_prompt elif prompt_mode == "combined": system_override = agent_prompt + "\n\n---\n\n" + await _build_system_prompt(user_id=owner_id) stream = await self._agent.run( message=run_message, session_id=session_id or f"agent:{run_id}", task_id=run_id, allowed_tools=allowed_tools, model=agent_data.get("model") or None, max_tool_calls=max_tool_calls, system_override=system_override, user_id=owner_id, extra_tools=extra_tools, force_only_extra_tools=force_only_extra_tools, ) async for event in stream: if isinstance(event, DoneEvent): final_text = event.text or "Done" input_tokens = event.usage.input_tokens output_tokens = event.usage.output_tokens elif isinstance(event, ErrorEvent): final_text = f"Error: {event.message}" await agent_store.finish_run( run_id, status="success", input_tokens=input_tokens, output_tokens=output_tokens, result=final_text, ) logger.info( f"[agent-runner] Agent '{agent_data['name']}' run={run_id[:8]} completed OK" ) except asyncio.CancelledError: await agent_store.finish_run(run_id, status="stopped") logger.info(f"[agent-runner] Run {run_id[:8]} stopped") except Exception as e: logger.error(f"[agent-runner] Run {run_id[:8]} failed: {e}") await agent_store.finish_run(run_id, status="error", error=str(e)) finally: self._running.pop(run_id, None) task = asyncio.create_task(_execute()) self._running[run_id] = task return await agent_store.get_run(run_id) def stop_run(self, run_id: str) -> bool: task = self._running.get(run_id) if task and not task.done(): task.cancel() return True return False def is_running(self, run_id: str) -> bool: task = self._running.get(run_id) return task is not None and not task.done() async def find_active_run(self, agent_id: str) -> str | None: """Return run_id of an in-progress run for this agent, or None.""" for run_id, task in self._running.items(): if not task.done(): run = await agent_store.get_run(run_id) if run and run["agent_id"] == agent_id: return run_id return None # Module-level singleton agent_runner = AgentRunner()