"""
brain/metadata.py — LLM-based metadata extraction.

Extracts structured metadata from a thought using a fast model
(gpt-4o-mini via OpenRouter). Returns type classification, tags, people,
and action items.
"""
from __future__ import annotations

import json
import logging

logger = logging.getLogger(__name__)

_MODEL = "openai/gpt-4o-mini"

# Value used for "type" whenever the model gives nothing usable.
_DEFAULT_TYPE = "other"

# Keys of the result that are always returned as lists of strings.
_LIST_FIELDS = ("tags", "people", "action_items")

# NOTE(review): the "type", "people" and "action_items" placeholders in the
# schema below are empty strings while "tags" carries a descriptive hint —
# confirm whether angle-bracket hints were lost from this prompt.
_SYSTEM_PROMPT = """\
You are a metadata extractor for a personal knowledge base.
Given a thought, extract structured metadata and return ONLY valid JSON — no explanation, no markdown.

JSON schema:
{
  "type": "",
  "tags": ["<2-5 lowercase topic tags>"],
  "people": [""],
  "action_items": [""]
}

Rules:
- type: insight = general knowledge/observation, person_note = about a specific person, task = something to do, reference = link/resource/tool, idea = creative/speculative
- tags: short lowercase words, no spaces (use underscores if needed)
- people: first name or full name as written
- action_items: concrete, actionable phrases only — omit if none
- Keep all lists concise (max 5 items each)
"""


def _fallback_metadata() -> dict:
    """Return a fresh minimal metadata dict (never shared between callers)."""
    return {"type": _DEFAULT_TYPE, "tags": [], "people": [], "action_items": []}


def _as_str_list(value: object) -> list[str]:
    """Coerce one JSON field into a list of non-blank strings.

    A bare string becomes a one-element list instead of being iterated
    character by character; ``None`` and other non-list values become an
    empty list; ``None`` and blank entries inside a list are dropped.
    """
    if isinstance(value, str):
        value = [value]
    elif not isinstance(value, (list, tuple)):
        return []
    items = (str(item) for item in value if item is not None)
    return [item for item in items if item.strip()]


def _normalize_metadata(data: dict) -> dict:
    """Build the public result dict from the decoded model response.

    Each field is normalized on its own, so one malformed field does not
    discard the others.
    """
    raw_type = data.get("type")
    kind = _DEFAULT_TYPE if raw_type is None else str(raw_type)
    if not kind.strip():
        kind = _DEFAULT_TYPE

    result: dict = {"type": kind}
    for field in _LIST_FIELDS:
        result[field] = _as_str_list(data.get(field))
    return result


async def extract_metadata(text: str) -> dict:
    """
    Extract type, tags, people, and action_items from a thought.

    Args:
        text: The thought to analyse.

    Returns:
        A dict with the keys "type" (str), "tags", "people" and
        "action_items" (lists of str). Minimal metadata (type "other",
        empty lists) is returned when the text is empty, no OpenRouter API
        key is stored, the request fails, or the response is not a JSON
        object.
    """
    # Nothing to classify: skip the network round trip entirely.
    if not text or not text.strip():
        return _fallback_metadata()

    # Imported lazily, inside the function, as in the original code.
    from openai import AsyncOpenAI

    from ..database import credential_store

    api_key = await credential_store.get("system:openrouter_api_key")
    if not api_key:
        return _fallback_metadata()

    try:
        # The context manager closes the client's HTTP resources on exit
        # instead of leaving one open client per call to garbage collection.
        async with AsyncOpenAI(
            api_key=api_key,
            base_url="https://openrouter.ai/api/v1",
            default_headers={
                "HTTP-Referer": "https://mac.oai.pm",
                "X-Title": "oAI-Web",
            },
        ) as client:
            response = await client.chat.completions.create(
                model=_MODEL,
                messages=[
                    {"role": "system", "content": _SYSTEM_PROMPT},
                    {"role": "user", "content": text},
                ],
                temperature=0,
                max_tokens=256,
                response_format={"type": "json_object"},
            )

        raw = response.choices[0].message.content or "{}"
        data = json.loads(raw)
        if not isinstance(data, dict):
            logger.warning(
                "Metadata extraction failed: expected a JSON object, got %s",
                type(data).__name__,
            )
            return _fallback_metadata()
        return _normalize_metadata(data)
    except Exception as e:
        # Deliberate best-effort boundary: metadata is optional enrichment,
        # so any failure degrades to the minimal result rather than raising.
        logger.warning("Metadata extraction failed: %s", e)
        return _fallback_metadata()