Files
oai-web/server/brain/metadata.py
2026-04-08 12:43:24 +02:00

81 lines
2.7 KiB
Python

"""
brain/metadata.py — LLM-based metadata extraction.
Extracts structured metadata from a thought using a fast model (gpt-4o-mini
via OpenRouter). Returns type classification, tags, people, and action items.
"""
from __future__ import annotations
import json
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Model identifier passed as `model=` to the chat-completions call in
# extract_metadata(), which is sent to the OpenRouter endpoint.
_MODEL = "openai/gpt-4o-mini"
# System message sent with every extraction request. It defines the JSON
# schema (type / tags / people / action_items) the model is asked to return.
# This is runtime text: any edit changes what the model is told.
_SYSTEM_PROMPT = """\
You are a metadata extractor for a personal knowledge base. Given a thought,
extract structured metadata and return ONLY valid JSON — no explanation, no markdown.
JSON schema:
{
"type": "<one of: insight | person_note | task | reference | idea | other>",
"tags": ["<2-5 lowercase topic tags>"],
"people": ["<names of people mentioned, if any>"],
"action_items": ["<concrete next actions, if any>"]
}
Rules:
- type: insight = general knowledge/observation, person_note = about a specific person,
task = something to do, reference = link/resource/tool, idea = creative/speculative
- tags: short lowercase words, no spaces (use underscores if needed)
- people: first name or full name as written
- action_items: concrete, actionable phrases only — omit if none
- Keep all lists concise (max 5 items each)
"""
# Values the system prompt allows for the "type" field; anything else the
# model returns is normalized to "other".
_ALLOWED_TYPES = frozenset(
    {"insight", "person_note", "task", "reference", "idea", "other"}
)
# The system prompt asks for at most five entries per list; enforce it here
# because the model's output is not guaranteed to comply.
_MAX_LIST_ITEMS = 5


def _fallback_metadata() -> dict:
    """Return a fresh minimal metadata dict for when extraction is not possible."""
    # Built on every call so callers can mutate the result without affecting
    # later fallbacks.
    return {"type": "other", "tags": [], "people": [], "action_items": []}


def _coerce_str_list(value: object) -> list:
    """Coerce one list-valued field of the model output to a short list of strings."""
    if value is None:
        # `"tags": null` is valid JSON; treat it as "nothing extracted".
        return []
    if isinstance(value, str):
        # A bare string must not be iterated character by character.
        value = [value]
    elif not isinstance(value, (list, tuple)):
        return []
    cleaned = [str(item).strip() for item in value if item is not None]
    return [item for item in cleaned if item][:_MAX_LIST_ITEMS]


def _normalize_metadata(data: object) -> dict:
    """Validate decoded model output and shape it into the metadata dict.

    Raises:
        ValueError: if the decoded JSON is not an object.
    """
    if not isinstance(data, dict):
        raise ValueError(
            f"expected a JSON object, got {type(data).__name__}"
        )
    raw_type = data.get("type")
    kind = raw_type.strip().lower() if isinstance(raw_type, str) else "other"
    if kind not in _ALLOWED_TYPES:
        kind = "other"
    return {
        "type": kind,
        "tags": _coerce_str_list(data.get("tags")),
        "people": _coerce_str_list(data.get("people")),
        "action_items": _coerce_str_list(data.get("action_items")),
    }


async def extract_metadata(text: str) -> dict:
    """
    Extract type, tags, people, and action_items from a thought.

    Sends ``text`` to the model named by ``_MODEL`` through the OpenRouter
    endpoint, using ``_SYSTEM_PROMPT`` as the system message, and normalizes
    the JSON reply.

    Args:
        text: The thought to analyse; sent verbatim as the user message.

    Returns:
        A dict with the keys ``type`` (one of the values allowed by the
        system prompt), ``tags``, ``people`` and ``action_items`` (lists of
        strings, at most five entries each). If no API key is stored, or the
        request or the parsing of its reply fails, the minimal dict
        ``{"type": "other", "tags": [], "people": [], "action_items": []}``
        is returned instead.
    """
    # Imported lazily, as in the original implementation.
    from openai import AsyncOpenAI
    from ..database import credential_store

    api_key = await credential_store.get("system:openrouter_api_key")
    if not api_key:
        return _fallback_metadata()

    try:
        # `async with` closes the client's HTTP connections when the request
        # is done instead of leaving them to garbage collection.
        async with AsyncOpenAI(
            api_key=api_key,
            base_url="https://openrouter.ai/api/v1",
            default_headers={
                "HTTP-Referer": "https://mac.oai.pm",
                "X-Title": "oAI-Web",
            },
        ) as client:
            response = await client.chat.completions.create(
                model=_MODEL,
                messages=[
                    {"role": "system", "content": _SYSTEM_PROMPT},
                    {"role": "user", "content": text},
                ],
                temperature=0,
                max_tokens=256,
                response_format={"type": "json_object"},
            )
        raw = response.choices[0].message.content or "{}"
        return _normalize_metadata(json.loads(raw))
    except Exception as e:
        # Deliberately broad: metadata extraction is best-effort and must not
        # break the caller. The failure is logged and the fallback returned.
        logger.warning("Metadata extraction failed: %s", e)
        return _fallback_metadata()