"""
brain/metadata.py — LLM-based metadata extraction.

Extracts structured metadata from a thought using a fast model (gpt-4o-mini
via OpenRouter). Returns type classification, tags, people, and action items.
"""

from __future__ import annotations

import json
import logging

# Module-level logger named after this module, per stdlib logging convention.
logger = logging.getLogger(__name__)

# Fast, inexpensive extraction model (OpenRouter slug: "<provider>/<model>").
_MODEL = "openai/gpt-4o-mini"
# System prompt consumed by extract_metadata(): instructs the model to emit
# ONLY a JSON object matching the schema below (no prose, no markdown fences).
_SYSTEM_PROMPT = """\
You are a metadata extractor for a personal knowledge base. Given a thought,
extract structured metadata and return ONLY valid JSON — no explanation, no markdown.

JSON schema:
{
"type": "<one of: insight | person_note | task | reference | idea | other>",
"tags": ["<2-5 lowercase topic tags>"],
"people": ["<names of people mentioned, if any>"],
"action_items": ["<concrete next actions, if any>"]
}

Rules:
- type: insight = general knowledge/observation, person_note = about a specific person,
task = something to do, reference = link/resource/tool, idea = creative/speculative
- tags: short lowercase words, no spaces (use underscores if needed)
- people: first name or full name as written
- action_items: concrete, actionable phrases only — omit if none
- Keep all lists concise (max 5 items each)
"""
|
|
async def extract_metadata(text: str) -> dict:
|
|
"""
|
|
Extract type, tags, people, and action_items from a thought.
|
|
Returns a dict. Falls back to minimal metadata on any error.
|
|
"""
|
|
from openai import AsyncOpenAI
|
|
from ..database import credential_store
|
|
|
|
api_key = await credential_store.get("system:openrouter_api_key")
|
|
if not api_key:
|
|
return {"type": "other", "tags": [], "people": [], "action_items": []}
|
|
|
|
client = AsyncOpenAI(
|
|
api_key=api_key,
|
|
base_url="https://openrouter.ai/api/v1",
|
|
default_headers={
|
|
"HTTP-Referer": "https://mac.oai.pm",
|
|
"X-Title": "oAI-Web",
|
|
},
|
|
)
|
|
|
|
try:
|
|
response = await client.chat.completions.create(
|
|
model=_MODEL,
|
|
messages=[
|
|
{"role": "system", "content": _SYSTEM_PROMPT},
|
|
{"role": "user", "content": text},
|
|
],
|
|
temperature=0,
|
|
max_tokens=256,
|
|
response_format={"type": "json_object"},
|
|
)
|
|
raw = response.choices[0].message.content or "{}"
|
|
data = json.loads(raw)
|
|
return {
|
|
"type": str(data.get("type", "other")),
|
|
"tags": [str(t) for t in data.get("tags", [])],
|
|
"people": [str(p) for p in data.get("people", [])],
|
|
"action_items": [str(a) for a in data.get("action_items", [])],
|
|
}
|
|
except Exception as e:
|
|
logger.warning("Metadata extraction failed: %s", e)
|
|
return {"type": "other", "tags": [], "people": [], "action_items": []}
|