Files
oai-web/server/brain/ingest.py
2026-04-08 12:43:24 +02:00

56 lines
1.7 KiB
Python

"""
brain/ingest.py — Thought ingestion pipeline.
Runs embedding generation and metadata extraction in parallel, then stores
both in PostgreSQL. Returns the stored thought ID and a human-readable
confirmation string suitable for sending back via Telegram.
"""
from __future__ import annotations
import asyncio
import logging
logger = logging.getLogger(__name__)
async def ingest_thought(content: str, user_id: str | None = None) -> dict:
    """
    Full ingestion pipeline for one thought:

    1. Generate embedding + extract metadata (in parallel)
    2. Store in PostgreSQL
    3. Return {id, metadata, confirmation}

    Args:
        content: Raw thought text to ingest.
        user_id: Optional owner identifier, forwarded to the DB layer.

    Returns:
        dict with keys "id" (stored thought ID), "metadata" (the extracted
        metadata dict), and "confirmation" (human-readable summary suitable
        for sending back via Telegram).

    Raises:
        RuntimeError: if the Brain DB is not available.
            # NOTE(review): inherited from the original docstring — the raise
            # happens inside insert_thought, not visible here; confirm.
    """
    # Imported lazily so that merely importing this module does not pull in
    # the embedding/LLM/DB dependencies.
    from .embeddings import get_embedding
    from .metadata import extract_metadata
    from .database import insert_thought

    # Embedding generation and metadata extraction are independent of each
    # other, so run them concurrently.
    embedding, metadata = await asyncio.gather(
        get_embedding(content),
        extract_metadata(content),
    )
    thought_id = await insert_thought(content, embedding, metadata, user_id=user_id)

    # Build a human-readable confirmation (like the Slack bot reply in the guide).
    thought_type = metadata.get("type", "other")
    tags = metadata.get("tags", [])
    people = metadata.get("people", [])
    actions = metadata.get("action_items", [])

    lines = [f"✅ Captured as {thought_type}"]
    if tags:
        # BUG FIX: the original appended the joined tags with no separator,
        # producing e.g. "✅ Captured as ideaprojectx, ai". Label them instead.
        lines[0] += f" · tags: {', '.join(tags)}"
    if people:
        lines.append(f"People: {', '.join(people)}")
    if actions:
        lines.append("Actions: " + "; ".join(actions))

    logger.info("Ingested thought %s (type=%s)", thought_id, thought_type)
    return {
        "id": thought_id,
        "metadata": metadata,
        "confirmation": "\n".join(lines),
    }