"""
|
|
providers/models.py — Dynamic model list for all active providers.
|
|
|
|
Anthropic has no public models API, so current models are hardcoded.
|
|
OpenRouter models are fetched from their API and cached for one hour.
|
|
|
|
Usage:
|
|
models, default = await get_available_models()
|
|
info = await get_models_info()
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import time
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Current Anthropic models (update when new ones ship).
# IDs carry the "anthropic:" provider prefix used throughout this module.
_ANTHROPIC_MODELS: list[str] = [
    "anthropic:claude-opus-4-6",
    "anthropic:claude-sonnet-4-6",
    "anthropic:claude-haiku-4-5-20251001",
]

# Rich metadata for the hardcoded Anthropic models. The dict shape mirrors
# the entries that get_models_info() builds for OpenRouter models, so the
# two sources can be concatenated into one list.
# Pricing fields are left as None (not tracked here for Anthropic).
_ANTHROPIC_MODEL_INFO: list[dict] = [
    {
        "id": "anthropic:claude-opus-4-6",
        "provider": "anthropic",
        "bare_id": "claude-opus-4-6",  # id without the provider prefix
        "name": "Claude Opus 4.6",
        "context_length": 200000,
        "description": "Anthropic's most powerful model. Best for complex reasoning, nuanced writing, and sophisticated analysis.",
        "capabilities": {"vision": True, "tools": True, "online": False, "image_gen": False},
        "pricing": {"prompt_per_1m": None, "completion_per_1m": None},
        "architecture": {"tokenizer": "claude", "modality": "text+image->text"},
    },
    {
        "id": "anthropic:claude-sonnet-4-6",
        "provider": "anthropic",
        "bare_id": "claude-sonnet-4-6",
        "name": "Claude Sonnet 4.6",
        "context_length": 200000,
        "description": "Best balance of speed and intelligence. Ideal for most tasks requiring strong reasoning with faster response times.",
        "capabilities": {"vision": True, "tools": True, "online": False, "image_gen": False},
        "pricing": {"prompt_per_1m": None, "completion_per_1m": None},
        "architecture": {"tokenizer": "claude", "modality": "text+image->text"},
    },
    {
        "id": "anthropic:claude-haiku-4-5-20251001",
        "provider": "anthropic",
        "bare_id": "claude-haiku-4-5-20251001",
        "name": "Claude Haiku 4.5",
        "context_length": 200000,
        "description": "Fastest and most compact Claude model. Great for quick tasks, simple Q&A, and high-throughput workloads.",
        "capabilities": {"vision": True, "tools": True, "online": False, "image_gen": False},
        "pricing": {"prompt_per_1m": None, "completion_per_1m": None},
        "architecture": {"tokenizer": "claude", "modality": "text+image->text"},
    },
]
|
|
|
|
# Current OpenAI models (hardcoded — update when new ones ship).
# IDs carry the "openai:" provider prefix used throughout this module.
_OPENAI_MODELS: list[str] = [
    "openai:gpt-4o",
    "openai:gpt-4o-mini",
    "openai:gpt-4-turbo",
    "openai:o3-mini",
    "openai:gpt-5-image",
]

# Rich metadata for the hardcoded OpenAI models; same dict shape as the
# Anthropic entries above and the OpenRouter entries built at runtime.
# Pricing values are USD per 1M tokens; None means not tracked here.
_OPENAI_MODEL_INFO: list[dict] = [
    {
        "id": "openai:gpt-4o",
        "provider": "openai",
        "bare_id": "gpt-4o",  # id without the provider prefix
        "name": "GPT-4o",
        "context_length": 128000,
        "description": "OpenAI's flagship model. Multimodal, fast, and highly capable for complex reasoning and generation tasks.",
        "capabilities": {"vision": True, "tools": True, "online": False, "image_gen": False},
        "pricing": {"prompt_per_1m": 2.50, "completion_per_1m": 10.00},
        "architecture": {"tokenizer": "cl100k", "modality": "text+image->text"},
    },
    {
        "id": "openai:gpt-4o-mini",
        "provider": "openai",
        "bare_id": "gpt-4o-mini",
        "name": "GPT-4o mini",
        "context_length": 128000,
        "description": "Fast and affordable GPT-4o variant. Great for high-throughput tasks that don't require maximum intelligence.",
        "capabilities": {"vision": True, "tools": True, "online": False, "image_gen": False},
        "pricing": {"prompt_per_1m": 0.15, "completion_per_1m": 0.60},
        "architecture": {"tokenizer": "cl100k", "modality": "text+image->text"},
    },
    {
        "id": "openai:gpt-4-turbo",
        "provider": "openai",
        "bare_id": "gpt-4-turbo",
        "name": "GPT-4 Turbo",
        "context_length": 128000,
        "description": "Previous-generation GPT-4 with 128K context window. Vision and tool use supported.",
        "capabilities": {"vision": True, "tools": True, "online": False, "image_gen": False},
        "pricing": {"prompt_per_1m": 10.00, "completion_per_1m": 30.00},
        "architecture": {"tokenizer": "cl100k", "modality": "text+image->text"},
    },
    {
        # Text-only reasoning model: no vision input.
        "id": "openai:o3-mini",
        "provider": "openai",
        "bare_id": "o3-mini",
        "name": "o3-mini",
        "context_length": 200000,
        "description": "OpenAI's efficient reasoning model. Excels at STEM tasks with strong tool-use support.",
        "capabilities": {"vision": False, "tools": True, "online": False, "image_gen": False},
        "pricing": {"prompt_per_1m": 1.10, "completion_per_1m": 4.40},
        "architecture": {"tokenizer": "cl100k", "modality": "text->text"},
    },
    {
        # Image-generation model: image output, no tool use.
        "id": "openai:gpt-5-image",
        "provider": "openai",
        "bare_id": "gpt-5-image",
        "name": "GPT-5 Image",
        "context_length": 128000,
        "description": "GPT-5 with native image generation. Produces high-quality images from text prompts with rich contextual understanding.",
        "capabilities": {"vision": True, "tools": False, "online": False, "image_gen": True},
        "pricing": {"prompt_per_1m": None, "completion_per_1m": None},
        "architecture": {"tokenizer": "cl100k", "modality": "text+image->image+text"},
    },
]
|
|
|
|
# Module-level in-memory cache for the OpenRouter model list; populated and
# refreshed by _fetch_openrouter_raw(), expired by invalidate_openrouter_cache().
_or_raw: list[dict] = []  # full raw objects from OpenRouter /api/v1/models
_or_cache_ts: float = 0.0  # time.monotonic() of the last successful fetch
_OR_CACHE_TTL = 3600  # seconds
|
|
|
|
|
|
async def _fetch_openrouter_raw(api_key: str) -> list[dict]:
    """Fetch full OpenRouter model objects, with a 1-hour in-memory cache.

    Args:
        api_key: OpenRouter API key, sent as a Bearer token.

    Returns:
        Raw model dicts from GET /api/v1/models, each with a non-empty "id".
        On any fetch error the previous cache contents are returned (possibly
        an empty list), so model listing degrades gracefully instead of raising.
    """
    global _or_raw, _or_cache_ts

    now = time.monotonic()
    if _or_raw and (now - _or_cache_ts) < _OR_CACHE_TTL:
        return _or_raw

    try:
        # Imported lazily so this module stays importable without httpx;
        # an ImportError is treated like any other fetch failure below.
        import httpx

        async with httpx.AsyncClient() as client:
            r = await client.get(
                "https://openrouter.ai/api/v1/models",
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=10,
            )
            r.raise_for_status()
            data = r.json()
        # Drop any entries without an id; downstream code keys on it.
        _or_raw = [m for m in data.get("data", []) if m.get("id")]
        _or_cache_ts = now
        # Lazy %-style args so formatting is skipped when the level is off.
        logger.info("[models] Fetched %d OpenRouter models", len(_or_raw))
        return _or_raw
    except Exception as e:
        logger.warning("[models] Failed to fetch OpenRouter models: %s", e)
        return _or_raw  # return stale cache on error
|
|
|
|
|
|
async def _get_keys(user_id: str | None = None, is_admin: bool = True) -> tuple[str, str, str]:
    """Resolve the (anthropic, openrouter, openai) API keys for a user.

    Restricted (non-admin) users get their personal keys from user settings,
    except that a missing personal OpenRouter key falls back to the system
    key (which downstream limits to free models). Admins, anonymous callers,
    and users granted "use_admin_keys" read everything from the global store.
    """
    from ..database import credential_store, user_settings_store

    restricted = bool(user_id) and not is_admin
    # An admin may grant a user full access to the system keys; only probe
    # the grant flag for restricted users.
    if restricted and not await user_settings_store.get(user_id, "use_admin_keys"):
        anthropic = await user_settings_store.get(user_id, "anthropic_api_key") or ""
        openai = await user_settings_store.get(user_id, "openai_api_key") or ""
        # No personal OpenRouter key: fall back to global (free models only).
        personal_or = await user_settings_store.get(user_id, "openrouter_api_key")
        openrouter = personal_or or await credential_store.get("system:openrouter_api_key") or ""
        return anthropic, openrouter, openai

    # Admin, anonymous, or user granted admin key access: global store.
    anthropic = await credential_store.get("system:anthropic_api_key") or ""
    openrouter = await credential_store.get("system:openrouter_api_key") or ""
    openai = await credential_store.get("system:openai_api_key") or ""
    return anthropic, openrouter, openai
|
|
|
|
|
|
def _is_free_openrouter(m: dict) -> bool:
|
|
"""Return True if this OpenRouter model is free (pricing.prompt == "0")."""
|
|
pricing = m.get("pricing", {})
|
|
try:
|
|
return float(pricing.get("prompt", "1")) == 0.0 and float(pricing.get("completion", "1")) == 0.0
|
|
except (TypeError, ValueError):
|
|
return False
|
|
|
|
|
|
async def get_available_models(
    user_id: str | None = None,
    is_admin: bool = True,
) -> tuple[list[str], str]:
    """
    Return (model_list, default_model).

    Always auto-builds from active providers:
      - Hardcoded Anthropic models if an Anthropic key is resolved (and the
        user has access)
      - Hardcoded OpenAI models if an OpenAI key is resolved
      - All OpenRouter models (fetched + cached 1h) if an OpenRouter key is
        resolved; non-admin users with no own OR key are limited to free
        models only

    The default model is resolved as: credential-store override (with a
    separate "free" override for restricted users) → DEFAULT_CHAT_MODEL in
    .env → first model in the list. A candidate not present in the list
    falls back to the first model.
    """
    from ..config import settings
    from ..database import user_settings_store

    ant_key, or_key, oai_key = await _get_keys(user_id=user_id, is_admin=is_admin)

    # Determine access restrictions for non-admin users. A user granted
    # "use_admin_keys" bypasses all of them.
    free_or_only = False
    if user_id and not is_admin:
        use_admin_keys = await user_settings_store.get(user_id, "use_admin_keys")
        if not use_admin_keys:
            own_ant = await user_settings_store.get(user_id, "anthropic_api_key")
            own_or = await user_settings_store.get(user_id, "openrouter_api_key")
            if not own_ant:
                ant_key = ""  # block Anthropic unless they have their own key
            if not own_or and or_key:
                # Resolved OR key is the system one → free models only.
                free_or_only = True

    models: list[str] = []
    if ant_key:
        models.extend(_ANTHROPIC_MODELS)
    if oai_key:
        models.extend(_OPENAI_MODELS)
    if or_key:
        raw = await _fetch_openrouter_raw(or_key)
        if free_or_only:
            raw = [m for m in raw if _is_free_openrouter(m)]
        # OpenRouter ids are prefixed and sorted; hardcoded lists keep order.
        models.extend(sorted(f"openrouter:{m['id']}" for m in raw))

    from ..database import credential_store
    # Restricted users prefer a dedicated "free" default, if one is set.
    if free_or_only:
        db_default = await credential_store.get("system:default_chat_model_free") \
            or await credential_store.get("system:default_chat_model")
    else:
        db_default = await credential_store.get("system:default_chat_model")

    # Resolve default: DB override → .env → first available model
    candidate = db_default or settings.default_chat_model or (models[0] if models else "")
    # Ensure the candidate is actually in the model list
    default = candidate if candidate in models else (models[0] if models else "")
    return models, default
|
|
|
|
|
|
def get_or_output_modalities(bare_model_id: str) -> list[str]:
    """
    Return output_modalities for an OpenRouter model from the cached raw API data.

    Falls back to ["text"] if not found or the cache is empty; known
    image-generation models are also detected by ID pattern as a fallback.
    """
    cached = next((m for m in _or_raw if m.get("id") == bare_model_id), None)
    if cached is not None:
        return cached.get("architecture", {}).get("output_modalities") or ["text"]

    # Pattern fallback for when the cache is cold or the model isn't listed.
    lowered = bare_model_id.lower()
    image_markers = ("-image", "/flux", "image-gen", "imagen")
    if any(marker in lowered for marker in image_markers):
        return ["image", "text"]
    return ["text"]
|
|
|
|
|
|
async def get_capability_map(
    user_id: str | None = None,
    is_admin: bool = True,
) -> dict[str, dict]:
    """Return {model_id: capabilities dict} for all models available to the user."""
    capability_map: dict[str, dict] = {}
    for entry in await get_models_info(user_id=user_id, is_admin=is_admin):
        capability_map[entry["id"]] = entry.get("capabilities", {})
    return capability_map
|
|
|
|
|
|
def _openrouter_model_entry(m: dict) -> dict:
    """Convert one raw OpenRouter /api/v1/models object into our info dict.

    Output shape matches _ANTHROPIC_MODEL_INFO / _OPENAI_MODEL_INFO entries.
    """
    model_id = m.get("id", "")

    # OpenRouter pricing is per-token; convert to $/1M tokens. None when
    # the value is missing or unparsable.
    pricing = m.get("pricing", {})
    try:
        prompt_per_1m = float(pricing.get("prompt", 0)) * 1_000_000
    except (TypeError, ValueError):
        prompt_per_1m = None
    try:
        completion_per_1m = float(pricing.get("completion", 0)) * 1_000_000
    except (TypeError, ValueError):
        completion_per_1m = None

    arch = m.get("architecture", {})

    # Vision: OpenRouter returns either a list (new) or a modality string (old),
    # e.g. "text+image->text"; parse the input half of the old form.
    input_modalities = arch.get("input_modalities") or []
    if not input_modalities:
        modality_str = arch.get("modality", "")
        input_part = modality_str.split("->")[0] if "->" in modality_str else modality_str
        input_modalities = [p.strip() for p in input_part.replace("+", " ").split() if p.strip()]

    # Tools: field may be named either way depending on API version.
    supported_params = (
        m.get("supported_generation_parameters")
        or m.get("supported_parameters")
        or []
    )

    # Online: inherently-online models have "online" in their ID or name,
    # or belong to providers whose models are always web-connected.
    name_lower = (m.get("name") or "").lower()
    online = (
        "online" in model_id
        or model_id.startswith("perplexity/")
        or "online" in name_lower
    )

    out_modalities = arch.get("output_modalities", ["text"])

    # Prefer the API's modality string; synthesize one from the parts otherwise.
    modality_display = arch.get("modality", "")
    if not modality_display and input_modalities:
        modality_display = "+".join(input_modalities) + "->" + "+".join(out_modalities)

    return {
        "id": f"openrouter:{model_id}",
        "provider": "openrouter",
        "bare_id": model_id,
        "name": m.get("name") or model_id,
        "context_length": m.get("context_length"),
        "description": m.get("description") or "",
        "capabilities": {
            "vision": "image" in input_modalities,
            "tools": "tools" in supported_params,
            "online": online,
            "image_gen": "image" in out_modalities,
        },
        "pricing": {
            "prompt_per_1m": prompt_per_1m,
            "completion_per_1m": completion_per_1m,
        },
        "architecture": {
            "tokenizer": arch.get("tokenizer", ""),
            "modality": modality_display,
        },
    }


async def get_models_info(
    user_id: str | None = None,
    is_admin: bool = True,
) -> list[dict]:
    """
    Return rich metadata for all available models, filtered by user access tier.

    Anthropic and OpenAI entries use hardcoded info; OpenRouter entries are
    derived from the live (1h-cached) API response.

    Access rules mirror get_available_models():
      - non-admin users without their own Anthropic key see no Anthropic models
      - non-admin users without their own OpenRouter key see free models only
      - a user granted "use_admin_keys" is unrestricted
    """
    from ..database import user_settings_store

    ant_key, or_key, oai_key = await _get_keys(user_id=user_id, is_admin=is_admin)

    # Determine access restrictions for non-admin users. Checking the
    # "use_admin_keys" grant first keeps this consistent with
    # get_available_models() and get_access_tier(); previously the grant was
    # ignored here, so granted users were still blocked from Anthropic models
    # and limited to free OpenRouter models in this listing.
    free_or_only = False
    if user_id and not is_admin:
        use_admin_keys = await user_settings_store.get(user_id, "use_admin_keys")
        if not use_admin_keys:
            own_ant = await user_settings_store.get(user_id, "anthropic_api_key")
            own_or = await user_settings_store.get(user_id, "openrouter_api_key")
            if not own_ant:
                ant_key = ""  # block Anthropic unless they have their own key
            if not own_or and or_key:
                free_or_only = True  # system OR key grants free models only

    results: list[dict] = []

    if ant_key:
        results.extend(_ANTHROPIC_MODEL_INFO)
    if oai_key:
        results.extend(_OPENAI_MODEL_INFO)

    if or_key:
        raw = await _fetch_openrouter_raw(or_key)
        if free_or_only:
            raw = [m for m in raw if _is_free_openrouter(m)]
        results.extend(_openrouter_model_entry(m) for m in raw)

    return results
|
|
|
|
|
|
async def get_access_tier(
    user_id: str | None = None,
    is_admin: bool = True,
) -> dict:
    """Return access restriction flags for the given user.

    Flags:
        anthropic_blocked:    user cannot use Anthropic models at all
        openrouter_free_only: user is limited to free OpenRouter models
        openai_blocked:       user cannot use OpenAI models at all

    Admins, anonymous callers, and users granted "use_admin_keys" are
    unrestricted; other users' flags depend on which personal keys they
    have configured, matching what _get_keys() actually resolves.
    """
    unrestricted = {"anthropic_blocked": False, "openrouter_free_only": False, "openai_blocked": False}
    if not user_id or is_admin:
        return unrestricted

    from ..database import user_settings_store, credential_store
    use_admin_keys = await user_settings_store.get(user_id, "use_admin_keys")
    if use_admin_keys:
        return unrestricted

    own_ant = await user_settings_store.get(user_id, "anthropic_api_key")
    own_or = await user_settings_store.get(user_id, "openrouter_api_key")
    own_oai = await user_settings_store.get(user_id, "openai_api_key")
    global_or = await credential_store.get("system:openrouter_api_key")
    return {
        "anthropic_blocked": not bool(own_ant),
        # Free-only when falling back to the system OpenRouter key.
        "openrouter_free_only": not bool(own_or) and bool(global_or),
        # Non-admins need their own OpenAI key; _get_keys() resolves it from
        # user settings, so the flag must reflect whether one exists rather
        # than being hardcoded True (which contradicted get_models_info()).
        "openai_blocked": not bool(own_oai),
    }
|
|
|
|
|
|
def invalidate_openrouter_cache() -> None:
    """Expire the cached OpenRouter model list.

    The next _fetch_openrouter_raw() call will hit the API again — use this
    after an API key change, for example.
    """
    global _or_cache_ts
    _or_cache_ts = 0.0
|