import logging
import os

import litellm

# Module logger; every routing decision and failure below is reported through it.
logger = logging.getLogger("jon-snow.brain")

# Keep litellm's verbose flag switched off.
litellm.set_verbose = False

# --- Environment-driven configuration (read once, at import time) ---------
# Model used when the caller does not request the "smart" model; also the
# fallback target when the smart model fails.
FAST_MODEL = os.environ.get("FAST_MODEL", "ollama/gemma4")
# Model used when the caller passes use_smart=True.
SMART_MODEL = os.environ.get("SMART_MODEL", "ollama/gemma4")
# Passed as api_base for any model whose name starts with "ollama/".
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://172.27.40.20:11434")
# When non-empty, every completion is sent to this endpoint instead of the
# FAST/SMART models.
HERMES_URL = os.environ.get("HERMES_URL", "")
# API key sent along with requests to HERMES_URL.
HERMES_API_KEY = os.environ.get("HERMES_API_KEY", "none")


async def stream_completion(messages: list[dict], use_smart: bool = False):
    """Start a streaming completion and return what litellm hands back.

    Routing rules:

    * If ``HERMES_URL`` is non-empty, the request is always sent there as
      model ``openai/hermes-agent`` with ``HERMES_API_KEY``; ``use_smart`` is
      ignored and errors on this path are not caught.
    * Otherwise ``SMART_MODEL`` is used when ``use_smart`` is true and
      ``FAST_MODEL`` when it is not. ``OLLAMA_BASE_URL`` is supplied as
      ``api_base`` only for model names starting with ``ollama/``.
    * If the smart model call raises and ``FAST_MODEL`` is a different model,
      the call is retried once against ``FAST_MODEL``.

    Args:
        messages: Chat messages, forwarded unchanged to
            ``litellm.acompletion``.
        use_smart: Select ``SMART_MODEL`` instead of ``FAST_MODEL``.

    Returns:
        The result of ``litellm.acompletion(..., stream=True)``.

    Raises:
        Exception: The original error is re-raised when no fallback applies;
            an error raised by the fallback call itself also propagates.
    """
    if HERMES_URL:
        logger.info("Brain: routing to Hermes cloud (claude-sonnet-4-6)")
        hermes_request = {
            "model": "openai/hermes-agent",
            "messages": messages,
            "stream": True,
            "api_base": HERMES_URL,
            "api_key": HERMES_API_KEY,
        }
        return await litellm.acompletion(**hermes_request)

    if use_smart:
        model = SMART_MODEL
    else:
        model = FAST_MODEL
    logger.info(f"Brain: model={model} smart={use_smart}")

    try:
        primary_request = {
            "model": model,
            "messages": messages,
            "stream": True,
            # Only Ollama-hosted models get the explicit base URL.
            "api_base": OLLAMA_BASE_URL if model.startswith("ollama/") else None,
        }
        return await litellm.acompletion(**primary_request)
    except Exception as err:
        logger.error(f"Brain error ({model}): {err}")

        # Nothing to fall back to unless the smart model was requested and
        # the fast model is actually a different one.
        if not use_smart or model == FAST_MODEL:
            raise

        logger.info("Falling back to FAST_MODEL")
        fallback_request = {
            "model": FAST_MODEL,
            "messages": messages,
            "stream": True,
            "api_base": OLLAMA_BASE_URL if FAST_MODEL.startswith("ollama/") else None,
        }
        return await litellm.acompletion(**fallback_request)