"""LLM routing: pick a backend/model and open a streaming completion."""

import logging
import os

import litellm

logger = logging.getLogger("jon-snow.brain")

# Turn off litellm's verbose flag for this process.
litellm.set_verbose = False

# Model identifiers and endpoints, each overridable through the environment.
FAST_MODEL = os.getenv("FAST_MODEL", "ollama/gemma4")
SMART_MODEL = os.getenv("SMART_MODEL", "ollama/gemma4")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://172.27.40.20:11434")

# A non-empty HERMES_URL sends every request to that endpoint instead of
# the FAST/SMART models.
HERMES_URL = os.getenv("HERMES_URL", "")
HERMES_API_KEY = os.getenv("HERMES_API_KEY", "none")


async def _request_stream(model_name, messages):
    """Open a streaming completion for ``model_name``.

    ``OLLAMA_BASE_URL`` is passed as ``api_base`` only for model names that
    start with ``"ollama/"``; any other model gets ``api_base=None``.
    """
    return await litellm.acompletion(
        model=model_name,
        messages=messages,
        stream=True,
        api_base=OLLAMA_BASE_URL if model_name.startswith("ollama/") else None,
    )


async def stream_completion(messages: list[dict], use_smart: bool = False):
    """Start a streaming chat completion and return litellm's result.

    Routing:
      * If ``HERMES_URL`` is set, the request goes to that endpoint with the
        model ``"openai/hermes-agent"`` and ``use_smart`` is ignored. No
        fallback is attempted on this path.
      * Otherwise ``SMART_MODEL`` is used when ``use_smart`` is true and
        ``FAST_MODEL`` when it is not.

    Args:
        messages: Chat messages forwarded unchanged to ``litellm.acompletion``.
        use_smart: Select ``SMART_MODEL`` instead of ``FAST_MODEL``.

    Returns:
        Whatever ``litellm.acompletion(..., stream=True)`` returns.

    Raises:
        Exception: Any error from the completion call is logged and re-raised,
            unless the smart model was requested and differs from
            ``FAST_MODEL``; in that case one retry is made with
            ``FAST_MODEL`` and that retry's error (if any) propagates.

    Note:
        Only the call that opens the stream is guarded; errors raised later,
        while the caller iterates the returned stream, are not retried here.
    """
    if HERMES_URL:
        logger.info("Brain: routing to Hermes cloud (claude-sonnet-4-6)")
        return await litellm.acompletion(
            model="openai/hermes-agent",
            messages=messages,
            stream=True,
            api_base=HERMES_URL,
            api_key=HERMES_API_KEY,
        )

    if use_smart:
        model = SMART_MODEL
    else:
        model = FAST_MODEL
    logger.info(f"Brain: model={model} smart={use_smart}")

    try:
        return await _request_stream(model, messages)
    except Exception as e:
        logger.error(f"Brain error ({model}): {e}")

        # Retrying only makes sense when the failed model was the smart one
        # and the fast model is actually a different model.
        can_fall_back = use_smart and model != FAST_MODEL
        if not can_fall_back:
            raise

        logger.info("Falling back to FAST_MODEL")
        return await _request_stream(FAST_MODEL, messages)