"""LLM routing helpers built on LiteLLM.

Selects a backend from environment configuration (Anthropic first, then a
Hermes OpenAI-compatible endpoint, then a local Ollama model) and exposes two
entry points: a one-shot JSON extraction call and a streaming completion call.
"""

import json
import logging
import os

import litellm

logger = logging.getLogger("jon-snow.brain")
litellm.set_verbose = False

# Backend configuration, read once at import time.
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
CLAUDE_MODEL = os.getenv("CLAUDE_MODEL", "claude-sonnet-4-6")
FAST_MODEL = os.getenv("FAST_MODEL", "ollama/llama3.1:8b")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://172.27.40.20:11434")
HERMES_URL = os.getenv("HERMES_URL", "")
HERMES_API_KEY = os.getenv("HERMES_API_KEY", "none")

# System prompt for extract_task_fields; asks the model for a bare JSON object.
EXTRACT_SYSTEM = (
    "Extract the task title and destination project from the user message.\n"
    "Rules:\n"
    "- title: the actual task to be done, stripped of all filler "
    "(no 'please add', 'a work item', 'a job item', 'we need to', etc.)\n"
    "- project: the client or project name if mentioned, otherwise null\n"
    "Reply with JSON only, no other text: "
    "{\"title\": \"...\", \"project\": \"...\" or null}"
)


def _anthropic_kwargs() -> dict:
    """Return the LiteLLM keyword arguments for the Anthropic backend."""
    return {"api_key": ANTHROPIC_API_KEY, "model": f"anthropic/{CLAUDE_MODEL}"}


def _hermes_kwargs() -> dict:
    """Return the LiteLLM keyword arguments for the Hermes endpoint."""
    return {
        "model": "openai/hermes-agent",
        "api_base": HERMES_URL,
        "api_key": HERMES_API_KEY,
    }


def _ollama_kwargs() -> dict:
    """Return the LiteLLM keyword arguments for the Ollama backend."""
    return {"model": FAST_MODEL, "api_base": OLLAMA_BASE_URL}


def _primary_kwargs() -> dict:
    """Pick the preferred backend: Anthropic, then Hermes, then Ollama.

    A backend counts as configured when its environment value
    (ANTHROPIC_API_KEY / HERMES_URL) is non-empty.
    """
    if ANTHROPIC_API_KEY:
        return _anthropic_kwargs()
    if HERMES_URL:
        return _hermes_kwargs()
    return _ollama_kwargs()


def _parse_task_fields(content: str) -> dict:
    """Parse the model reply into a dict that always has title and project.

    Raises ValueError (json.JSONDecodeError is a subclass) when the reply is
    not valid JSON or is not a JSON object.
    """
    text = content.strip()
    # Strip markdown code fences if the model wraps the JSON.
    if text.startswith("```"):
        text = text.split("```")[1]
    # Drop a leading language tag; valid JSON never starts with "json".
    if text.startswith("json"):
        text = text[4:]
    parsed = json.loads(text.strip())
    if not isinstance(parsed, dict):
        raise ValueError(
            f"expected a JSON object, got {type(parsed).__name__}"
        )
    # Guarantee both documented keys while keeping anything else returned.
    return {"title": None, "project": None, **parsed}


async def extract_task_fields(message: str) -> tuple[dict, dict]:
    """Extract the task title and project from a user message.

    Sends EXTRACT_SYSTEM plus the message to the primary backend as a
    non-streaming completion and parses the JSON reply.

    Returns:
        (fields, usage) where
        fields = {"title": str | None, "project": str | None}
        usage = {"prompt_tokens": int, "completion_tokens": int}

    Never raises: any failure is logged as a warning and yields fields with
    both values set to None. Usage is zero when the completion call itself
    failed; when only parsing failed, the reported token counts are kept.
    """
    prompt = [
        {"role": "system", "content": EXTRACT_SYSTEM},
        {"role": "user", "content": message},
    ]
    usage = {"prompt_tokens": 0, "completion_tokens": 0}
    try:
        resp = await litellm.acompletion(
            stream=False, messages=prompt, **_primary_kwargs()
        )
        if resp.usage:
            usage = {
                "prompt_tokens": resp.usage.prompt_tokens,
                "completion_tokens": resp.usage.completion_tokens,
            }
        # content may be None; the empty string then fails JSON parsing
        # and takes the same fallback path as any other bad reply.
        fields = _parse_task_fields(resp.choices[0].message.content or "")
        return fields, usage
    except Exception as e:  # best-effort boundary: never propagate
        logger.warning("extract_task_fields failed: %s", e)
        return {"title": None, "project": None}, usage


async def stream_completion(messages: list[dict], use_smart: bool = False):
    """Start a streaming LLM call and return the stream.

    Returns whatever ``litellm.acompletion(stream=True, ...)`` returns. When
    Anthropic or Hermes is configured, ``stream_options`` with
    ``include_usage`` is passed so that usage can be reported in the final
    chunk; reading that chunk is left to the caller.

    Args:
        messages: Chat messages forwarded unchanged to LiteLLM.
        use_smart: Only logged here; it does not affect model selection.

    Raises:
        Exception: Whatever the primary call raised, after logging it. The
            only exception is when Hermes is the primary backend: the call
            is then retried once against Ollama, and any error from that
            retry propagates.
    """
    kwargs = _primary_kwargs()
    extra = {}
    # Request usage in final streaming chunk (supported by Anthropic + OpenAI)
    if ANTHROPIC_API_KEY or HERMES_URL:
        extra["stream_options"] = {"include_usage": True}
    logger.info("Brain: model=%s smart=%s", kwargs.get("model"), use_smart)
    try:
        return await litellm.acompletion(
            stream=True, messages=messages, **kwargs, **extra
        )
    except Exception as e:
        logger.error("Brain error: %s", e)
        # Fall back only when Hermes was the primary backend.
        if HERMES_URL and not ANTHROPIC_API_KEY:
            logger.info("Falling back to Ollama")
            return await litellm.acompletion(
                stream=True, messages=messages, **_ollama_kwargs()
            )
        raise