# jon-snow/app/brain.py

import json
import logging
import os

import litellm

# Module-level logger; the name is a fixed string rather than derived from __name__.
logger = logging.getLogger("jon-snow.brain")
# Import-time side effect: clears litellm's global verbose flag for the whole process.
litellm.set_verbose = False

# --- Anthropic backend -------------------------------------------------------
# An empty key is falsy, which the backend selection treats as "not configured".
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")
CLAUDE_MODEL = os.environ.get("CLAUDE_MODEL", "claude-sonnet-4-6")

# --- Ollama backend ----------------------------------------------------------
# Model identifier and base URL handed to litellm for the Ollama route.
FAST_MODEL = os.environ.get("FAST_MODEL", "ollama/llama3.1:8b")
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://172.27.40.20:11434")

# --- Hermes backend ----------------------------------------------------------
# An empty URL is falsy and disables this route; the key defaults to the
# literal string "none".
HERMES_URL = os.environ.get("HERMES_URL", "")
HERMES_API_KEY = os.environ.get("HERMES_API_KEY", "none")

# System prompt for task extraction: asks the model for a bare JSON object
# holding the cleaned-up task title and the (optional) project name.
# Each tuple element below is one line of the final prompt.
EXTRACT_SYSTEM = "\n".join(
    (
        "Extract the task title and destination project from the user message.",
        "Rules:",
        "- title: the actual task to be done, stripped of all filler "
        "(no 'please add', 'a work item', 'a job item', 'we need to', etc.)",
        "- project: the client or project name if mentioned, otherwise null",
        "Reply with JSON only, no other text: "
        '{"title": "...", "project": "..." or null}',
    )
)


def _anthropic_kwargs() -> dict:
    """Build the litellm keyword arguments for the Anthropic route.

    The configured model name is prefixed with ``anthropic/`` and paired with
    the configured API key.
    """
    model_id = f"anthropic/{CLAUDE_MODEL}"
    return dict(api_key=ANTHROPIC_API_KEY, model=model_id)


def _hermes_kwargs() -> dict:
    """Build the litellm keyword arguments for the Hermes route.

    Uses the fixed ``openai/hermes-agent`` model id with the configured
    Hermes URL as ``api_base`` and the configured Hermes key as ``api_key``.
    """
    return dict(
        model="openai/hermes-agent",
        api_base=HERMES_URL,
        api_key=HERMES_API_KEY,
    )


def _ollama_kwargs() -> dict:
    """Build the litellm keyword arguments for the Ollama route.

    Combines the configured fast model with the configured Ollama base URL.
    """
    return dict(model=FAST_MODEL, api_base=OLLAMA_BASE_URL)


def _primary_kwargs() -> dict:
    """Return litellm keyword arguments for the preferred backend.

    Precedence: Anthropic when an API key is set, otherwise Hermes when a URL
    is set, otherwise Ollama.
    """
    if ANTHROPIC_API_KEY:
        build = _anthropic_kwargs
    elif HERMES_URL:
        build = _hermes_kwargs
    else:
        build = _ollama_kwargs
    return build()


def _parse_task_fields(content: "str | None") -> dict:
    """Decode the model's reply into a dict that always has ``title`` and ``project``.

    The first JSON object found in ``content`` is used, so a markdown code
    fence or stray prose around the JSON does not break parsing. Keys missing
    from the reply default to ``None``; any extra keys are kept.

    Raises:
        ValueError: if ``content`` is empty, contains no ``{``, or is not
            valid JSON from that point on (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    text = (content or "").strip()
    start = text.find("{")
    if start == -1:
        raise ValueError("no JSON object in model reply")
    # raw_decode stops at the end of the first complete JSON value, so a
    # closing code fence or trailing commentary after the object is ignored.
    parsed, _ = json.JSONDecoder().raw_decode(text[start:])
    return {"title": None, "project": None, **parsed}


async def extract_task_fields(message: str) -> tuple[dict, dict]:
    """Ask the primary model to extract a task title and project from ``message``.

    Returns (fields, usage).
    fields = {"title": str | None, "project": str | None}
    usage  = {"prompt_tokens": int, "completion_tokens": int}

    Exceptions from the model call or from parsing are logged and swallowed:
    - if the call itself fails, fields are ``None`` and usage is all zeros;
    - if the call succeeds but the reply cannot be parsed, fields are ``None``
      while usage still reports the tokens the call consumed.
    """
    no_fields = {"title": None, "project": None}
    usage = {"prompt_tokens": 0, "completion_tokens": 0}
    prompt = [
        {"role": "system", "content": EXTRACT_SYSTEM},
        {"role": "user", "content": message},
    ]

    # Best-effort boundary: any provider/transport error degrades to defaults.
    try:
        resp = await litellm.acompletion(stream=False, messages=prompt, **_primary_kwargs())
    except Exception as e:
        logger.warning(f"extract_task_fields failed: {e}")
        return no_fields, usage

    # Capture usage before parsing so a malformed reply does not hide the
    # tokens that were already spent.
    reported = getattr(resp, "usage", None)
    if reported:
        usage = {
            "prompt_tokens": getattr(reported, "prompt_tokens", 0) or 0,
            "completion_tokens": getattr(reported, "completion_tokens", 0) or 0,
        }

    # Parsing stays best-effort too: an empty choices list, missing content,
    # or invalid JSON all fall back to the empty fields.
    try:
        fields = _parse_task_fields(resp.choices[0].message.content)
    except Exception as e:
        logger.warning(f"extract_task_fields failed: {e}")
        return no_fields, usage
    return fields, usage


async def stream_completion(messages: list[dict], use_smart: bool = False):
    """Start a streaming completion on the primary backend.

    Args:
        messages: chat messages passed straight through to litellm.
        use_smart: only written to the log line here; it does not influence
            which backend or model is used.

    Returns:
        Whatever ``litellm.acompletion(stream=True, ...)`` returns. When the
        Anthropic key or Hermes URL is configured, the call also passes
        ``stream_options={"include_usage": True}``.

    Raises:
        Re-raises the primary call's exception unless Hermes was the primary
        backend (Hermes URL set, no Anthropic key); in that case the request
        is retried once against Ollama, and any error from that retry
        propagates.
    """
    call_args = _primary_kwargs()
    # Request usage in the stream (the flag is only sent for the Anthropic and
    # Hermes routes, never for the plain Ollama route).
    wants_usage = bool(ANTHROPIC_API_KEY or HERMES_URL)
    if wants_usage:
        call_args["stream_options"] = {"include_usage": True}

    logger.info(f"Brain: model={call_args.get('model')} smart={use_smart}")
    try:
        return await litellm.acompletion(stream=True, messages=messages, **call_args)
    except Exception as e:
        logger.error(f"Brain error: {e}")
        hermes_was_primary = bool(HERMES_URL) and not ANTHROPIC_API_KEY
        if not hermes_was_primary:
            raise
        logger.info("Falling back to Ollama")
        return await litellm.acompletion(stream=True, messages=messages, **_ollama_kwargs())