feat: Jon Snow Phase 2 — FastAPI orchestrator with LiteLLM brain
OpenAI-compatible API at :8900. Intent classifier routes status queries to FAST_MODEL (Ollama), task submissions to Plane, planning to SMART_MODEL. Reads agent-os logs for status context. Phase 3: approval gate + execution. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,40 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
import litellm
|
||||
|
||||
# Module-level logger for this file, registered under the fixed name
# "jon-snow.brain" (not derived from __name__).
logger = logging.getLogger("jon-snow.brain")
|
||||
# Explicitly switch off LiteLLM's verbose flag at import time.
litellm.set_verbose = False
|
||||
|
||||
# Model used by stream_completion when use_smart is False, and as the
# fallback target when the smart model fails. Overridable through the
# FAST_MODEL environment variable.
FAST_MODEL = os.getenv("FAST_MODEL", "ollama/gemma4")
|
||||
# Model used by stream_completion when use_smart is True. Overridable through
# the SMART_MODEL environment variable; note the default is the same model as
# FAST_MODEL's default, in which case no fallback is attempted.
SMART_MODEL = os.getenv("SMART_MODEL", "ollama/gemma4")
|
||||
# Base URL handed to LiteLLM as api_base for models that _extra_kwargs
# identifies as Ollama-hosted. Overridable through the OLLAMA_BASE_URL
# environment variable.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://172.27.40.20:11434")
|
||||
|
||||
|
||||
def _extra_kwargs(model: str) -> dict:
|
||||
if model.startswith("ollama/"):
|
||||
return {"api_base": OLLAMA_BASE_URL}
|
||||
return {}
|
||||
|
||||
|
||||
async def _open_stream(model: str, messages: list[dict]):
    """Start a streaming LiteLLM completion for *model* with its provider kwargs."""
    return await litellm.acompletion(
        model=model,
        messages=messages,
        stream=True,
        **_extra_kwargs(model),
    )


async def stream_completion(messages: list[dict], use_smart: bool = False):
    """Start a streaming chat completion, falling back to the fast model.

    Selects SMART_MODEL when ``use_smart`` is true and FAST_MODEL otherwise.
    If starting the smart-model request fails and the smart model differs
    from FAST_MODEL, the request is retried once against FAST_MODEL.

    Only failures raised while starting the request are handled here; the
    returned stream is consumed by the caller, outside this ``try`` block.

    Args:
        messages: Chat messages forwarded unchanged to ``litellm.acompletion``.
        use_smart: Whether to prefer SMART_MODEL over FAST_MODEL.

    Returns:
        Whatever ``litellm.acompletion`` returns for ``stream=True``.

    Raises:
        Exception: The original error when no fallback applies, or the
            fallback call's own error if the retry fails as well.
    """
    model = SMART_MODEL if use_smart else FAST_MODEL
    logger.info("Brain: model=%s smart=%s", model, use_smart)
    try:
        return await _open_stream(model, messages)
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Brain error (%s): %s", model, e)
        # Retrying is pointless when the failing model already is FAST_MODEL.
        if use_smart and model != FAST_MODEL:
            logger.info("Falling back to FAST_MODEL")
            return await _open_stream(FAST_MODEL, messages)
        raise
|
||||
Reference in New Issue
Block a user