feat: Jon Snow Phase 2 — FastAPI orchestrator with LiteLLM brain

OpenAI-compatible API at :8900. Intent classifier routes status queries
to FAST_MODEL (Ollama), task submissions to Plane, planning to SMART_MODEL.
Reads agent-os logs for status context. Phase 3: approval gate + execution.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-16 13:06:20 +00:00
commit a25deeb8f4
9 changed files with 503 additions and 0 deletions
View File
+40
View File
@@ -0,0 +1,40 @@
import logging
import os
import litellm
logger = logging.getLogger("jon-snow.brain")
litellm.set_verbose = False
FAST_MODEL = os.getenv("FAST_MODEL", "ollama/gemma4")
SMART_MODEL = os.getenv("SMART_MODEL", "ollama/gemma4")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://172.27.40.20:11434")
def _extra_kwargs(model: str) -> dict:
if model.startswith("ollama/"):
return {"api_base": OLLAMA_BASE_URL}
return {}
async def stream_completion(messages: list[dict], use_smart: bool = False):
model = SMART_MODEL if use_smart else FAST_MODEL
logger.info(f"Brain: model={model} smart={use_smart}")
try:
return await litellm.acompletion(
model=model,
messages=messages,
stream=True,
**_extra_kwargs(model),
)
except Exception as e:
logger.error(f"Brain error ({model}): {e}")
if use_smart and model != FAST_MODEL:
logger.info("Falling back to FAST_MODEL")
return await litellm.acompletion(
model=FAST_MODEL,
messages=messages,
stream=True,
**_extra_kwargs(FAST_MODEL),
)
raise
+78
View File
@@ -0,0 +1,78 @@
AGENT_NAMES = {"hodor", "bran", "varys", "sam", "raven", "qyburn", "citadel", "jon"}
STATUS_PHRASES = {
"status", "health", "running", "last run", "what did", "when did",
"show me", "how is", "is it", "is running", "did it run", "output",
"summary", "report", "check", "monitor", "alive", "up",
}
TASK_PHRASES = {
"create task", "add task", "add issue", "create issue", "log task",
"log this", "new task", "new issue", "add to plane", "add to backlog",
"plan", "schedule", "remind", "track", "todo", "to do",
}
RESEARCH_PHRASES = {
"research", "search", "find out", "look up", "what is", "explain",
"how does", "documentation", "docs",
}
def classify_intent(message: str) -> str:
msg = message.lower()
words = set(msg.split())
# Agent name + query word → status
if words & AGENT_NAMES:
if any(p in msg for p in STATUS_PHRASES) or words & {"status", "check", "output", "run"}:
return "status"
# Explicit task phrases → task
if any(p in msg for p in TASK_PHRASES):
return "task"
# Generic status signal words
if any(p in msg for p in STATUS_PHRASES) and words & AGENT_NAMES:
return "status"
# Status if asking purely about the agent ecosystem
if words & AGENT_NAMES and not any(p in msg for p in {"build", "implement", "create", "make"}):
return "status"
# Research intent → route to smart model
if any(p in msg for p in RESEARCH_PHRASES):
return "planning"
return "planning"
def extract_agent_name(message: str) -> str | None:
msg = message.lower()
for name in AGENT_NAMES:
if name in msg:
return name
return None
PROJECT_KEYWORDS = {
"bni": "BNI Scheduler",
"scheduler": "BNI Scheduler",
"monitor": "Monitoring",
"monitoring": "Monitoring",
"grafana": "Monitoring",
"maester": "Maester Reports",
"report": "Maester Reports",
"csf": "Maester Reports",
"nist": "Maester Reports",
"portal": "Nexum Portal",
"authelia": "Nexum Portal",
"nexum": "Nexum Portal",
}
def extract_project_name(message: str) -> str | None:
msg = message.lower()
for kw, project in PROJECT_KEYWORDS.items():
if kw in msg:
return project
return None
+227
View File
@@ -0,0 +1,227 @@
import asyncio
import json
import logging
import os
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import AsyncGenerator
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel
from .brain import stream_completion
from .intent import classify_intent, extract_agent_name, extract_project_name
from .tools import create_plane_issue, get_agent_output, get_all_agent_status
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")
logger = logging.getLogger("jon-snow")
AGENT_OS_DIR = Path(os.getenv("AGENT_OS_DIR", "/opt/agent-os"))
SITES_DIR = Path(os.getenv("SITES_DIR", "/opt/sites"))
app = FastAPI(title="Jon Snow", version="0.2.0")
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
SYSTEM_PROMPT = """You are Jon Snow, chief of staff for the NxM home lab agent ecosystem on a self-hosted Linux server (172.27.40.3).
Live agents you coordinate:
- hodor (8200): HTTP gateway — routes requests to Ollama
- bran: Daily changelog summariser — runs 06:00 SAST, writes to /opt/sites/changelog/
- varys: Infrastructure monitor — runs every 15 min, HTTP health checks + agent watchdog
- sam (8500): Research agent — SearXNG + Ollama synthesis
- raven (8400): Notifications — Discord webhook + Gmail SMTP
- qyburn (8700): LLM coding agent — qwen2.5-coder:14b, approve/reject workflow
- citadel (8300): MCP tool registry — 16 tools including Plane integration
Infrastructure: Ubuntu Docker host 172.27.40.3, Ollama at 172.27.40.20:11434, Netbird VPN (100.119.x.x), Plane project management, Gitea at git.nxm.co.za.
Your current capabilities (Phase 2):
1. Report agent status — last run time, success/failure, output summary
2. Log tasks to Plane project management
3. Answer questions about the infrastructure
4. Route complex questions to your SMART_MODEL brain
You cannot yet execute tasks autonomously — that is Phase 3. When a user submits a task, log it to Plane and confirm.
Be concise — the user is often on mobile. Use short markdown lists, not long paragraphs."""
# --- OpenAI-compatible request/response models ---
class Message(BaseModel):
role: str
content: str
class ChatRequest(BaseModel):
model: str = "jon-snow"
messages: list[Message]
stream: bool = True
temperature: float | None = None
max_tokens: int | None = None
# --- Output helpers ---
def _write_status(intent: str, summary: str, status: str = "success") -> None:
log_dir = AGENT_OS_DIR / "logs" / "jon-snow"
log_dir.mkdir(parents=True, exist_ok=True)
payload = {
"agent": "jon-snow",
"timestamp": datetime.now(timezone.utc).isoformat(),
"status": status,
"result": f"[{intent}] {summary[:200]}",
}
(log_dir / "last-run.json").write_text(json.dumps(payload, indent=2))
out_dir = SITES_DIR / "jon-snow"
out_dir.mkdir(parents=True, exist_ok=True)
(out_dir / "last-output.md").write_text(
f"# Jon Snow — Last Response\n\n**{payload['timestamp']}**\n\nIntent: `{intent}`\n\n{summary[:500]}\n"
)
# --- SSE streaming helpers ---
def _sse_chunk(content: str, chunk_id: str) -> str:
data = {
"id": chunk_id,
"object": "chat.completion.chunk",
"created": int(time.time()),
"model": "jon-snow",
"choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}],
}
return f"data: {json.dumps(data)}\n\n"
def _sse_done(chunk_id: str) -> str:
data = {
"id": chunk_id,
"object": "chat.completion.chunk",
"created": int(time.time()),
"model": "jon-snow",
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
}
return f"data: {json.dumps(data)}\n\ndata: [DONE]\n\n"
async def _stream_text(text: str) -> AsyncGenerator[str, None]:
chunk_id = f"chatcmpl-{int(time.time())}"
# Emit word by word for a natural feel
words = text.split(" ")
for i, word in enumerate(words):
token = word + (" " if i < len(words) - 1 else "")
yield _sse_chunk(token, chunk_id)
await asyncio.sleep(0.005)
yield _sse_done(chunk_id)
async def _stream_llm(messages: list[dict], use_smart: bool = False) -> AsyncGenerator[str, None]:
chunk_id = f"chatcmpl-{int(time.time())}"
collected = []
try:
response = await stream_completion(messages, use_smart=use_smart)
async for chunk in response:
if chunk.choices and chunk.choices[0].delta.content:
content = chunk.choices[0].delta.content
collected.append(content)
yield _sse_chunk(content, chunk_id)
yield _sse_done(chunk_id)
except Exception as e:
logger.error(f"LLM stream error: {e}")
error_msg = f"Error reaching LLM: {e}"
async for part in _stream_text(error_msg):
yield part
collected.append(error_msg)
return
# --- Routes ---
@app.get("/health")
async def health():
return {"status": "ok", "agent": "jon-snow", "version": "0.2.0"}
@app.get("/v1/models")
async def list_models():
return {
"object": "list",
"data": [{"id": "jon-snow", "object": "model", "created": 0, "owned_by": "nxm"}],
}
@app.post("/v1/chat/completions")
async def chat_completions(req: ChatRequest):
user_message = next((m.content for m in reversed(req.messages) if m.role == "user"), "")
intent = classify_intent(user_message)
logger.info(f"intent={intent} msg={user_message[:100]!r}")
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
messages += [{"role": m.role, "content": m.content} for m in req.messages]
async def generate() -> AsyncGenerator[str, None]:
summary = ""
if intent == "status":
status_ctx = get_all_agent_status(AGENT_OS_DIR)
agent_name = extract_agent_name(user_message)
if agent_name:
output = get_agent_output(SITES_DIR, agent_name)
if output:
status_ctx += f"\n\n### {agent_name} last output:\n{output}"
messages[0]["content"] += f"\n\n## Current Agent Status\n{status_ctx}"
async for chunk in _stream_llm(messages, use_smart=False):
yield chunk
summary = f"Status query: {user_message[:100]}"
elif intent == "task":
project_hint = extract_project_name(user_message)
title = user_message.strip()
try:
issue = await create_plane_issue(title, project_hint)
response_text = (
f"Task logged to Plane.\n\n"
f"**{issue['title']}** \n"
f"Project: *{issue['project']}* | #{issue['sequence_id']}\n\n"
f"I can't execute tasks yet (Phase 3). It's in the backlog."
)
summary = f"Task created: {issue['title']}"
except Exception as e:
response_text = f"Couldn't log to Plane ({e}). Task noted locally: {user_message[:100]}"
summary = f"Plane error: {e}"
async for chunk in _stream_text(response_text):
yield chunk
else: # planning / general
async for chunk in _stream_llm(messages, use_smart=True):
yield chunk
summary = f"Planning query: {user_message[:100]}"
_write_status(intent, summary)
if req.stream:
return StreamingResponse(generate(), media_type="text/event-stream")
# Non-streaming: collect full response
full_text = ""
async for chunk in generate():
if chunk.startswith("data: ") and "[DONE]" not in chunk:
try:
data = json.loads(chunk[6:])
token = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
full_text += token
except Exception:
pass
return JSONResponse({
"id": f"chatcmpl-{int(time.time())}",
"object": "chat.completion",
"created": int(time.time()),
"model": "jon-snow",
"choices": [{"index": 0, "message": {"role": "assistant", "content": full_text}, "finish_reason": "stop"}],
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
})
+112
View File
@@ -0,0 +1,112 @@
import json
import logging
import os
from pathlib import Path
import httpx
logger = logging.getLogger("jon-snow.tools")
PLANE_URL = os.getenv("PLANE_URL", "http://172.27.40.3:8095")
PLANE_WORKSPACE = os.getenv("PLANE_WORKSPACE", "nxm")
PLANE_API_KEY = os.getenv("PLANE_API_KEY", "")
AGENT_FULL_NAMES = [
"hodor-gateway", "bran-changelog", "varys-monitor", "sam-research",
"raven-notify", "qyburn-coder", "citadel-mcp", "jon-snow",
]
SHORT_TO_FULL = {
"hodor": "hodor-gateway",
"bran": "bran-changelog",
"varys": "varys-monitor",
"sam": "sam-research",
"raven": "raven-notify",
"qyburn": "qyburn-coder",
"citadel": "citadel-mcp",
"jon": "jon-snow",
}
def get_all_agent_status(agent_os_dir: Path) -> str:
logs_dir = agent_os_dir / "logs"
if not logs_dir.exists():
return "Agent log directory not found."
lines = []
for full_name in AGENT_FULL_NAMES:
log_file = logs_dir / full_name / "last-run.json"
if log_file.exists():
try:
data = json.loads(log_file.read_text())
ts = data.get("timestamp", "unknown")[:19].replace("T", " ")
status = data.get("status", "unknown")
result = data.get("result", "")[:120]
icon = "OK" if status == "success" else "FAIL"
lines.append(f"- **{full_name}**: [{icon}] {ts} UTC — {result}")
except Exception as e:
lines.append(f"- **{full_name}**: error reading log ({e})")
else:
lines.append(f"- **{full_name}**: no log (never run or not yet deployed)")
return "\n".join(lines)
def get_agent_output(sites_dir: Path, agent_name: str) -> str | None:
short_name = agent_name if agent_name in SHORT_TO_FULL else next(
(k for k, v in SHORT_TO_FULL.items() if v == agent_name), agent_name
)
output_file = sites_dir / short_name / "last-output.md"
if not output_file.exists():
return None
content = output_file.read_text()
return content[:3000]
async def get_plane_projects() -> list[dict]:
headers = {"X-Api-Key": PLANE_API_KEY}
async with httpx.AsyncClient() as client:
resp = await client.get(
f"{PLANE_URL}/api/v1/workspaces/{PLANE_WORKSPACE}/projects/",
headers=headers, timeout=10,
)
resp.raise_for_status()
return resp.json().get("results", [])
def _resolve_project(projects: list[dict], hint: str | None) -> dict:
if hint:
hint_lower = hint.lower()
for p in projects:
if hint_lower in p["name"].lower():
return p
# Default: Agent Ecosystem
for p in projects:
if "agent" in p["name"].lower():
return p
return projects[0]
async def create_plane_issue(title: str, project_hint: str | None = None) -> dict:
projects = await get_plane_projects()
if not projects:
raise ValueError("No Plane projects found")
project = _resolve_project(projects, project_hint)
headers = {"X-Api-Key": PLANE_API_KEY, "Content-Type": "application/json"}
async with httpx.AsyncClient() as client:
resp = await client.post(
f"{PLANE_URL}/api/v1/workspaces/{PLANE_WORKSPACE}/projects/{project['id']}/issues/",
headers=headers,
json={"name": title[:255], "priority": "none"},
timeout=10,
)
resp.raise_for_status()
issue = resp.json()
return {
"sequence_id": issue.get("sequence_id", "?"),
"title": title[:80],
"project": project["name"],
}