feat: Jon Snow Phase 2 — FastAPI orchestrator with LiteLLM brain
OpenAI-compatible API at :8900. Intent classifier routes status queries to FAST_MODEL (Ollama), task submissions to Plane, planning to SMART_MODEL. Reads agent-os logs for status context. Phase 3: approval gate + execution. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
.env
|
||||
__pycache__/
|
||||
*.pyc
|
||||
+11
@@ -0,0 +1,11 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY app/ ./app/
|
||||
|
||||
EXPOSE 8900
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8900", "--log-level", "info"]
|
||||
@@ -0,0 +1,40 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
import litellm
|
||||
|
||||
logger = logging.getLogger("jon-snow.brain")
|
||||
litellm.set_verbose = False
|
||||
|
||||
FAST_MODEL = os.getenv("FAST_MODEL", "ollama/gemma4")
|
||||
SMART_MODEL = os.getenv("SMART_MODEL", "ollama/gemma4")
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://172.27.40.20:11434")
|
||||
|
||||
|
||||
def _extra_kwargs(model: str) -> dict:
|
||||
if model.startswith("ollama/"):
|
||||
return {"api_base": OLLAMA_BASE_URL}
|
||||
return {}
|
||||
|
||||
|
||||
async def stream_completion(messages: list[dict], use_smart: bool = False):
|
||||
model = SMART_MODEL if use_smart else FAST_MODEL
|
||||
logger.info(f"Brain: model={model} smart={use_smart}")
|
||||
try:
|
||||
return await litellm.acompletion(
|
||||
model=model,
|
||||
messages=messages,
|
||||
stream=True,
|
||||
**_extra_kwargs(model),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Brain error ({model}): {e}")
|
||||
if use_smart and model != FAST_MODEL:
|
||||
logger.info("Falling back to FAST_MODEL")
|
||||
return await litellm.acompletion(
|
||||
model=FAST_MODEL,
|
||||
messages=messages,
|
||||
stream=True,
|
||||
**_extra_kwargs(FAST_MODEL),
|
||||
)
|
||||
raise
|
||||
@@ -0,0 +1,78 @@
|
||||
AGENT_NAMES = {"hodor", "bran", "varys", "sam", "raven", "qyburn", "citadel", "jon"}
|
||||
|
||||
STATUS_PHRASES = {
|
||||
"status", "health", "running", "last run", "what did", "when did",
|
||||
"show me", "how is", "is it", "is running", "did it run", "output",
|
||||
"summary", "report", "check", "monitor", "alive", "up",
|
||||
}
|
||||
|
||||
TASK_PHRASES = {
|
||||
"create task", "add task", "add issue", "create issue", "log task",
|
||||
"log this", "new task", "new issue", "add to plane", "add to backlog",
|
||||
"plan", "schedule", "remind", "track", "todo", "to do",
|
||||
}
|
||||
|
||||
RESEARCH_PHRASES = {
|
||||
"research", "search", "find out", "look up", "what is", "explain",
|
||||
"how does", "documentation", "docs",
|
||||
}
|
||||
|
||||
|
||||
def classify_intent(message: str) -> str:
|
||||
msg = message.lower()
|
||||
words = set(msg.split())
|
||||
|
||||
# Agent name + query word → status
|
||||
if words & AGENT_NAMES:
|
||||
if any(p in msg for p in STATUS_PHRASES) or words & {"status", "check", "output", "run"}:
|
||||
return "status"
|
||||
|
||||
# Explicit task phrases → task
|
||||
if any(p in msg for p in TASK_PHRASES):
|
||||
return "task"
|
||||
|
||||
# Generic status signal words
|
||||
if any(p in msg for p in STATUS_PHRASES) and words & AGENT_NAMES:
|
||||
return "status"
|
||||
|
||||
# Status if asking purely about the agent ecosystem
|
||||
if words & AGENT_NAMES and not any(p in msg for p in {"build", "implement", "create", "make"}):
|
||||
return "status"
|
||||
|
||||
# Research intent → route to smart model
|
||||
if any(p in msg for p in RESEARCH_PHRASES):
|
||||
return "planning"
|
||||
|
||||
return "planning"
|
||||
|
||||
|
||||
def extract_agent_name(message: str) -> str | None:
|
||||
msg = message.lower()
|
||||
for name in AGENT_NAMES:
|
||||
if name in msg:
|
||||
return name
|
||||
return None
|
||||
|
||||
|
||||
PROJECT_KEYWORDS = {
|
||||
"bni": "BNI Scheduler",
|
||||
"scheduler": "BNI Scheduler",
|
||||
"monitor": "Monitoring",
|
||||
"monitoring": "Monitoring",
|
||||
"grafana": "Monitoring",
|
||||
"maester": "Maester Reports",
|
||||
"report": "Maester Reports",
|
||||
"csf": "Maester Reports",
|
||||
"nist": "Maester Reports",
|
||||
"portal": "Nexum Portal",
|
||||
"authelia": "Nexum Portal",
|
||||
"nexum": "Nexum Portal",
|
||||
}
|
||||
|
||||
|
||||
def extract_project_name(message: str) -> str | None:
|
||||
msg = message.lower()
|
||||
for kw, project in PROJECT_KEYWORDS.items():
|
||||
if kw in msg:
|
||||
return project
|
||||
return None
|
||||
+227
@@ -0,0 +1,227 @@
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from .brain import stream_completion
|
||||
from .intent import classify_intent, extract_agent_name, extract_project_name
|
||||
from .tools import create_plane_issue, get_agent_output, get_all_agent_status
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")
|
||||
logger = logging.getLogger("jon-snow")
|
||||
|
||||
AGENT_OS_DIR = Path(os.getenv("AGENT_OS_DIR", "/opt/agent-os"))
|
||||
SITES_DIR = Path(os.getenv("SITES_DIR", "/opt/sites"))
|
||||
|
||||
app = FastAPI(title="Jon Snow", version="0.2.0")
|
||||
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
|
||||
|
||||
SYSTEM_PROMPT = """You are Jon Snow, chief of staff for the NxM home lab agent ecosystem on a self-hosted Linux server (172.27.40.3).
|
||||
|
||||
Live agents you coordinate:
|
||||
- hodor (8200): HTTP gateway — routes requests to Ollama
|
||||
- bran: Daily changelog summariser — runs 06:00 SAST, writes to /opt/sites/changelog/
|
||||
- varys: Infrastructure monitor — runs every 15 min, HTTP health checks + agent watchdog
|
||||
- sam (8500): Research agent — SearXNG + Ollama synthesis
|
||||
- raven (8400): Notifications — Discord webhook + Gmail SMTP
|
||||
- qyburn (8700): LLM coding agent — qwen2.5-coder:14b, approve/reject workflow
|
||||
- citadel (8300): MCP tool registry — 16 tools including Plane integration
|
||||
|
||||
Infrastructure: Ubuntu Docker host 172.27.40.3, Ollama at 172.27.40.20:11434, Netbird VPN (100.119.x.x), Plane project management, Gitea at git.nxm.co.za.
|
||||
|
||||
Your current capabilities (Phase 2):
|
||||
1. Report agent status — last run time, success/failure, output summary
|
||||
2. Log tasks to Plane project management
|
||||
3. Answer questions about the infrastructure
|
||||
4. Route complex questions to your SMART_MODEL brain
|
||||
|
||||
You cannot yet execute tasks autonomously — that is Phase 3. When a user submits a task, log it to Plane and confirm.
|
||||
Be concise — the user is often on mobile. Use short markdown lists, not long paragraphs."""
|
||||
|
||||
|
||||
# --- OpenAI-compatible request/response models ---
|
||||
|
||||
class Message(BaseModel):
|
||||
role: str
|
||||
content: str
|
||||
|
||||
|
||||
class ChatRequest(BaseModel):
|
||||
model: str = "jon-snow"
|
||||
messages: list[Message]
|
||||
stream: bool = True
|
||||
temperature: float | None = None
|
||||
max_tokens: int | None = None
|
||||
|
||||
|
||||
# --- Output helpers ---
|
||||
|
||||
def _write_status(intent: str, summary: str, status: str = "success") -> None:
|
||||
log_dir = AGENT_OS_DIR / "logs" / "jon-snow"
|
||||
log_dir.mkdir(parents=True, exist_ok=True)
|
||||
payload = {
|
||||
"agent": "jon-snow",
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"status": status,
|
||||
"result": f"[{intent}] {summary[:200]}",
|
||||
}
|
||||
(log_dir / "last-run.json").write_text(json.dumps(payload, indent=2))
|
||||
|
||||
out_dir = SITES_DIR / "jon-snow"
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
(out_dir / "last-output.md").write_text(
|
||||
f"# Jon Snow — Last Response\n\n**{payload['timestamp']}**\n\nIntent: `{intent}`\n\n{summary[:500]}\n"
|
||||
)
|
||||
|
||||
|
||||
# --- SSE streaming helpers ---
|
||||
|
||||
def _sse_chunk(content: str, chunk_id: str) -> str:
|
||||
data = {
|
||||
"id": chunk_id,
|
||||
"object": "chat.completion.chunk",
|
||||
"created": int(time.time()),
|
||||
"model": "jon-snow",
|
||||
"choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}],
|
||||
}
|
||||
return f"data: {json.dumps(data)}\n\n"
|
||||
|
||||
|
||||
def _sse_done(chunk_id: str) -> str:
|
||||
data = {
|
||||
"id": chunk_id,
|
||||
"object": "chat.completion.chunk",
|
||||
"created": int(time.time()),
|
||||
"model": "jon-snow",
|
||||
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
|
||||
}
|
||||
return f"data: {json.dumps(data)}\n\ndata: [DONE]\n\n"
|
||||
|
||||
|
||||
async def _stream_text(text: str) -> AsyncGenerator[str, None]:
|
||||
chunk_id = f"chatcmpl-{int(time.time())}"
|
||||
# Emit word by word for a natural feel
|
||||
words = text.split(" ")
|
||||
for i, word in enumerate(words):
|
||||
token = word + (" " if i < len(words) - 1 else "")
|
||||
yield _sse_chunk(token, chunk_id)
|
||||
await asyncio.sleep(0.005)
|
||||
yield _sse_done(chunk_id)
|
||||
|
||||
|
||||
async def _stream_llm(messages: list[dict], use_smart: bool = False) -> AsyncGenerator[str, None]:
|
||||
chunk_id = f"chatcmpl-{int(time.time())}"
|
||||
collected = []
|
||||
try:
|
||||
response = await stream_completion(messages, use_smart=use_smart)
|
||||
async for chunk in response:
|
||||
if chunk.choices and chunk.choices[0].delta.content:
|
||||
content = chunk.choices[0].delta.content
|
||||
collected.append(content)
|
||||
yield _sse_chunk(content, chunk_id)
|
||||
yield _sse_done(chunk_id)
|
||||
except Exception as e:
|
||||
logger.error(f"LLM stream error: {e}")
|
||||
error_msg = f"Error reaching LLM: {e}"
|
||||
async for part in _stream_text(error_msg):
|
||||
yield part
|
||||
collected.append(error_msg)
|
||||
return
|
||||
|
||||
|
||||
# --- Routes ---
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
return {"status": "ok", "agent": "jon-snow", "version": "0.2.0"}
|
||||
|
||||
|
||||
@app.get("/v1/models")
|
||||
async def list_models():
|
||||
return {
|
||||
"object": "list",
|
||||
"data": [{"id": "jon-snow", "object": "model", "created": 0, "owned_by": "nxm"}],
|
||||
}
|
||||
|
||||
|
||||
@app.post("/v1/chat/completions")
|
||||
async def chat_completions(req: ChatRequest):
|
||||
user_message = next((m.content for m in reversed(req.messages) if m.role == "user"), "")
|
||||
intent = classify_intent(user_message)
|
||||
logger.info(f"intent={intent} msg={user_message[:100]!r}")
|
||||
|
||||
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
|
||||
messages += [{"role": m.role, "content": m.content} for m in req.messages]
|
||||
|
||||
async def generate() -> AsyncGenerator[str, None]:
|
||||
summary = ""
|
||||
|
||||
if intent == "status":
|
||||
status_ctx = get_all_agent_status(AGENT_OS_DIR)
|
||||
agent_name = extract_agent_name(user_message)
|
||||
if agent_name:
|
||||
output = get_agent_output(SITES_DIR, agent_name)
|
||||
if output:
|
||||
status_ctx += f"\n\n### {agent_name} last output:\n{output}"
|
||||
|
||||
messages[0]["content"] += f"\n\n## Current Agent Status\n{status_ctx}"
|
||||
async for chunk in _stream_llm(messages, use_smart=False):
|
||||
yield chunk
|
||||
summary = f"Status query: {user_message[:100]}"
|
||||
|
||||
elif intent == "task":
|
||||
project_hint = extract_project_name(user_message)
|
||||
title = user_message.strip()
|
||||
try:
|
||||
issue = await create_plane_issue(title, project_hint)
|
||||
response_text = (
|
||||
f"Task logged to Plane.\n\n"
|
||||
f"**{issue['title']}** \n"
|
||||
f"Project: *{issue['project']}* | #{issue['sequence_id']}\n\n"
|
||||
f"I can't execute tasks yet (Phase 3). It's in the backlog."
|
||||
)
|
||||
summary = f"Task created: {issue['title']}"
|
||||
except Exception as e:
|
||||
response_text = f"Couldn't log to Plane ({e}). Task noted locally: {user_message[:100]}"
|
||||
summary = f"Plane error: {e}"
|
||||
async for chunk in _stream_text(response_text):
|
||||
yield chunk
|
||||
|
||||
else: # planning / general
|
||||
async for chunk in _stream_llm(messages, use_smart=True):
|
||||
yield chunk
|
||||
summary = f"Planning query: {user_message[:100]}"
|
||||
|
||||
_write_status(intent, summary)
|
||||
|
||||
if req.stream:
|
||||
return StreamingResponse(generate(), media_type="text/event-stream")
|
||||
|
||||
# Non-streaming: collect full response
|
||||
full_text = ""
|
||||
async for chunk in generate():
|
||||
if chunk.startswith("data: ") and "[DONE]" not in chunk:
|
||||
try:
|
||||
data = json.loads(chunk[6:])
|
||||
token = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
|
||||
full_text += token
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return JSONResponse({
|
||||
"id": f"chatcmpl-{int(time.time())}",
|
||||
"object": "chat.completion",
|
||||
"created": int(time.time()),
|
||||
"model": "jon-snow",
|
||||
"choices": [{"index": 0, "message": {"role": "assistant", "content": full_text}, "finish_reason": "stop"}],
|
||||
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
|
||||
})
|
||||
+112
@@ -0,0 +1,112 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger("jon-snow.tools")
|
||||
|
||||
PLANE_URL = os.getenv("PLANE_URL", "http://172.27.40.3:8095")
|
||||
PLANE_WORKSPACE = os.getenv("PLANE_WORKSPACE", "nxm")
|
||||
PLANE_API_KEY = os.getenv("PLANE_API_KEY", "")
|
||||
|
||||
AGENT_FULL_NAMES = [
|
||||
"hodor-gateway", "bran-changelog", "varys-monitor", "sam-research",
|
||||
"raven-notify", "qyburn-coder", "citadel-mcp", "jon-snow",
|
||||
]
|
||||
|
||||
SHORT_TO_FULL = {
|
||||
"hodor": "hodor-gateway",
|
||||
"bran": "bran-changelog",
|
||||
"varys": "varys-monitor",
|
||||
"sam": "sam-research",
|
||||
"raven": "raven-notify",
|
||||
"qyburn": "qyburn-coder",
|
||||
"citadel": "citadel-mcp",
|
||||
"jon": "jon-snow",
|
||||
}
|
||||
|
||||
|
||||
def get_all_agent_status(agent_os_dir: Path) -> str:
|
||||
logs_dir = agent_os_dir / "logs"
|
||||
if not logs_dir.exists():
|
||||
return "Agent log directory not found."
|
||||
|
||||
lines = []
|
||||
for full_name in AGENT_FULL_NAMES:
|
||||
log_file = logs_dir / full_name / "last-run.json"
|
||||
if log_file.exists():
|
||||
try:
|
||||
data = json.loads(log_file.read_text())
|
||||
ts = data.get("timestamp", "unknown")[:19].replace("T", " ")
|
||||
status = data.get("status", "unknown")
|
||||
result = data.get("result", "")[:120]
|
||||
icon = "OK" if status == "success" else "FAIL"
|
||||
lines.append(f"- **{full_name}**: [{icon}] {ts} UTC — {result}")
|
||||
except Exception as e:
|
||||
lines.append(f"- **{full_name}**: error reading log ({e})")
|
||||
else:
|
||||
lines.append(f"- **{full_name}**: no log (never run or not yet deployed)")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def get_agent_output(sites_dir: Path, agent_name: str) -> str | None:
|
||||
short_name = agent_name if agent_name in SHORT_TO_FULL else next(
|
||||
(k for k, v in SHORT_TO_FULL.items() if v == agent_name), agent_name
|
||||
)
|
||||
output_file = sites_dir / short_name / "last-output.md"
|
||||
if not output_file.exists():
|
||||
return None
|
||||
content = output_file.read_text()
|
||||
return content[:3000]
|
||||
|
||||
|
||||
async def get_plane_projects() -> list[dict]:
|
||||
headers = {"X-Api-Key": PLANE_API_KEY}
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.get(
|
||||
f"{PLANE_URL}/api/v1/workspaces/{PLANE_WORKSPACE}/projects/",
|
||||
headers=headers, timeout=10,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json().get("results", [])
|
||||
|
||||
|
||||
def _resolve_project(projects: list[dict], hint: str | None) -> dict:
|
||||
if hint:
|
||||
hint_lower = hint.lower()
|
||||
for p in projects:
|
||||
if hint_lower in p["name"].lower():
|
||||
return p
|
||||
# Default: Agent Ecosystem
|
||||
for p in projects:
|
||||
if "agent" in p["name"].lower():
|
||||
return p
|
||||
return projects[0]
|
||||
|
||||
|
||||
async def create_plane_issue(title: str, project_hint: str | None = None) -> dict:
|
||||
projects = await get_plane_projects()
|
||||
if not projects:
|
||||
raise ValueError("No Plane projects found")
|
||||
|
||||
project = _resolve_project(projects, project_hint)
|
||||
|
||||
headers = {"X-Api-Key": PLANE_API_KEY, "Content-Type": "application/json"}
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(
|
||||
f"{PLANE_URL}/api/v1/workspaces/{PLANE_WORKSPACE}/projects/{project['id']}/issues/",
|
||||
headers=headers,
|
||||
json={"name": title[:255], "priority": "none"},
|
||||
timeout=10,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
issue = resp.json()
|
||||
|
||||
return {
|
||||
"sequence_id": issue.get("sequence_id", "?"),
|
||||
"title": title[:80],
|
||||
"project": project["name"],
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
services:
|
||||
jon-snow:
|
||||
build: .
|
||||
container_name: jon-snow
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8900:8900"
|
||||
volumes:
|
||||
- /opt/agent-os:/opt/agent-os:rw
|
||||
- /opt/sites:/opt/sites:rw
|
||||
environment:
|
||||
FAST_MODEL: ${FAST_MODEL:-ollama/gemma4}
|
||||
SMART_MODEL: ${SMART_MODEL:-ollama/gemma4}
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://172.27.40.20:11434}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||
PLANE_URL: ${PLANE_URL:-http://172.27.40.3:8095}
|
||||
PLANE_WORKSPACE: ${PLANE_WORKSPACE:-nxm}
|
||||
PLANE_API_KEY: ${PLANE_API_KEY}
|
||||
AGENT_OS_DIR: /opt/agent-os
|
||||
SITES_DIR: /opt/sites
|
||||
networks:
|
||||
- proxy
|
||||
|
||||
networks:
|
||||
proxy:
|
||||
external: true
|
||||
@@ -0,0 +1,6 @@
|
||||
fastapi==0.115.12
|
||||
uvicorn[standard]==0.34.3
|
||||
litellm==1.72.6
|
||||
httpx==0.28.1
|
||||
pydantic==2.11.4
|
||||
python-multipart==0.0.20
|
||||
Reference in New Issue
Block a user