commit a25deeb8f4fb2d728615ca70adeb33a68eac36c8 Author: Jaco Bezuidenhout Date: Sat May 16 13:06:20 2026 +0000 feat: Jon Snow Phase 2 — FastAPI orchestrator with LiteLLM brain OpenAI-compatible API at :8900. Intent classifier routes status queries to FAST_MODEL (Ollama), task submissions to Plane, planning to SMART_MODEL. Reads agent-os logs for status context. Phase 3: approval gate + execution. Co-Authored-By: Claude Sonnet 4.6 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cff5543 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.env +__pycache__/ +*.pyc diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7366567 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.12-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY app/ ./app/ + +EXPOSE 8900 +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8900", "--log-level", "info"] diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/brain.py b/app/brain.py new file mode 100644 index 0000000..163c83f --- /dev/null +++ b/app/brain.py @@ -0,0 +1,40 @@ +import logging +import os + +import litellm + +logger = logging.getLogger("jon-snow.brain") +litellm.set_verbose = False + +FAST_MODEL = os.getenv("FAST_MODEL", "ollama/gemma4") +SMART_MODEL = os.getenv("SMART_MODEL", "ollama/gemma4") +OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://172.27.40.20:11434") + + +def _extra_kwargs(model: str) -> dict: + if model.startswith("ollama/"): + return {"api_base": OLLAMA_BASE_URL} + return {} + + +async def stream_completion(messages: list[dict], use_smart: bool = False): + model = SMART_MODEL if use_smart else FAST_MODEL + logger.info(f"Brain: model={model} smart={use_smart}") + try: + return await litellm.acompletion( + model=model, + messages=messages, + stream=True, + **_extra_kwargs(model), + ) + except Exception as e: + logger.error(f"Brain error ({model}): {e}") + if use_smart and model != FAST_MODEL: + logger.info("Falling back to FAST_MODEL") + return await litellm.acompletion( + model=FAST_MODEL, + messages=messages, + stream=True, + **_extra_kwargs(FAST_MODEL), + ) + raise diff --git a/app/intent.py b/app/intent.py new file mode 100644 index 0000000..09aa067 --- /dev/null +++ b/app/intent.py @@ -0,0 +1,78 @@ +AGENT_NAMES = {"hodor", "bran", "varys", "sam", "raven", "qyburn", "citadel", "jon"} + +STATUS_PHRASES = { + "status", "health", "running", "last run", "what did", "when did", + "show me", "how is", "is it", "is running", "did it run", "output", + "summary", "report", "check", "monitor", "alive", "up", +} + +TASK_PHRASES = { + "create task", "add task", "add issue", "create issue", "log task", + "log this", "new task", "new issue", "add to plane", "add to backlog", + "plan", "schedule", "remind", "track", "todo", "to do", +} + +RESEARCH_PHRASES = { + "research", "search", "find out", "look up", "what is", "explain", + "how does", "documentation", "docs", +} + + +def classify_intent(message: str) -> str: + msg = message.lower() + words = set(msg.split()) + + # Agent name + query word → status + if words & AGENT_NAMES: + if any(p in msg for p in STATUS_PHRASES) or words & {"status", "check", "output", "run"}: + return "status" + + # Explicit task phrases → task + if any(p in msg for p in TASK_PHRASES): + return "task" + + # Generic status signal words + if any(p in msg for p in STATUS_PHRASES) and words & AGENT_NAMES: + return "status" + + # Status if asking purely about the agent ecosystem + if words & AGENT_NAMES and not any(p in msg for p in {"build", "implement", "create", "make"}): + return "status" + + # Research intent → route to smart model + if any(p in msg for p in RESEARCH_PHRASES): + return "planning" + + return "planning" + + +def extract_agent_name(message: str) -> str | None: + msg = message.lower() + for name in AGENT_NAMES: + if name in msg: + return name + return None + + +PROJECT_KEYWORDS = { + "bni": "BNI Scheduler", + "scheduler": "BNI Scheduler", + "monitor": "Monitoring", + "monitoring": "Monitoring", + "grafana": "Monitoring", + "maester": "Maester Reports", + "report": "Maester Reports", + "csf": "Maester Reports", + "nist": "Maester Reports", + "portal": "Nexum Portal", + "authelia": "Nexum Portal", + "nexum": "Nexum Portal", +} + + +def extract_project_name(message: str) -> str | None: + msg = message.lower() + for kw, project in PROJECT_KEYWORDS.items(): + if kw in msg: + return project + return None diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..991c376 --- /dev/null +++ b/app/main.py @@ -0,0 +1,227 @@ +import asyncio +import json +import logging +import os +import time +from datetime import datetime, timezone +from pathlib import Path +from typing import AsyncGenerator + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse, StreamingResponse +from pydantic import BaseModel + +from .brain import stream_completion +from .intent import classify_intent, extract_agent_name, extract_project_name +from .tools import create_plane_issue, get_agent_output, get_all_agent_status + +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s") +logger = logging.getLogger("jon-snow") + +AGENT_OS_DIR = Path(os.getenv("AGENT_OS_DIR", "/opt/agent-os")) +SITES_DIR = Path(os.getenv("SITES_DIR", "/opt/sites")) + +app = FastAPI(title="Jon Snow", version="0.2.0") +app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"]) + +SYSTEM_PROMPT = """You are Jon Snow, chief of staff for the NxM home lab agent ecosystem on a self-hosted Linux server (172.27.40.3). + +Live agents you coordinate: +- hodor (8200): HTTP gateway — routes requests to Ollama +- bran: Daily changelog summariser — runs 06:00 SAST, writes to /opt/sites/changelog/ +- varys: Infrastructure monitor — runs every 15 min, HTTP health checks + agent watchdog +- sam (8500): Research agent — SearXNG + Ollama synthesis +- raven (8400): Notifications — Discord webhook + Gmail SMTP +- qyburn (8700): LLM coding agent — qwen2.5-coder:14b, approve/reject workflow +- citadel (8300): MCP tool registry — 16 tools including Plane integration + +Infrastructure: Ubuntu Docker host 172.27.40.3, Ollama at 172.27.40.20:11434, Netbird VPN (100.119.x.x), Plane project management, Gitea at git.nxm.co.za. + +Your current capabilities (Phase 2): +1. Report agent status — last run time, success/failure, output summary +2. Log tasks to Plane project management +3. Answer questions about the infrastructure +4. Route complex questions to your SMART_MODEL brain + +You cannot yet execute tasks autonomously — that is Phase 3. When a user submits a task, log it to Plane and confirm. +Be concise — the user is often on mobile. Use short markdown lists, not long paragraphs.""" + + +# --- OpenAI-compatible request/response models --- + +class Message(BaseModel): + role: str + content: str + + +class ChatRequest(BaseModel): + model: str = "jon-snow" + messages: list[Message] + stream: bool = True + temperature: float | None = None + max_tokens: int | None = None + + +# --- Output helpers --- + +def _write_status(intent: str, summary: str, status: str = "success") -> None: + log_dir = AGENT_OS_DIR / "logs" / "jon-snow" + log_dir.mkdir(parents=True, exist_ok=True) + payload = { + "agent": "jon-snow", + "timestamp": datetime.now(timezone.utc).isoformat(), + "status": status, + "result": f"[{intent}] {summary[:200]}", + } + (log_dir / "last-run.json").write_text(json.dumps(payload, indent=2)) + + out_dir = SITES_DIR / "jon-snow" + out_dir.mkdir(parents=True, exist_ok=True) + (out_dir / "last-output.md").write_text( + f"# Jon Snow — Last Response\n\n**{payload['timestamp']}**\n\nIntent: `{intent}`\n\n{summary[:500]}\n" + ) + + +# --- SSE streaming helpers --- + +def _sse_chunk(content: str, chunk_id: str) -> str: + data = { + "id": chunk_id, + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": "jon-snow", + "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}], + } + return f"data: {json.dumps(data)}\n\n" + + +def _sse_done(chunk_id: str) -> str: + data = { + "id": chunk_id, + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": "jon-snow", + "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}], + } + return f"data: {json.dumps(data)}\n\ndata: [DONE]\n\n" + + +async def _stream_text(text: str) -> AsyncGenerator[str, None]: + chunk_id = f"chatcmpl-{int(time.time())}" + # Emit word by word for a natural feel + words = text.split(" ") + for i, word in enumerate(words): + token = word + (" " if i < len(words) - 1 else "") + yield _sse_chunk(token, chunk_id) + await asyncio.sleep(0.005) + yield _sse_done(chunk_id) + + +async def _stream_llm(messages: list[dict], use_smart: bool = False) -> AsyncGenerator[str, None]: + chunk_id = f"chatcmpl-{int(time.time())}" + collected = [] + try: + response = await stream_completion(messages, use_smart=use_smart) + async for chunk in response: + if chunk.choices and chunk.choices[0].delta.content: + content = chunk.choices[0].delta.content + collected.append(content) + yield _sse_chunk(content, chunk_id) + yield _sse_done(chunk_id) + except Exception as e: + logger.error(f"LLM stream error: {e}") + error_msg = f"Error reaching LLM: {e}" + async for part in _stream_text(error_msg): + yield part + collected.append(error_msg) + return + + +# --- Routes --- + +@app.get("/health") +async def health(): + return {"status": "ok", "agent": "jon-snow", "version": "0.2.0"} + + +@app.get("/v1/models") +async def list_models(): + return { + "object": "list", + "data": [{"id": "jon-snow", "object": "model", "created": 0, "owned_by": "nxm"}], + } + + +@app.post("/v1/chat/completions") +async def chat_completions(req: ChatRequest): + user_message = next((m.content for m in reversed(req.messages) if m.role == "user"), "") + intent = classify_intent(user_message) + logger.info(f"intent={intent} msg={user_message[:100]!r}") + + messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages += [{"role": m.role, "content": m.content} for m in req.messages] + + async def generate() -> AsyncGenerator[str, None]: + summary = "" + + if intent == "status": + status_ctx = get_all_agent_status(AGENT_OS_DIR) + agent_name = extract_agent_name(user_message) + if agent_name: + output = get_agent_output(SITES_DIR, agent_name) + if output: + status_ctx += f"\n\n### {agent_name} last output:\n{output}" + + messages[0]["content"] += f"\n\n## Current Agent Status\n{status_ctx}" + async for chunk in _stream_llm(messages, use_smart=False): + yield chunk + summary = f"Status query: {user_message[:100]}" + + elif intent == "task": + project_hint = extract_project_name(user_message) + title = user_message.strip() + try: + issue = await create_plane_issue(title, project_hint) + response_text = ( + f"Task logged to Plane.\n\n" + f"**{issue['title']}** \n" + f"Project: *{issue['project']}* | #{issue['sequence_id']}\n\n" + f"I can't execute tasks yet (Phase 3). It's in the backlog." + ) + summary = f"Task created: {issue['title']}" + except Exception as e: + response_text = f"Couldn't log to Plane ({e}). Task noted locally: {user_message[:100]}" + summary = f"Plane error: {e}" + async for chunk in _stream_text(response_text): + yield chunk + + else: # planning / general + async for chunk in _stream_llm(messages, use_smart=True): + yield chunk + summary = f"Planning query: {user_message[:100]}" + + _write_status(intent, summary) + + if req.stream: + return StreamingResponse(generate(), media_type="text/event-stream") + + # Non-streaming: collect full response + full_text = "" + async for chunk in generate(): + if chunk.startswith("data: ") and "[DONE]" not in chunk: + try: + data = json.loads(chunk[6:]) + token = data.get("choices", [{}])[0].get("delta", {}).get("content", "") + full_text += token + except Exception: + pass + + return JSONResponse({ + "id": f"chatcmpl-{int(time.time())}", + "object": "chat.completion", + "created": int(time.time()), + "model": "jon-snow", + "choices": [{"index": 0, "message": {"role": "assistant", "content": full_text}, "finish_reason": "stop"}], + "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}, + }) diff --git a/app/tools.py b/app/tools.py new file mode 100644 index 0000000..8d1b122 --- /dev/null +++ b/app/tools.py @@ -0,0 +1,112 @@ +import json +import logging +import os +from pathlib import Path + +import httpx + +logger = logging.getLogger("jon-snow.tools") + +PLANE_URL = os.getenv("PLANE_URL", "http://172.27.40.3:8095") +PLANE_WORKSPACE = os.getenv("PLANE_WORKSPACE", "nxm") +PLANE_API_KEY = os.getenv("PLANE_API_KEY", "") + +AGENT_FULL_NAMES = [ + "hodor-gateway", "bran-changelog", "varys-monitor", "sam-research", + "raven-notify", "qyburn-coder", "citadel-mcp", "jon-snow", +] + +SHORT_TO_FULL = { + "hodor": "hodor-gateway", + "bran": "bran-changelog", + "varys": "varys-monitor", + "sam": "sam-research", + "raven": "raven-notify", + "qyburn": "qyburn-coder", + "citadel": "citadel-mcp", + "jon": "jon-snow", +} + + +def get_all_agent_status(agent_os_dir: Path) -> str: + logs_dir = agent_os_dir / "logs" + if not logs_dir.exists(): + return "Agent log directory not found." + + lines = [] + for full_name in AGENT_FULL_NAMES: + log_file = logs_dir / full_name / "last-run.json" + if log_file.exists(): + try: + data = json.loads(log_file.read_text()) + ts = data.get("timestamp", "unknown")[:19].replace("T", " ") + status = data.get("status", "unknown") + result = data.get("result", "")[:120] + icon = "OK" if status == "success" else "FAIL" + lines.append(f"- **{full_name}**: [{icon}] {ts} UTC — {result}") + except Exception as e: + lines.append(f"- **{full_name}**: error reading log ({e})") + else: + lines.append(f"- **{full_name}**: no log (never run or not yet deployed)") + + return "\n".join(lines) + + +def get_agent_output(sites_dir: Path, agent_name: str) -> str | None: + short_name = agent_name if agent_name in SHORT_TO_FULL else next( + (k for k, v in SHORT_TO_FULL.items() if v == agent_name), agent_name + ) + output_file = sites_dir / short_name / "last-output.md" + if not output_file.exists(): + return None + content = output_file.read_text() + return content[:3000] + + +async def get_plane_projects() -> list[dict]: + headers = {"X-Api-Key": PLANE_API_KEY} + async with httpx.AsyncClient() as client: + resp = await client.get( + f"{PLANE_URL}/api/v1/workspaces/{PLANE_WORKSPACE}/projects/", + headers=headers, timeout=10, + ) + resp.raise_for_status() + return resp.json().get("results", []) + + +def _resolve_project(projects: list[dict], hint: str | None) -> dict: + if hint: + hint_lower = hint.lower() + for p in projects: + if hint_lower in p["name"].lower(): + return p + # Default: Agent Ecosystem + for p in projects: + if "agent" in p["name"].lower(): + return p + return projects[0] + + +async def create_plane_issue(title: str, project_hint: str | None = None) -> dict: + projects = await get_plane_projects() + if not projects: + raise ValueError("No Plane projects found") + + project = _resolve_project(projects, project_hint) + + headers = {"X-Api-Key": PLANE_API_KEY, "Content-Type": "application/json"} + async with httpx.AsyncClient() as client: + resp = await client.post( + f"{PLANE_URL}/api/v1/workspaces/{PLANE_WORKSPACE}/projects/{project['id']}/issues/", + headers=headers, + json={"name": title[:255], "priority": "none"}, + timeout=10, + ) + resp.raise_for_status() + issue = resp.json() + + return { + "sequence_id": issue.get("sequence_id", "?"), + "title": title[:80], + "project": project["name"], + } diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..41b374e --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,26 @@ +services: + jon-snow: + build: . + container_name: jon-snow + restart: unless-stopped + ports: + - "8900:8900" + volumes: + - /opt/agent-os:/opt/agent-os:rw + - /opt/sites:/opt/sites:rw + environment: + FAST_MODEL: ${FAST_MODEL:-ollama/gemma4} + SMART_MODEL: ${SMART_MODEL:-ollama/gemma4} + OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://172.27.40.20:11434} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} + PLANE_URL: ${PLANE_URL:-http://172.27.40.3:8095} + PLANE_WORKSPACE: ${PLANE_WORKSPACE:-nxm} + PLANE_API_KEY: ${PLANE_API_KEY} + AGENT_OS_DIR: /opt/agent-os + SITES_DIR: /opt/sites + networks: + - proxy + +networks: + proxy: + external: true diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..23128d4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +fastapi==0.115.12 +uvicorn[standard]==0.34.3 +litellm==1.72.6 +httpx==0.28.1 +pydantic==2.11.4 +python-multipart==0.0.20