feat: Jon Snow Phase 2 — FastAPI orchestrator with LiteLLM brain

OpenAI-compatible API at :8900. Intent classifier routes status queries to FAST_MODEL (Ollama), task submissions to Plane, planning to SMART_MODEL. Reads agent-os logs for status context. Phase 3: approval gate + execution. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-16 13:06:20 +00:00
commit a25deeb8f4
9 changed files with 503 additions and 0 deletions
@@ -0,0 +1,3 @@
+.env
+__pycache__/
+*.pyc
@@ -0,0 +1,11 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY app/ ./app/
+
+EXPOSE 8900
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8900", "--log-level", "info"]
@@ -0,0 +1,40 @@
+import logging
+import os
+
+import litellm
+
+logger = logging.getLogger("jon-snow.brain")
+litellm.set_verbose = False
+
+FAST_MODEL = os.getenv("FAST_MODEL", "ollama/gemma4")
+SMART_MODEL = os.getenv("SMART_MODEL", "ollama/gemma4")
+OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://172.27.40.20:11434")
+
+
+def _extra_kwargs(model: str) -> dict:
+    if model.startswith("ollama/"):
+        return {"api_base": OLLAMA_BASE_URL}
+    return {}
+
+
+async def stream_completion(messages: list[dict], use_smart: bool = False):
+    model = SMART_MODEL if use_smart else FAST_MODEL
+    logger.info(f"Brain: model={model} smart={use_smart}")
+    try:
+        return await litellm.acompletion(
+            model=model,
+            messages=messages,
+            stream=True,
+            **_extra_kwargs(model),
+        )
+    except Exception as e:
+        logger.error(f"Brain error ({model}): {e}")
+        if use_smart and model != FAST_MODEL:
+            logger.info("Falling back to FAST_MODEL")
+            return await litellm.acompletion(
+                model=FAST_MODEL,
+                messages=messages,
+                stream=True,
+                **_extra_kwargs(FAST_MODEL),
+            )
+        raise
@@ -0,0 +1,78 @@
+AGENT_NAMES = {"hodor", "bran", "varys", "sam", "raven", "qyburn", "citadel", "jon"}
+
+STATUS_PHRASES = {
+    "status", "health", "running", "last run", "what did", "when did",
+    "show me", "how is", "is it", "is running", "did it run", "output",
+    "summary", "report", "check", "monitor", "alive", "up",
+}
+
+TASK_PHRASES = {
+    "create task", "add task", "add issue", "create issue", "log task",
+    "log this", "new task", "new issue", "add to plane", "add to backlog",
+    "plan", "schedule", "remind", "track", "todo", "to do",
+}
+
+RESEARCH_PHRASES = {
+    "research", "search", "find out", "look up", "what is", "explain",
+    "how does", "documentation", "docs",
+}
+
+
+def classify_intent(message: str) -> str:
+    msg = message.lower()
+    words = set(msg.split())
+
+    # Agent name + query word → status
+    if words & AGENT_NAMES:
+        if any(p in msg for p in STATUS_PHRASES) or words & {"status", "check", "output", "run"}:
+            return "status"
+
+    # Explicit task phrases → task
+    if any(p in msg for p in TASK_PHRASES):
+        return "task"
+
+    # Generic status signal words
+    if any(p in msg for p in STATUS_PHRASES) and words & AGENT_NAMES:
+        return "status"
+
+    # Status if asking purely about the agent ecosystem
+    if words & AGENT_NAMES and not any(p in msg for p in {"build", "implement", "create", "make"}):
+        return "status"
+
+    # Research intent → route to smart model
+    if any(p in msg for p in RESEARCH_PHRASES):
+        return "planning"
+
+    return "planning"
+
+
+def extract_agent_name(message: str) -> str | None:
+    msg = message.lower()
+    for name in AGENT_NAMES:
+        if name in msg:
+            return name
+    return None
+
+
+PROJECT_KEYWORDS = {
+    "bni": "BNI Scheduler",
+    "scheduler": "BNI Scheduler",
+    "monitor": "Monitoring",
+    "monitoring": "Monitoring",
+    "grafana": "Monitoring",
+    "maester": "Maester Reports",
+    "report": "Maester Reports",
+    "csf": "Maester Reports",
+    "nist": "Maester Reports",
+    "portal": "Nexum Portal",
+    "authelia": "Nexum Portal",
+    "nexum": "Nexum Portal",
+}
+
+
+def extract_project_name(message: str) -> str | None:
+    msg = message.lower()
+    for kw, project in PROJECT_KEYWORDS.items():
+        if kw in msg:
+            return project
+    return None
@@ -0,0 +1,227 @@
+import asyncio
+import json
+import logging
+import os
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import AsyncGenerator
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import BaseModel
+
+from .brain import stream_completion
+from .intent import classify_intent, extract_agent_name, extract_project_name
+from .tools import create_plane_issue, get_agent_output, get_all_agent_status
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")
+logger = logging.getLogger("jon-snow")
+
+AGENT_OS_DIR = Path(os.getenv("AGENT_OS_DIR", "/opt/agent-os"))
+SITES_DIR = Path(os.getenv("SITES_DIR", "/opt/sites"))
+
+app = FastAPI(title="Jon Snow", version="0.2.0")
+app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
+
+SYSTEM_PROMPT = """You are Jon Snow, chief of staff for the NxM home lab agent ecosystem on a self-hosted Linux server (172.27.40.3).
+
+Live agents you coordinate:
+- hodor (8200): HTTP gateway — routes requests to Ollama
+- bran: Daily changelog summariser — runs 06:00 SAST, writes to /opt/sites/changelog/
+- varys: Infrastructure monitor — runs every 15 min, HTTP health checks + agent watchdog
+- sam (8500): Research agent — SearXNG + Ollama synthesis
+- raven (8400): Notifications — Discord webhook + Gmail SMTP
+- qyburn (8700): LLM coding agent — qwen2.5-coder:14b, approve/reject workflow
+- citadel (8300): MCP tool registry — 16 tools including Plane integration
+
+Infrastructure: Ubuntu Docker host 172.27.40.3, Ollama at 172.27.40.20:11434, Netbird VPN (100.119.x.x), Plane project management, Gitea at git.nxm.co.za.
+
+Your current capabilities (Phase 2):
+1. Report agent status — last run time, success/failure, output summary
+2. Log tasks to Plane project management
+3. Answer questions about the infrastructure
+4. Route complex questions to your SMART_MODEL brain
+
+You cannot yet execute tasks autonomously — that is Phase 3. When a user submits a task, log it to Plane and confirm.
+Be concise — the user is often on mobile. Use short markdown lists, not long paragraphs."""
+
+
+# --- OpenAI-compatible request/response models ---
+
+class Message(BaseModel):
+    role: str
+    content: str
+
+
+class ChatRequest(BaseModel):
+    model: str = "jon-snow"
+    messages: list[Message]
+    stream: bool = True
+    temperature: float | None = None
+    max_tokens: int | None = None
+
+
+# --- Output helpers ---
+
+def _write_status(intent: str, summary: str, status: str = "success") -> None:
+    log_dir = AGENT_OS_DIR / "logs" / "jon-snow"
+    log_dir.mkdir(parents=True, exist_ok=True)
+    payload = {
+        "agent": "jon-snow",
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "status": status,
+        "result": f"[{intent}] {summary[:200]}",
+    }
+    (log_dir / "last-run.json").write_text(json.dumps(payload, indent=2))
+
+    out_dir = SITES_DIR / "jon-snow"
+    out_dir.mkdir(parents=True, exist_ok=True)
+    (out_dir / "last-output.md").write_text(
+        f"# Jon Snow — Last Response\n\n**{payload['timestamp']}**\n\nIntent: `{intent}`\n\n{summary[:500]}\n"
+    )
+
+
+# --- SSE streaming helpers ---
+
+def _sse_chunk(content: str, chunk_id: str) -> str:
+    data = {
+        "id": chunk_id,
+        "object": "chat.completion.chunk",
+        "created": int(time.time()),
+        "model": "jon-snow",
+        "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}],
+    }
+    return f"data: {json.dumps(data)}\n\n"
+
+
+def _sse_done(chunk_id: str) -> str:
+    data = {
+        "id": chunk_id,
+        "object": "chat.completion.chunk",
+        "created": int(time.time()),
+        "model": "jon-snow",
+        "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+    }
+    return f"data: {json.dumps(data)}\n\ndata: [DONE]\n\n"
+
+
+async def _stream_text(text: str) -> AsyncGenerator[str, None]:
+    chunk_id = f"chatcmpl-{int(time.time())}"
+    # Emit word by word for a natural feel
+    words = text.split(" ")
+    for i, word in enumerate(words):
+        token = word + (" " if i < len(words) - 1 else "")
+        yield _sse_chunk(token, chunk_id)
+        await asyncio.sleep(0.005)
+    yield _sse_done(chunk_id)
+
+
+async def _stream_llm(messages: list[dict], use_smart: bool = False) -> AsyncGenerator[str, None]:
+    chunk_id = f"chatcmpl-{int(time.time())}"
+    collected = []
+    try:
+        response = await stream_completion(messages, use_smart=use_smart)
+        async for chunk in response:
+            if chunk.choices and chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                collected.append(content)
+                yield _sse_chunk(content, chunk_id)
+        yield _sse_done(chunk_id)
+    except Exception as e:
+        logger.error(f"LLM stream error: {e}")
+        error_msg = f"Error reaching LLM: {e}"
+        async for part in _stream_text(error_msg):
+            yield part
+        collected.append(error_msg)
+    return
+
+
+# --- Routes ---
+
+@app.get("/health")
+async def health():
+    return {"status": "ok", "agent": "jon-snow", "version": "0.2.0"}
+
+
+@app.get("/v1/models")
+async def list_models():
+    return {
+        "object": "list",
+        "data": [{"id": "jon-snow", "object": "model", "created": 0, "owned_by": "nxm"}],
+    }
+
+
+@app.post("/v1/chat/completions")
+async def chat_completions(req: ChatRequest):
+    user_message = next((m.content for m in reversed(req.messages) if m.role == "user"), "")
+    intent = classify_intent(user_message)
+    logger.info(f"intent={intent} msg={user_message[:100]!r}")
+
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+    messages += [{"role": m.role, "content": m.content} for m in req.messages]
+
+    async def generate() -> AsyncGenerator[str, None]:
+        summary = ""
+
+        if intent == "status":
+            status_ctx = get_all_agent_status(AGENT_OS_DIR)
+            agent_name = extract_agent_name(user_message)
+            if agent_name:
+                output = get_agent_output(SITES_DIR, agent_name)
+                if output:
+                    status_ctx += f"\n\n### {agent_name} last output:\n{output}"
+
+            messages[0]["content"] += f"\n\n## Current Agent Status\n{status_ctx}"
+            async for chunk in _stream_llm(messages, use_smart=False):
+                yield chunk
+            summary = f"Status query: {user_message[:100]}"
+
+        elif intent == "task":
+            project_hint = extract_project_name(user_message)
+            title = user_message.strip()
+            try:
+                issue = await create_plane_issue(title, project_hint)
+                response_text = (
+                    f"Task logged to Plane.\n\n"
+                    f"**{issue['title']}**  \n"
+                    f"Project: *{issue['project']}* | #{issue['sequence_id']}\n\n"
+                    f"I can't execute tasks yet (Phase 3). It's in the backlog."
+                )
+                summary = f"Task created: {issue['title']}"
+            except Exception as e:
+                response_text = f"Couldn't log to Plane ({e}). Task noted locally: {user_message[:100]}"
+                summary = f"Plane error: {e}"
+            async for chunk in _stream_text(response_text):
+                yield chunk
+
+        else:  # planning / general
+            async for chunk in _stream_llm(messages, use_smart=True):
+                yield chunk
+            summary = f"Planning query: {user_message[:100]}"
+
+        _write_status(intent, summary)
+
+    if req.stream:
+        return StreamingResponse(generate(), media_type="text/event-stream")
+
+    # Non-streaming: collect full response
+    full_text = ""
+    async for chunk in generate():
+        if chunk.startswith("data: ") and "[DONE]" not in chunk:
+            try:
+                data = json.loads(chunk[6:])
+                token = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
+                full_text += token
+            except Exception:
+                pass
+
+    return JSONResponse({
+        "id": f"chatcmpl-{int(time.time())}",
+        "object": "chat.completion",
+        "created": int(time.time()),
+        "model": "jon-snow",
+        "choices": [{"index": 0, "message": {"role": "assistant", "content": full_text}, "finish_reason": "stop"}],
+        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
+    })
@@ -0,0 +1,112 @@
+import json
+import logging
+import os
+from pathlib import Path
+
+import httpx
+
+logger = logging.getLogger("jon-snow.tools")
+
+PLANE_URL = os.getenv("PLANE_URL", "http://172.27.40.3:8095")
+PLANE_WORKSPACE = os.getenv("PLANE_WORKSPACE", "nxm")
+PLANE_API_KEY = os.getenv("PLANE_API_KEY", "")
+
+AGENT_FULL_NAMES = [
+    "hodor-gateway", "bran-changelog", "varys-monitor", "sam-research",
+    "raven-notify", "qyburn-coder", "citadel-mcp", "jon-snow",
+]
+
+SHORT_TO_FULL = {
+    "hodor": "hodor-gateway",
+    "bran": "bran-changelog",
+    "varys": "varys-monitor",
+    "sam": "sam-research",
+    "raven": "raven-notify",
+    "qyburn": "qyburn-coder",
+    "citadel": "citadel-mcp",
+    "jon": "jon-snow",
+}
+
+
+def get_all_agent_status(agent_os_dir: Path) -> str:
+    logs_dir = agent_os_dir / "logs"
+    if not logs_dir.exists():
+        return "Agent log directory not found."
+
+    lines = []
+    for full_name in AGENT_FULL_NAMES:
+        log_file = logs_dir / full_name / "last-run.json"
+        if log_file.exists():
+            try:
+                data = json.loads(log_file.read_text())
+                ts = data.get("timestamp", "unknown")[:19].replace("T", " ")
+                status = data.get("status", "unknown")
+                result = data.get("result", "")[:120]
+                icon = "OK" if status == "success" else "FAIL"
+                lines.append(f"- **{full_name}**: [{icon}] {ts} UTC — {result}")
+            except Exception as e:
+                lines.append(f"- **{full_name}**: error reading log ({e})")
+        else:
+            lines.append(f"- **{full_name}**: no log (never run or not yet deployed)")
+
+    return "\n".join(lines)
+
+
+def get_agent_output(sites_dir: Path, agent_name: str) -> str | None:
+    short_name = agent_name if agent_name in SHORT_TO_FULL else next(
+        (k for k, v in SHORT_TO_FULL.items() if v == agent_name), agent_name
+    )
+    output_file = sites_dir / short_name / "last-output.md"
+    if not output_file.exists():
+        return None
+    content = output_file.read_text()
+    return content[:3000]
+
+
+async def get_plane_projects() -> list[dict]:
+    headers = {"X-Api-Key": PLANE_API_KEY}
+    async with httpx.AsyncClient() as client:
+        resp = await client.get(
+            f"{PLANE_URL}/api/v1/workspaces/{PLANE_WORKSPACE}/projects/",
+            headers=headers, timeout=10,
+        )
+        resp.raise_for_status()
+        return resp.json().get("results", [])
+
+
+def _resolve_project(projects: list[dict], hint: str | None) -> dict:
+    if hint:
+        hint_lower = hint.lower()
+        for p in projects:
+            if hint_lower in p["name"].lower():
+                return p
+    # Default: Agent Ecosystem
+    for p in projects:
+        if "agent" in p["name"].lower():
+            return p
+    return projects[0]
+
+
+async def create_plane_issue(title: str, project_hint: str | None = None) -> dict:
+    projects = await get_plane_projects()
+    if not projects:
+        raise ValueError("No Plane projects found")
+
+    project = _resolve_project(projects, project_hint)
+
+    headers = {"X-Api-Key": PLANE_API_KEY, "Content-Type": "application/json"}
+    async with httpx.AsyncClient() as client:
+        resp = await client.post(
+            f"{PLANE_URL}/api/v1/workspaces/{PLANE_WORKSPACE}/projects/{project['id']}/issues/",
+            headers=headers,
+            json={"name": title[:255], "priority": "none"},
+            timeout=10,
+        )
+        resp.raise_for_status()
+        issue = resp.json()
+
+    return {
+        "sequence_id": issue.get("sequence_id", "?"),
+        "title": title[:80],
+        "project": project["name"],
+    }
@@ -0,0 +1,26 @@
+services:
+  jon-snow:
+    build: .
+    container_name: jon-snow
+    restart: unless-stopped
+    ports:
+      - "8900:8900"
+    volumes:
+      - /opt/agent-os:/opt/agent-os:rw
+      - /opt/sites:/opt/sites:rw
+    environment:
+      FAST_MODEL: ${FAST_MODEL:-ollama/gemma4}
+      SMART_MODEL: ${SMART_MODEL:-ollama/gemma4}
+      OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://172.27.40.20:11434}
+      ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
+      PLANE_URL: ${PLANE_URL:-http://172.27.40.3:8095}
+      PLANE_WORKSPACE: ${PLANE_WORKSPACE:-nxm}
+      PLANE_API_KEY: ${PLANE_API_KEY}
+      AGENT_OS_DIR: /opt/agent-os
+      SITES_DIR: /opt/sites
+    networks:
+      - proxy
+
+networks:
+  proxy:
+    external: true
@@ -0,0 +1,6 @@
+fastapi==0.115.12
+uvicorn[standard]==0.34.3
+litellm==1.72.6
+httpx==0.28.1
+pydantic==2.11.4
+python-multipart==0.0.20