feat: add raven stub, state tracking, sam-research + searxng monitoring

- RAVEN_URL env var: sends down/recovery alerts when Raven is live (silent no-op until then)
- service-states.json persists prev state for change detection
- config.yaml: adds sam-research + searxng services, adds sam/citadel/varys agents

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
nxm
2026-05-06 10:54:43 +02:00
parent 01ced45815
commit ffd907cd80
3 changed files with 44 additions and 0 deletions
+36
View File
@@ -13,9 +13,36 @@ logger = logging.getLogger("varys")
# Root directory for generated output; the status page is written under its "varys" subdirectory.
# Overridable via the SITES_DIR environment variable.
SITES_DIR = Path(os.getenv("SITES_DIR", "/opt/sites"))
# Root of the agent-os tree; persisted monitor state lives under its logs/varys-monitor directory.
# Overridable via the AGENT_OS_DIR environment variable.
AGENT_OS_DIR = Path(os.getenv("AGENT_OS_DIR", "/opt/agent-os"))
# Base URL of the Raven notification service. Empty (the default) disables notifications.
RAVEN_URL = os.getenv("RAVEN_URL", "")
# config.yaml located in the same directory as this module.
CONFIG_FILE = Path(__file__).parent / "config.yaml"
def _load_prev_states() -> dict:
    """Return the service statuses persisted by the previous run.

    Reads ``service-states.json`` from ``AGENT_OS_DIR/logs/varys-monitor``.

    Returns:
        The decoded JSON object, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object. The
        result is therefore always safe to call ``.get`` on.
    """
    path = AGENT_OS_DIR / "logs" / "varys-monitor" / "service-states.json"
    try:
        states = json.loads(path.read_text())
    except FileNotFoundError:
        # Nothing persisted yet (e.g. first run): not worth a warning.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.warning(f"could not load previous service states from {path}: {e}")
        return {}
    if not isinstance(states, dict):
        # Valid JSON of the wrong shape (list, string, ...) would break the
        # caller's .get() lookups, so treat it like a missing file.
        logger.warning(f"ignoring previous service states in {path}: not a JSON object")
        return {}
    return states
def _save_states(services: list):
    """Persist each service's current status for the next run to compare against.

    Writes a ``{name: status}`` JSON object to ``service-states.json`` under
    ``AGENT_OS_DIR/logs/varys-monitor``, creating the directory if needed.

    Args:
        services: Service result dicts; each must provide ``"name"`` and
            ``"status"`` keys.
    """
    path = AGENT_OS_DIR / "logs" / "varys-monitor" / "service-states.json"
    path.parent.mkdir(parents=True, exist_ok=True)
    snapshot = {s["name"]: s["status"] for s in services}
    # Write to a sibling temp file and rename it over the target, so an
    # interrupted write cannot leave a truncated state file behind.
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(json.dumps(snapshot))
    tmp_path.replace(path)
def _notify_raven(message: str, severity: str):
    """Send an alert to Raven; do nothing when ``RAVEN_URL`` is empty.

    Best-effort: any failure (connection error, timeout, HTTP error status)
    is logged as a warning and never raised, so a notification problem
    cannot abort the caller.

    Args:
        message: Human-readable alert text.
        severity: Severity label forwarded to Raven unchanged.
    """
    if not RAVEN_URL:
        return
    # Tolerate a trailing slash in the configured base URL.
    endpoint = RAVEN_URL.rstrip("/") + "/notify"
    try:
        response = httpx.post(
            endpoint,
            json={"message": message, "severity": severity, "source": "varys"},
            timeout=5,
        )
        # httpx does not raise on 4xx/5xx by itself; without this check an
        # error response would be logged as a successful notification.
        response.raise_for_status()
        logger.info(f"raven notified: {message}")
    except Exception as e:
        # Deliberately broad: notification is optional and must never raise.
        logger.warning(f"raven notify failed (raven not live yet?): {e}")
def check_service(name: str, url: str) -> dict:
start = time.monotonic()
try:
@@ -151,6 +178,15 @@ def main():
logger.info(f"agent {agent['name']}: {result['status']}")
agents.append(result)
prev_states = _load_prev_states()
for s in services:
prev = prev_states.get(s["name"])
if prev and prev != "down" and s["status"] == "down":
_notify_raven(f"{s['name']} is DOWN", "critical")
elif prev == "down" and s["status"] not in ("down", "degraded"):
_notify_raven(f"{s['name']} recovered (UP)", "info")
_save_states(services)
out_dir = SITES_DIR / "varys"
out_dir.mkdir(parents=True, exist_ok=True)
(out_dir / "index.html").write_text(render_html(services, agents))