Initial sam-research agent — web research via SearXNG + Ollama synthesis

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-06 04:54:52 +02:00
commit 371679d330
4 changed files with 249 additions and 0 deletions
+217
View File
@@ -0,0 +1,217 @@
import json
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
import httpx
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Configure root logging at INFO; every record carries a "[sam]" tag.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [sam] %(message)s")
logger = logging.getLogger("sam")

# Application object that the route decorators in this file attach to.
app = FastAPI(title="sam-research", version="0.1.0")

# Each setting below is read from the environment variable of the same name;
# the second argument is the fallback used when the variable is unset.
# NOTE(review): the Ollama fallback is a hard-coded private address — confirm
# it is right for every deployment.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://172.27.6.139:11434")  # base URL; "/api/chat" is appended
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "gemma4")  # model used when a request names none
SEARXNG_URL = os.getenv("SEARXNG_URL", "http://searxng:8080")  # base URL; "/search" is appended
SITES_DIR = Path(os.getenv("SITES_DIR", "/opt/sites"))  # outputs are written under SITES_DIR/sam
AGENT_OS_DIR = Path(os.getenv("AGENT_OS_DIR", "/opt/agent-os"))  # run log goes under logs/sam-research

# JSON file holding the most recent runs, and the number of entries kept in it.
HISTORY_FILE = SITES_DIR / "sam" / "history.json"
MAX_HISTORY = 10
def load_history() -> list[dict]:
    """Return the research history stored in ``HISTORY_FILE``.

    The file is expected to hold a JSON array of objects. A missing file is
    the normal first-run state and yields an empty list silently. Any other
    problem (unreadable file, invalid JSON, wrong top-level type) is logged
    as a warning and also yields an empty list, so a damaged history file
    never breaks a request.

    Returns:
        The stored entries that are JSON objects, in file order; ``[]`` when
        nothing usable could be loaded.
    """
    try:
        raw = HISTORY_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        return []
    except (OSError, ValueError) as exc:
        # ValueError covers UnicodeDecodeError from a non-UTF-8 file.
        logger.warning("could not read history file %s: %s", HISTORY_FILE, exc)
        return []

    try:
        history = json.loads(raw)
    except ValueError as exc:
        # json.JSONDecodeError is a ValueError subclass.
        logger.warning("history file %s is not valid JSON: %s", HISTORY_FILE, exc)
        return []

    if not isinstance(history, list):
        logger.warning("history file %s does not contain a JSON array", HISTORY_FILE)
        return []

    # Consumers call .get() on every entry, so drop anything that is not an object.
    return [entry for entry in history if isinstance(entry, dict)]
def save_history(entry: dict):
    """Put *entry* at the front of the stored history and rewrite the file.

    The current history is loaded, the new entry is placed first, and only
    the first ``MAX_HISTORY`` entries are written back to ``HISTORY_FILE``
    as indented JSON. The parent directory is created when absent.
    """
    # Newest first; everything beyond the cap falls off the end.
    trimmed = [entry, *load_history()][:MAX_HISTORY]

    target_dir = HISTORY_FILE.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    serialized = json.dumps(trimmed, indent=2)
    HISTORY_FILE.write_text(serialized)
async def search_web(query: str, max_results: int) -> list[dict]:
    """Search SearXNG for *query* and return at most *max_results* hits.

    Args:
        query: Search terms sent as the ``q`` parameter.
        max_results: Maximum number of hits to return. Values below zero are
            treated as zero; a negative slice bound would otherwise drop hits
            from the end of the list instead of limiting it.

    Returns:
        A list of dicts with the keys ``title``, ``url`` and ``snippet``.
        Missing or null fields in the SearXNG response become ``""``.

    Raises:
        httpx.HTTPError: On transport failures or a non-2xx response.
        ValueError: If the response body is not valid JSON.
    """
    params = {"q": query, "format": "json", "categories": "general", "language": "en"}
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(f"{SEARXNG_URL}/search", params=params)
        response.raise_for_status()
        payload = response.json()

    # Tolerate a body that is valid JSON but not the expected object shape.
    hits = payload.get("results") if isinstance(payload, dict) else None
    if not isinstance(hits, list):
        return []

    limit = max(0, max_results)
    return [
        {
            # `or ""` maps both a missing key and an explicit null to "".
            "title": hit.get("title") or "",
            "url": hit.get("url") or "",
            "snippet": hit.get("content") or "",
        }
        for hit in hits[:limit]
        if isinstance(hit, dict)
    ]
async def ollama_synthesize(prompt: str, model: str) -> str:
    """Send *prompt* to Ollama's chat endpoint and return the reply text.

    A single user message is posted to ``{OLLAMA_URL}/api/chat`` with
    streaming disabled; the ``message.content`` field of the JSON reply is
    returned. A non-2xx response raises via ``raise_for_status``.
    """
    endpoint = f"{OLLAMA_URL}/api/chat"
    user_turn = {"role": "user", "content": prompt}
    chat_request = {"model": model, "messages": [user_turn], "stream": False}

    async with httpx.AsyncClient(timeout=120.0) as session:
        reply = await session.post(endpoint, json=chat_request)
        reply.raise_for_status()
        body = reply.json()

    return body["message"]["content"]
def build_prompt(query: str, results: list[dict], instructions: str | None) -> str:
lines = [f"You are a research assistant. The user wants information on: {query}\n"]
if instructions:
lines.append(f"Instructions: {instructions}\n")
lines.append("Web search results:\n")
for i, r in enumerate(results, 1):
lines.append(f"{i}. **{r['title']}**")
lines.append(f" URL: {r['url']}")
if r["snippet"]:
lines.append(f" {r['snippet']}")
lines.append("")
lines.append(
"Based on these results, provide a thorough answer. "
"Follow any format instructions given above. "
"Cite sources by referencing their URLs inline."
)
return "\n".join(lines)
def render_html(history: list[dict]) -> str:
    """Render the HTML status page listing recent research runs.

    Each history entry contributes one table row showing its timestamp,
    query, model and summary length. All values taken from *history* are
    HTML-escaped before interpolation: the query comes straight from the
    request body, so inserting it verbatim would allow stored script
    injection into the generated page.

    Args:
        history: Entries with the optional keys ``timestamp``, ``query``,
            ``model`` and ``summary_chars``.

    Returns:
        The complete HTML document. When *history* yields no rows, a single
        placeholder row is shown instead.
    """
    from html import escape  # function-scope import keeps this block self-contained

    now_iso = datetime.now(timezone.utc).isoformat()

    row_chunks = []
    for entry in history:
        raw_ts = entry.get("timestamp", "")
        # escape() also encodes quotes, which matters inside the attribute value.
        ts_span = f'<span data-utc="{escape(str(raw_ts))}"></span>' if raw_ts else ""
        query = escape(str(entry.get("query", "")))
        model = escape(str(entry.get("model", "")))
        result_len = escape(str(entry.get("summary_chars", 0)))
        row_chunks.append(f"""
<tr>
<td>{ts_span}</td>
<td class="query">{query}</td>
<td class="dim">{model}</td>
<td class="dim">{result_len} chars</td>
</tr>""")

    rows = "".join(row_chunks)
    table_rows = rows or "<tr><td colspan='4' class='dim'>No research runs yet.</td></tr>"

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Sam — Research</title>
<style>
* {{ box-sizing: border-box; margin: 0; padding: 0; }}
body {{ font-family: monospace; background: #0d1117; color: #c9d1d9; padding: 2rem; }}
h1 {{ color: #58a6ff; margin-bottom: 0.25rem; }}
.desc {{ color: #8b949e; font-size: 0.9rem; margin: 0.4rem 0 0.25rem; }}
.meta {{ color: #8b949e; font-size: 0.85rem; margin-bottom: 2rem; }}
.meta a {{ color: #8b949e; }}
h2 {{ color: #8b949e; font-size: 0.85rem; text-transform: uppercase; letter-spacing: 0.05em; margin: 2rem 0 0.75rem; }}
table {{ width: 100%; border-collapse: collapse; }}
td {{ padding: 0.6rem 1rem; border-bottom: 1px solid #21262d; font-size: 0.9rem; }}
.query {{ color: #c9d1d9; }}
.dim {{ color: #8b949e; }}
</style>
</head>
<body>
<h1>Sam — Research Agent</h1>
<p class="desc">Web search via SearXNG + synthesis via Ollama. POST /research to use.</p>
<p class="meta">Updated <span data-utc="{now_iso}"></span> &nbsp;·&nbsp; <a href="/">← home</a></p>
<h2>Recent Queries</h2>
<table><tbody>{table_rows}</tbody></table>
<script>
document.querySelectorAll('[data-utc]').forEach(el => {{
el.textContent = new Date(el.dataset.utc).toLocaleString(undefined, {{year:'numeric',month:'2-digit',day:'2-digit',hour:'2-digit',minute:'2-digit'}});
}});
</script>
</body>
</html>"""
class ResearchRequest(BaseModel):
    # Request body accepted by the POST /research handler.

    # Search terms; sent to the web search and quoted in the prompt.
    query: str
    # Upper bound on the number of search hits passed on to the prompt.
    max_results: int = 5
    # Optional free-text guidance; when set it is added to the prompt
    # on an "Instructions:" line.
    instructions: str | None = None
    # Optional model name; when omitted or empty the handler falls back
    # to OLLAMA_MODEL.
    model: str | None = None
@app.get("/health")
def health():
    # Liveness payload: a fixed "ok" status and agent name, plus the
    # configured SearXNG URL, Ollama URL and default model.
    return dict(
        status="ok",
        agent="sam-research",
        searxng_url=SEARXNG_URL,
        ollama_url=OLLAMA_URL,
        model=OLLAMA_MODEL,
    )
def _persist_run(query: str, model: str, timestamp: str, results: list[dict], summary: str) -> None:
    """Write the markdown report, history, index page and run log for one run."""
    out_dir = SITES_DIR / "sam"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8: these files contain non-ASCII text (the em dash below,
    # model output) and must not depend on the process locale.
    (out_dir / "last-output.md").write_text(
        f"# Sam Research — {query}\n\n"
        f"_Query: {query}_ \n"
        f"_Model: {model}_ \n"
        f"_Results: {len(results)}_ \n"
        f"_Timestamp: {timestamp}_\n\n"
        f"---\n\n{summary}",
        encoding="utf-8",
    )

    save_history({
        "timestamp": timestamp,
        "query": query,
        "model": model,
        "summary_chars": len(summary),
    })
    # The page declares charset UTF-8, so it has to be written as UTF-8.
    (out_dir / "index.html").write_text(render_html(load_history()), encoding="utf-8")

    log_dir = AGENT_OS_DIR / "logs" / "sam-research"
    log_dir.mkdir(parents=True, exist_ok=True)
    (log_dir / "last-run.json").write_text(
        json.dumps({
            "agent": "sam-research",
            "timestamp": timestamp,
            "status": "success",
            "result": f"query='{query}' results={len(results)} summary={len(summary)} chars",
        }, indent=2),
        encoding="utf-8",
    )


@app.post("/research")
async def research(req: ResearchRequest):
    """Search the web for the query, synthesise an answer, and record the run.

    The query is sent to SearXNG, the hits are turned into a prompt, and the
    prompt is answered by the requested Ollama model (or the configured
    default). The run is then written to the site directory and the agent
    log before the response is returned.

    Returns a JSON object with ``query``, ``results``, ``summary``,
    ``model`` and ``timestamp``.

    Responds with 502 when SearXNG or Ollama is unreachable, returns an
    error status, or returns a body that cannot be interpreted.
    """
    model = req.model or OLLAMA_MODEL
    timestamp = datetime.now(timezone.utc).isoformat()
    logger.info("research: query='%s' max_results=%s model=%s", req.query, req.max_results, model)

    try:
        results = await search_web(req.query, req.max_results)
    except (httpx.HTTPError, ValueError) as e:
        # ValueError: SearXNG answered with a body that is not JSON.
        raise HTTPException(status_code=502, detail=f"SearXNG error: {e}") from e
    logger.info("got %d results from SearXNG", len(results))

    prompt = build_prompt(req.query, results, req.instructions)
    try:
        summary = await ollama_synthesize(prompt, model)
    except (httpx.HTTPError, ValueError, KeyError) as e:
        # ValueError/KeyError: the reply was not JSON or lacked message.content.
        raise HTTPException(status_code=502, detail=f"Ollama error: {e}") from e
    logger.info("synthesis complete: %d chars", len(summary))

    _persist_run(req.query, model, timestamp, results, summary)

    return {
        "query": req.query,
        "results": results,
        "summary": summary,
        "model": model,
        "timestamp": timestamp,
    }