Initial sam-research agent — web research via SearXNG + Ollama synthesis
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,217 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
# Process-wide logging: INFO and above, each line timestamped and tagged "[sam]".
logging.basicConfig(
    format="%(asctime)s [sam] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("sam")

# Application object; the route handlers further down register on it.
app = FastAPI(version="0.1.0", title="sam-research")
|
||||
|
||||
# Backend endpoints. Each value is read from the environment variable of the
# same name, falling back to the literal default when the variable is unset.
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://172.27.6.139:11434")
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "gemma4")
SEARXNG_URL = os.environ.get("SEARXNG_URL", "http://searxng:8080")

# Filesystem roots: published site output and agent run logs.
SITES_DIR = Path(os.environ.get("SITES_DIR", "/opt/sites"))
AGENT_OS_DIR = Path(os.environ.get("AGENT_OS_DIR", "/opt/agent-os"))

# Rolling record of recent queries, capped at MAX_HISTORY entries on save.
HISTORY_FILE = SITES_DIR.joinpath("sam", "history.json")
MAX_HISTORY = 10
|
||||
|
||||
|
||||
def load_history() -> list[dict]:
|
||||
if HISTORY_FILE.exists():
|
||||
try:
|
||||
return json.loads(HISTORY_FILE.read_text())
|
||||
except Exception:
|
||||
pass
|
||||
return []
|
||||
|
||||
|
||||
def save_history(entry: dict) -> None:
    """Record *entry* as the newest history item and persist the result.

    The stored history is loaded, *entry* is placed first, and only the
    first MAX_HISTORY items are written back to HISTORY_FILE as indented JSON.

    Args:
        entry: JSON-serialisable record describing one research run.
    """
    previous = load_history()
    if not isinstance(previous, list):
        # A history file holding some other JSON value cannot be extended.
        previous = []
    history = [entry, *previous][:MAX_HISTORY]

    HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling file and rename it into place so that a crash or a
    # concurrent reader never observes a half-written history file.
    scratch = HISTORY_FILE.with_name(HISTORY_FILE.name + ".tmp")
    scratch.write_text(json.dumps(history, indent=2), encoding="utf-8")
    scratch.replace(HISTORY_FILE)
|
||||
|
||||
|
||||
async def search_web(query: str, max_results: int) -> list[dict]:
    """Query SearXNG and return at most *max_results* normalised hits.

    Args:
        query: Search terms, sent as the ``q`` parameter.
        max_results: Maximum number of hits to return. Values below zero are
            treated as zero rather than slicing from the end of the list.

    Returns:
        A list of dicts with the string keys "title", "url" and "snippet".
        Missing or null fields become empty strings.

    Raises:
        httpx.HTTPError: On transport failures or a non-2xx response.
        ValueError: If the response body is not valid JSON.
    """
    params = {"q": query, "format": "json", "categories": "general", "language": "en"}
    async with httpx.AsyncClient(timeout=30.0) as client:
        r = await client.get(f"{SEARXNG_URL}/search", params=params)
        r.raise_for_status()
        data = r.json()

    raw_hits = data.get("results") if isinstance(data, dict) else None
    hits = [hit for hit in raw_hits if isinstance(hit, dict)] if isinstance(raw_hits, list) else []

    limit = max(max_results, 0)
    return [
        {
            # `or ""` also maps an explicit JSON null to an empty string, so
            # the prompt never contains the literal text "None".
            "title": hit.get("title") or "",
            "url": hit.get("url") or "",
            "snippet": hit.get("content") or "",
        }
        for hit in hits[:limit]
    ]
|
||||
|
||||
|
||||
async def ollama_synthesize(prompt: str, model: str) -> str:
    """Send *prompt* to Ollama's chat endpoint and return the reply text.

    The prompt is posted as a single user message with streaming disabled;
    the function returns the ``message.content`` field of the JSON response.

    Raises:
        httpx.HTTPError: On transport failures or a non-2xx response.
    """
    user_turn = {"role": "user", "content": prompt}
    body = {"model": model, "messages": [user_turn], "stream": False}
    endpoint = f"{OLLAMA_URL}/api/chat"

    # Generous timeout: the request only completes once the model has finished.
    async with httpx.AsyncClient(timeout=120.0) as session:
        reply = await session.post(endpoint, json=body)
        reply.raise_for_status()
        decoded = reply.json()
        return decoded["message"]["content"]
|
||||
|
||||
|
||||
def build_prompt(query: str, results: list[dict], instructions: str | None) -> str:
|
||||
lines = [f"You are a research assistant. The user wants information on: {query}\n"]
|
||||
if instructions:
|
||||
lines.append(f"Instructions: {instructions}\n")
|
||||
lines.append("Web search results:\n")
|
||||
for i, r in enumerate(results, 1):
|
||||
lines.append(f"{i}. **{r['title']}**")
|
||||
lines.append(f" URL: {r['url']}")
|
||||
if r["snippet"]:
|
||||
lines.append(f" {r['snippet']}")
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"Based on these results, provide a thorough answer. "
|
||||
"Follow any format instructions given above. "
|
||||
"Cite sources by referencing their URLs inline."
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def render_html(history: list[dict]) -> str:
    """Render the status page that lists recent research runs.

    Args:
        history: Run records. The keys read from each record are "timestamp",
            "query", "model" and "summary_chars"; all are optional.

    Returns:
        A complete HTML document. Timestamps are emitted as ``data-utc``
        attributes and formatted in the browser by the inline script.
    """
    # Function-scope import: html is standard library and only needed here.
    from html import escape

    def safe(value: object) -> str:
        # Queries and model names come straight from request bodies, so every
        # interpolated value is escaped for both text and attribute context.
        return escape(str(value), quote=True)

    now_iso = datetime.now(timezone.utc).isoformat()

    row_parts = []
    for entry in history:
        ts = entry.get("timestamp", "")
        ts_span = f'<span data-utc="{safe(ts)}"></span>' if ts else "—"
        query = safe(entry.get("query", ""))
        model = safe(entry.get("model", ""))
        result_len = safe(entry.get("summary_chars", 0))
        row_parts.append(f"""
<tr>
<td>{ts_span}</td>
<td class="query">{query}</td>
<td class="dim">{model}</td>
<td class="dim">{result_len} chars</td>
</tr>""")

    # Placeholder row when there is nothing to show.
    table_body = "".join(row_parts) or "<tr><td colspan='4' class='dim'>No research runs yet.</td></tr>"

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Sam — Research</title>
<style>
* {{ box-sizing: border-box; margin: 0; padding: 0; }}
body {{ font-family: monospace; background: #0d1117; color: #c9d1d9; padding: 2rem; }}
h1 {{ color: #58a6ff; margin-bottom: 0.25rem; }}
.desc {{ color: #8b949e; font-size: 0.9rem; margin: 0.4rem 0 0.25rem; }}
.meta {{ color: #8b949e; font-size: 0.85rem; margin-bottom: 2rem; }}
.meta a {{ color: #8b949e; }}
h2 {{ color: #8b949e; font-size: 0.85rem; text-transform: uppercase; letter-spacing: 0.05em; margin: 2rem 0 0.75rem; }}
table {{ width: 100%; border-collapse: collapse; }}
td {{ padding: 0.6rem 1rem; border-bottom: 1px solid #21262d; font-size: 0.9rem; }}
.query {{ color: #c9d1d9; }}
.dim {{ color: #8b949e; }}
</style>
</head>
<body>
<h1>Sam — Research Agent</h1>
<p class="desc">Web search via SearXNG + synthesis via Ollama. POST /research to use.</p>
<p class="meta">Updated <span data-utc="{now_iso}"></span> · <a href="/">← home</a></p>
<h2>Recent Queries</h2>
<table><tbody>{table_body}</tbody></table>
<script>
document.querySelectorAll('[data-utc]').forEach(el => {{
el.textContent = new Date(el.dataset.utc).toLocaleString(undefined, {{year:'numeric',month:'2-digit',day:'2-digit',hour:'2-digit',minute:'2-digit'}});
}});
</script>
</body>
</html>"""
|
||||
|
||||
|
||||
class ResearchRequest(BaseModel):
    """JSON body accepted by POST /research."""

    # Search terms; sent to SearXNG and repeated in the synthesis prompt.
    query: str
    # Upper bound on the number of search hits passed to the model.
    max_results: int = 5
    # Optional extra guidance placed in the prompt ahead of the results.
    instructions: str | None = None
    # Ollama model name; the handler falls back to OLLAMA_MODEL when unset.
    model: str | None = None
|
||||
|
||||
|
||||
@app.get("/health")
def health():
    """Report a static "ok" status together with the configured backends."""
    report = dict(status="ok", agent="sam-research")
    report.update(
        searxng_url=SEARXNG_URL,
        ollama_url=OLLAMA_URL,
        model=OLLAMA_MODEL,
    )
    return report
|
||||
|
||||
|
||||
def _persist_run(query: str, model: str, timestamp: str, results: list[dict], summary: str) -> None:
    """Write the markdown report, history page and last-run log for one run."""
    out_dir = SITES_DIR / "sam"
    out_dir.mkdir(parents=True, exist_ok=True)

    report = (
        f"# Sam Research — {query}\n\n"
        f"_Query: {query}_ \n"
        f"_Model: {model}_ \n"
        f"_Results: {len(results)}_ \n"
        f"_Timestamp: {timestamp}_\n\n"
        f"---\n\n{summary}"
    )
    # Explicit UTF-8: queries and summaries may contain non-ASCII text, and
    # the generated page declares charset UTF-8.
    (out_dir / "last-output.md").write_text(report, encoding="utf-8")

    save_history({
        "timestamp": timestamp,
        "query": query,
        "model": model,
        "summary_chars": len(summary),
    })
    (out_dir / "index.html").write_text(render_html(load_history()), encoding="utf-8")

    log_dir = AGENT_OS_DIR / "logs" / "sam-research"
    log_dir.mkdir(parents=True, exist_ok=True)
    last_run = {
        "agent": "sam-research",
        "timestamp": timestamp,
        "status": "success",
        "result": f"query='{query}' results={len(results)} summary={len(summary)} chars",
    }
    (log_dir / "last-run.json").write_text(json.dumps(last_run, indent=2), encoding="utf-8")


@app.post("/research")
async def research(req: ResearchRequest):
    """Run one research request: search, synthesise, persist, respond.

    The query is sent to SearXNG, the hits are folded into a prompt, the
    prompt is answered by Ollama, and the outcome is written to the site
    directory, the history file and the agent log before being returned.

    Args:
        req: Request body; ``req.model`` overrides OLLAMA_MODEL when set.

    Returns:
        A dict with "query", "results", "summary", "model" and "timestamp".

    Raises:
        HTTPException: 502 when SearXNG or Ollama fails or returns a payload
            that cannot be interpreted.
    """
    model = req.model or OLLAMA_MODEL
    timestamp = datetime.now(timezone.utc).isoformat()

    logger.info("research: query='%s' max_results=%s model=%s", req.query, req.max_results, model)

    try:
        results = await search_web(req.query, req.max_results)
    except (httpx.HTTPError, ValueError) as e:
        # ValueError: the upstream answered, but not with parseable JSON.
        logger.warning("SearXNG request failed: %s", e)
        raise HTTPException(status_code=502, detail=f"SearXNG error: {e}") from e

    logger.info("got %d results from SearXNG", len(results))

    prompt = build_prompt(req.query, results, req.instructions)

    try:
        summary = await ollama_synthesize(prompt, model)
    except httpx.HTTPError as e:
        logger.warning("Ollama request failed: %s", e)
        raise HTTPException(status_code=502, detail=f"Ollama error: {e}") from e
    except (ValueError, KeyError, TypeError) as e:
        # The response was not JSON or lacked the message/content fields.
        # That is still an upstream fault, so report 502 rather than 500.
        logger.warning("Ollama returned an unexpected payload: %r", e)
        raise HTTPException(status_code=502, detail=f"Ollama error: unexpected response ({e!r})") from e

    logger.info("synthesis complete: %d chars", len(summary))

    _persist_run(req.query, model, timestamp, results, summary)

    return {
        "query": req.query,
        "results": results,
        "summary": summary,
        "model": model,
        "timestamp": timestamp,
    }
|
||||
Reference in New Issue
Block a user