# sam-research/main.py
# Research agent service: web search via SearXNG, synthesis via Ollama.

import json
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
import httpx
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# --- Logging: one process-wide format, tagged "[sam]" ---------------------
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [sam] %(message)s",
)
logger = logging.getLogger("sam")

# --- ASGI application ------------------------------------------------------
app = FastAPI(title="sam-research", version="0.1.0")

# --- Configuration: each value is read from the environment variable of the
# --- same name, falling back to the default shown here.
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://172.27.6.139:11434")
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "gemma4")
SEARXNG_URL = os.environ.get("SEARXNG_URL", "http://searxng:8080")
SITES_DIR = Path(os.environ.get("SITES_DIR", "/opt/sites"))
AGENT_OS_DIR = Path(os.environ.get("AGENT_OS_DIR", "/opt/agent-os"))

# JSON file holding the most recent research runs.
HISTORY_FILE = SITES_DIR.joinpath("sam", "history.json")
# Upper bound on the number of entries written to HISTORY_FILE.
MAX_HISTORY = 10
def load_history() -> list[dict]:
    """Return the research history stored in ``HISTORY_FILE``.

    The file is expected to contain a JSON array of objects. This is a
    best-effort read: an empty list is returned when the file is missing,
    cannot be read, is not valid JSON, or does not hold a JSON array.
    Array items that are not JSON objects are dropped so callers can rely
    on every element being a ``dict``.

    Returns:
        The stored entries, in file order.
    """
    try:
        raw = HISTORY_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        # No history yet: the normal state before the first run.
        return []
    except OSError as exc:
        logger.warning("could not read history file %s: %s", HISTORY_FILE, exc)
        return []

    try:
        data = json.loads(raw)
    except ValueError as exc:  # includes json.JSONDecodeError
        logger.warning("history file %s is not valid JSON: %s", HISTORY_FILE, exc)
        return []

    if not isinstance(data, list):
        logger.warning("history file %s does not contain a JSON array", HISTORY_FILE)
        return []

    # Callers use dict methods on each entry, so discard anything else.
    return [item for item in data if isinstance(item, dict)]
def save_history(entry: dict):
    """Put *entry* at the front of the stored history and rewrite the file.

    The current history is loaded, *entry* is inserted at index 0, and only
    the first ``MAX_HISTORY`` entries are written back to ``HISTORY_FILE``
    as indented JSON. The parent directory is created when missing.
    """
    records = load_history()
    records.insert(0, entry)

    target_dir = HISTORY_FILE.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    kept = records[:MAX_HISTORY]
    serialized = json.dumps(kept, indent=2)
    HISTORY_FILE.write_text(serialized)
async def search_web(query: str, max_results: int) -> list[dict]:
    """Query SearXNG and return up to *max_results* simplified hits.

    Sends a GET request to ``{SEARXNG_URL}/search`` asking for JSON output
    in the "general" category and English language.

    Args:
        query: The search terms.
        max_results: Maximum number of hits to return. Values below zero
            are treated as zero.

    Returns:
        A list of dicts with the keys ``title``, ``url`` and ``snippet``
        (taken from the hit's ``content`` field). Missing or null fields
        become empty strings.

    Raises:
        httpx.HTTPError: On transport failure or when ``raise_for_status``
            rejects the response.
        ValueError: When the response body is not valid JSON.
    """
    params = {"q": query, "format": "json", "categories": "general", "language": "en"}
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(f"{SEARXNG_URL}/search", params=params)
        response.raise_for_status()
        data = response.json()

    # A negative bound would slice items off the END of the list instead of
    # limiting the count, so clamp it.
    limit = max(max_results, 0)
    # ``or []`` also covers an explicit JSON null for "results".
    hits = (data.get("results") or [])[:limit]
    return [
        {
            "title": item.get("title") or "",
            "url": item.get("url") or "",
            "snippet": item.get("content") or "",
        }
        for item in hits
    ]
async def ollama_synthesize(prompt: str, model: str) -> str:
    """Send *prompt* to Ollama's chat endpoint and return the reply text.

    A single non-streaming request with one user message is POSTed to
    ``{OLLAMA_URL}/api/chat`` with a 120 second timeout. The returned string
    is the ``message.content`` field of the JSON response.
    """
    user_turn = {"role": "user", "content": prompt}
    request_body = {"model": model, "messages": [user_turn], "stream": False}
    endpoint = f"{OLLAMA_URL}/api/chat"

    async with httpx.AsyncClient(timeout=120.0) as http:
        response = await http.post(endpoint, json=request_body)
        response.raise_for_status()
        reply = response.json()

    return reply["message"]["content"]
def build_prompt(query: str, results: list[dict], instructions: str | None) -> str:
lines = [f"You are a research assistant. The user wants information on: {query}\n"]
if instructions:
lines.append(f"Instructions: {instructions}\n")
lines.append("Web search results:\n")
for i, r in enumerate(results, 1):
lines.append(f"{i}. **{r['title']}**")
lines.append(f" URL: {r['url']}")
if r["snippet"]:
lines.append(f" {r['snippet']}")
lines.append("")
lines.append(
"Based on these results, provide a thorough answer. "
"Follow any format instructions given above. "
"Cite sources by referencing their URLs inline."
)
return "\n".join(lines)
def render_html(history: list[dict]) -> str:
    """Render the status page listing recent research runs.

    Args:
        history: Entries to list, one table row each. The keys
            ``timestamp``, ``query``, ``model`` and ``summary_chars`` are
            read; missing keys fall back to an empty string (or 0 for
            ``summary_chars``).

    Returns:
        A complete HTML document. When *history* is empty the table holds a
        single "No research runs yet." row. Timestamps are emitted as
        ``data-utc`` attributes and formatted in the browser by the inline
        script.
    """
    # Function-scope import so this block does not depend on a file-level one.
    from html import escape

    now_iso = datetime.now(timezone.utc).isoformat()

    row_chunks = []
    for entry in history:
        raw_ts = entry.get("timestamp", "")
        # query/model come straight from request bodies, so every value is
        # HTML-escaped (quotes included) before being interpolated.
        ts_span = f'<span data-utc="{escape(str(raw_ts))}"></span>' if raw_ts else ""
        query = escape(str(entry.get("query", "")))
        model = escape(str(entry.get("model", "")))
        result_len = escape(str(entry.get("summary_chars", 0)))
        row_chunks.append(f"""
<tr>
<td>{ts_span}</td>
<td class="query">{query}</td>
<td class="dim">{model}</td>
<td class="dim">{result_len} chars</td>
</tr>""")

    table_body = "".join(row_chunks)
    if not table_body:
        table_body = "<tr><td colspan='4' class='dim'>No research runs yet.</td></tr>"

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Sam — Research</title>
<style>
* {{ box-sizing: border-box; margin: 0; padding: 0; }}
body {{ font-family: monospace; background: #0d1117; color: #c9d1d9; padding: 2rem; }}
h1 {{ color: #58a6ff; margin-bottom: 0.25rem; }}
.desc {{ color: #8b949e; font-size: 0.9rem; margin: 0.4rem 0 0.25rem; }}
.meta {{ color: #8b949e; font-size: 0.85rem; margin-bottom: 2rem; }}
.meta a {{ color: #8b949e; }}
h2 {{ color: #8b949e; font-size: 0.85rem; text-transform: uppercase; letter-spacing: 0.05em; margin: 2rem 0 0.75rem; }}
table {{ width: 100%; border-collapse: collapse; }}
td {{ padding: 0.6rem 1rem; border-bottom: 1px solid #21262d; font-size: 0.9rem; }}
.query {{ color: #c9d1d9; }}
.dim {{ color: #8b949e; }}
</style>
</head>
<body>
<h1>Sam — Research Agent</h1>
<p class="desc">Web search via SearXNG + synthesis via Ollama. POST /research to use.</p>
<p class="meta">Updated <span data-utc="{now_iso}"></span> &nbsp;·&nbsp; <a href="/">← home</a></p>
<h2>Recent Queries</h2>
<table><tbody>{table_body}</tbody></table>
<script>
document.querySelectorAll('[data-utc]').forEach(el => {{
el.textContent = new Date(el.dataset.utc).toLocaleString(undefined, {{year:'numeric',month:'2-digit',day:'2-digit',hour:'2-digit',minute:'2-digit'}});
}});
</script>
</body>
</html>"""
# Request body accepted by POST /research.
class ResearchRequest(BaseModel):
    # Search terms; also shown in the recent-queries table.
    query: str
    # Maximum number of search hits passed on to the model.
    max_results: int = 5
    # Optional extra guidance inserted into the prompt.
    instructions: str | None = None
    # Optional model name; the handler falls back to OLLAMA_MODEL when unset.
    model: str | None = None
# GET /health: static status payload that echoes the effective configuration.
@app.get("/health")
def health():
    # Keyword order fixes the key order of the JSON response.
    status_payload = dict(
        status="ok",
        agent="sam-research",
        searxng_url=SEARXNG_URL,
        ollama_url=OLLAMA_URL,
        model=OLLAMA_MODEL,
    )
    return status_payload
@app.post("/research")
async def research(req: ResearchRequest):
    """Run one research query: search, synthesise, and record the run.

    Steps:
      1. Search SearXNG for ``req.query`` (at most ``req.max_results`` hits).
      2. Build a prompt from the hits and send it to Ollama, using
         ``req.model`` or ``OLLAMA_MODEL`` when none is given.
      3. Write ``last-output.md`` and ``index.html`` under ``SITES_DIR/sam``,
         add an entry to the history file, and write ``last-run.json`` under
         ``AGENT_OS_DIR/logs/sam-research``.

    Returns:
        A dict with the keys ``query``, ``results``, ``summary``, ``model``
        and ``timestamp`` (UTC, ISO 8601).

    Raises:
        HTTPException: Status 502 when SearXNG or Ollama cannot be reached,
            answers with an error status, or returns a body that cannot be
            parsed.
    """
    model = req.model or OLLAMA_MODEL
    timestamp = datetime.now(timezone.utc).isoformat()
    logger.info(
        "research: query='%s' max_results=%s model=%s",
        req.query, req.max_results, model,
    )

    try:
        results = await search_web(req.query, req.max_results)
    except (httpx.HTTPError, ValueError) as e:
        # ValueError covers a non-JSON body; an upstream fault is a 502.
        raise HTTPException(status_code=502, detail=f"SearXNG error: {e}") from e
    logger.info("got %d results from SearXNG", len(results))

    prompt = build_prompt(req.query, results, req.instructions)
    try:
        summary = await ollama_synthesize(prompt, model)
    except (httpx.HTTPError, ValueError, KeyError) as e:
        # KeyError: the response lacked the expected message/content fields.
        raise HTTPException(status_code=502, detail=f"Ollama error: {e}") from e
    logger.info("synthesis complete: %d chars", len(summary))

    out_dir = SITES_DIR / "sam"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Markdown needs TWO trailing spaces before the newline for a hard line
    # break; with one, the metadata lines collapse into a single paragraph.
    report = (
        f"# Sam Research — {req.query}\n\n"
        f"_Query: {req.query}_  \n"
        f"_Model: {model}_  \n"
        f"_Results: {len(results)}_  \n"
        f"_Timestamp: {timestamp}_\n\n"
        f"---\n\n{summary}"
    )
    # Explicit UTF-8: the text contains non-ASCII characters and the HTML
    # page declares charset UTF-8, so the locale default must not be used.
    (out_dir / "last-output.md").write_text(report, encoding="utf-8")

    history_entry = {
        "timestamp": timestamp,
        "query": req.query,
        "model": model,
        "summary_chars": len(summary),
    }
    save_history(history_entry)
    (out_dir / "index.html").write_text(render_html(load_history()), encoding="utf-8")

    log_dir = AGENT_OS_DIR / "logs" / "sam-research"
    log_dir.mkdir(parents=True, exist_ok=True)
    run_record = {
        "agent": "sam-research",
        "timestamp": timestamp,
        "status": "success",
        "result": f"query='{req.query}' results={len(results)} summary={len(summary)} chars",
    }
    (log_dir / "last-run.json").write_text(json.dumps(run_record, indent=2), encoding="utf-8")

    return {
        "query": req.query,
        "results": results,
        "summary": summary,
        "model": model,
        "timestamp": timestamp,
    }