"""Sam research agent.

A small FastAPI service that runs a web search through SearXNG, asks an
Ollama model to synthesize an answer from the results, and records each run
(Markdown output, JSON history, static HTML status page, last-run log).
"""

import html
import json
import logging
import os
from datetime import datetime, timezone
from pathlib import Path

import httpx
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

logging.basicConfig(level=logging.INFO, format="%(asctime)s [sam] %(message)s")
logger = logging.getLogger("sam")

app = FastAPI(title="sam-research", version="0.1.0")

# Runtime configuration, overridable through environment variables.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://172.27.6.139:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "gemma4")
SEARXNG_URL = os.getenv("SEARXNG_URL", "http://searxng:8080")
SITES_DIR = Path(os.getenv("SITES_DIR", "/opt/sites"))
AGENT_OS_DIR = Path(os.getenv("AGENT_OS_DIR", "/opt/agent-os"))

HISTORY_FILE = SITES_DIR / "sam" / "history.json"
MAX_HISTORY = 10  # number of most-recent runs kept in HISTORY_FILE


def load_history() -> list[dict]:
    """Return the stored run history, newest first.

    Loading is best-effort: a missing, unreadable, or malformed history file
    yields an empty list instead of raising, so a corrupt file never blocks a
    research run. Anything other than a missing file is logged.
    """
    try:
        data = json.loads(HISTORY_FILE.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return []
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("ignoring unreadable history file %s: %s", HISTORY_FILE, exc)
        return []
    if not isinstance(data, list):
        logger.warning("ignoring history file %s: expected a JSON list", HISTORY_FILE)
        return []
    # Callers use dict methods on each entry; drop anything else.
    return [item for item in data if isinstance(item, dict)]


def save_history(entry: dict) -> list[dict]:
    """Prepend *entry* to the history file, keeping at most MAX_HISTORY entries.

    Returns:
        The history exactly as written, newest first, so callers do not have
        to re-read the file.
    """
    history = [entry, *load_history()][:MAX_HISTORY]
    HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True)
    HISTORY_FILE.write_text(json.dumps(history, indent=2), encoding="utf-8")
    return history


async def search_web(query: str, max_results: int) -> list[dict]:
    """Query SearXNG and return up to *max_results* normalized hits.

    Each hit is a dict with the string keys ``title``, ``url`` and ``snippet``.

    Raises:
        httpx.HTTPError: on transport failures or a non-2xx response.
        ValueError: if the response body is not valid JSON.
    """
    params = {"q": query, "format": "json", "categories": "general", "language": "en"}
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(f"{SEARXNG_URL}/search", params=params)
        response.raise_for_status()
        data = response.json()

    # A negative limit would slice from the end of the list; clamp it to zero.
    limit = max(max_results, 0)
    return [
        {
            # `or ""` also normalizes explicit JSON nulls, not just missing keys.
            "title": item.get("title") or "",
            "url": item.get("url") or "",
            "snippet": item.get("content") or "",
        }
        for item in data.get("results", [])[:limit]
    ]


async def ollama_synthesize(prompt: str, model: str) -> str:
    """Send *prompt* to Ollama's chat endpoint and return the reply text.

    Raises:
        httpx.HTTPError: on transport failures or a non-2xx response.
        ValueError: if the response is not JSON or lacks ``message.content``.
    """
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
    }
    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(f"{OLLAMA_URL}/api/chat", json=payload)
        response.raise_for_status()
        body = response.json()

    try:
        return body["message"]["content"]
    except (KeyError, TypeError) as exc:
        raise ValueError("Ollama response is missing message.content") from exc


def build_prompt(query: str, results: list[dict], instructions: str | None) -> str:
    """Assemble the synthesis prompt from the query, search hits and instructions.

    Args:
        query: The user's research question.
        results: Hits shaped like the output of ``search_web``.
        instructions: Optional extra formatting/content instructions.
    """
    lines = [f"You are a research assistant. The user wants information on: {query}\n"]
    if instructions:
        lines.append(f"Instructions: {instructions}\n")
    lines.append("Web search results:\n")
    for i, r in enumerate(results, 1):
        lines.append(f"{i}. **{r['title']}**")
        lines.append(f" URL: {r['url']}")
        if r["snippet"]:
            lines.append(f" {r['snippet']}")
        lines.append("")
    lines.append(
        "Based on these results, provide a thorough answer. "
        "Follow any format instructions given above. "
        "Cite sources by referencing their URLs inline."
    )
    return "\n".join(lines)


def render_html(history: list[dict]) -> str:
    """Render the static status page listing recent research runs.

    Every history value is HTML-escaped before interpolation because the
    query and model name come straight from the request body.

    NOTE(review): the markup is a minimal reconstruction; the home-link
    target ("/") is an assumption -- confirm against the deployed site.
    """
    now_iso = datetime.now(timezone.utc).isoformat()

    rows = []
    for entry in history:
        ts = html.escape(str(entry.get("timestamp") or ""))
        ts_cell = f'<time datetime="{ts}">{ts}</time>' if ts else "—"
        query = html.escape(str(entry.get("query", "")))
        model = html.escape(str(entry.get("model", "")))
        result_len = html.escape(str(entry.get("summary_chars", 0)))
        rows.append(
            "<tr>"
            f"<td>{ts_cell}</td>"
            f"<td>{query}</td>"
            f"<td>{model}</td>"
            f"<td>{result_len} chars</td>"
            "</tr>"
        )
    body_rows = "\n".join(rows) or '<tr><td colspan="4">No research runs yet.</td></tr>'

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Sam — Research</title>
</head>
<body>
<h1>Sam — Research Agent</h1>
<p>Web search via SearXNG + synthesis via Ollama. <code>POST /research</code> to use.</p>
<p>Updated <time datetime="{now_iso}">{now_iso}</time> &nbsp;·&nbsp; <a href="/">← home</a></p>
<h2>Recent Queries</h2>
<table>
<tbody>
{body_rows}
</tbody>
</table>
</body>
</html>
"""


class ResearchRequest(BaseModel):
    """Request body for ``POST /research``."""

    query: str
    max_results: int = 5
    instructions: str | None = None
    model: str | None = None  # falls back to OLLAMA_MODEL when omitted


@app.get("/health")
def health():
    """Report liveness together with the effective backend configuration."""
    return {
        "status": "ok",
        "agent": "sam-research",
        "searxng_url": SEARXNG_URL,
        "ollama_url": OLLAMA_URL,
        "model": OLLAMA_MODEL,
    }


@app.post("/research")
async def research(req: ResearchRequest):
    """Run one research query: search, synthesize, persist, and return the result.

    Side effects on success: writes ``last-output.md``, ``history.json`` and
    ``index.html`` under ``SITES_DIR/sam`` and ``last-run.json`` under
    ``AGENT_OS_DIR/logs/sam-research``.

    Raises:
        HTTPException: 502 when SearXNG or Ollama fails or returns a payload
            that cannot be interpreted.
    """
    model = req.model or OLLAMA_MODEL
    timestamp = datetime.now(timezone.utc).isoformat()
    logger.info(
        "research: query='%s' max_results=%s model=%s", req.query, req.max_results, model
    )

    try:
        results = await search_web(req.query, req.max_results)
    except (httpx.HTTPError, ValueError) as exc:
        raise HTTPException(status_code=502, detail=f"SearXNG error: {exc}") from exc
    logger.info("got %d results from SearXNG", len(results))

    prompt = build_prompt(req.query, results, req.instructions)
    try:
        summary = await ollama_synthesize(prompt, model)
    except (httpx.HTTPError, ValueError) as exc:
        raise HTTPException(status_code=502, detail=f"Ollama error: {exc}") from exc
    logger.info("synthesis complete: %d chars", len(summary))

    out_dir = SITES_DIR / "sam"
    out_dir.mkdir(parents=True, exist_ok=True)
    # Two trailing spaces before "\n" are a Markdown hard line break, keeping
    # each metadata field on its own rendered line.
    (out_dir / "last-output.md").write_text(
        f"# Sam Research — {req.query}\n\n"
        f"_Query: {req.query}_  \n"
        f"_Model: {model}_  \n"
        f"_Results: {len(results)}_  \n"
        f"_Timestamp: {timestamp}_\n\n"
        f"---\n\n{summary}",
        encoding="utf-8",
    )

    history = save_history(
        {
            "timestamp": timestamp,
            "query": req.query,
            "model": model,
            "summary_chars": len(summary),
        }
    )
    (out_dir / "index.html").write_text(render_html(history), encoding="utf-8")

    log_dir = AGENT_OS_DIR / "logs" / "sam-research"
    log_dir.mkdir(parents=True, exist_ok=True)
    (log_dir / "last-run.json").write_text(
        json.dumps(
            {
                "agent": "sam-research",
                "timestamp": timestamp,
                "status": "success",
                "result": (
                    f"query='{req.query}' results={len(results)} "
                    f"summary={len(summary)} chars"
                ),
            },
            indent=2,
        ),
        encoding="utf-8",
    )

    return {
        "query": req.query,
        "results": results,
        "summary": summary,
        "model": model,
        "timestamp": timestamp,
    }