From 371679d330403a787fe7945f6474155ede97c331 Mon Sep 17 00:00:00 2001 From: NxM Claude Date: Wed, 6 May 2026 04:54:52 +0200 Subject: [PATCH] =?UTF-8?q?Initial=20sam-research=20agent=20=E2=80=94=20we?= =?UTF-8?q?b=20research=20via=20SearXNG=20+=20Ollama=20synthesis?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- Dockerfile | 6 ++ docker-compose.yml | 22 +++++ main.py | 217 +++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 4 + 4 files changed, 249 insertions(+) create mode 100644 Dockerfile create mode 100644 docker-compose.yml create mode 100644 main.py create mode 100644 requirements.txt diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..53f73fd --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +FROM python:3.12-slim +WORKDIR /app +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY main.py . +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8500"] diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..64f6e0e --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,22 @@ +services: + sam: + build: . 
+ container_name: sam-research + restart: unless-stopped + ports: + - "8500:8500" + volumes: + - /opt/agent-os:/opt/agent-os + - /opt/sites:/opt/sites + environment: + OLLAMA_URL: http://172.27.6.139:11434 + OLLAMA_MODEL: gemma4 + SEARXNG_URL: http://searxng:8080 + SITES_DIR: /opt/sites + AGENT_OS_DIR: /opt/agent-os + networks: + - proxy + +networks: + proxy: + external: true diff --git a/main.py b/main.py new file mode 100644 index 0000000..f941767 --- /dev/null +++ b/main.py @@ -0,0 +1,217 @@ +import json +import logging +import os +from datetime import datetime, timezone +from pathlib import Path + +import httpx +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel + +logging.basicConfig(level=logging.INFO, format="%(asctime)s [sam] %(message)s") +logger = logging.getLogger("sam") + +app = FastAPI(title="sam-research", version="0.1.0") + +OLLAMA_URL = os.getenv("OLLAMA_URL", "http://172.27.6.139:11434") +OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "gemma4") +SEARXNG_URL = os.getenv("SEARXNG_URL", "http://searxng:8080") +SITES_DIR = Path(os.getenv("SITES_DIR", "/opt/sites")) +AGENT_OS_DIR = Path(os.getenv("AGENT_OS_DIR", "/opt/agent-os")) + +HISTORY_FILE = SITES_DIR / "sam" / "history.json" +MAX_HISTORY = 10 + + +def load_history() -> list[dict]: + if HISTORY_FILE.exists(): + try: + return json.loads(HISTORY_FILE.read_text()) + except Exception: + pass + return [] + + +def save_history(entry: dict): + history = load_history() + history.insert(0, entry) + HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True) + HISTORY_FILE.write_text(json.dumps(history[:MAX_HISTORY], indent=2)) + + +async def search_web(query: str, max_results: int) -> list[dict]: + params = {"q": query, "format": "json", "categories": "general", "language": "en"} + async with httpx.AsyncClient(timeout=30.0) as client: + r = await client.get(f"{SEARXNG_URL}/search", params=params) + r.raise_for_status() + data = r.json() + return [ + {"title": item.get("title", ""), "url": 
item.get("url", ""), "snippet": item.get("content", "")} + for item in data.get("results", [])[:max_results] + ] + + +async def ollama_synthesize(prompt: str, model: str) -> str: + payload = { + "model": model, + "messages": [{"role": "user", "content": prompt}], + "stream": False, + } + async with httpx.AsyncClient(timeout=120.0) as client: + r = await client.post(f"{OLLAMA_URL}/api/chat", json=payload) + r.raise_for_status() + return r.json()["message"]["content"] + + +def build_prompt(query: str, results: list[dict], instructions: str | None) -> str: + lines = [f"You are a research assistant. The user wants information on: {query}\n"] + if instructions: + lines.append(f"Instructions: {instructions}\n") + lines.append("Web search results:\n") + for i, r in enumerate(results, 1): + lines.append(f"{i}. **{r['title']}**") + lines.append(f" URL: {r['url']}") + if r["snippet"]: + lines.append(f" {r['snippet']}") + lines.append("") + lines.append( + "Based on these results, provide a thorough answer. " + "Follow any format instructions given above. " + "Cite sources by referencing their URLs inline." + ) + return "\n".join(lines) + + +def render_html(history: list[dict]) -> str: + now_iso = datetime.now(timezone.utc).isoformat() + + rows = "" + for entry in history: + ts = entry.get("timestamp", "") + ts_span = f'' if ts else "—" + query = entry.get("query", "") + model = entry.get("model", "") + result_len = entry.get("summary_chars", 0) + rows += f""" + + {ts_span} + {query} + {model} + {result_len} chars + """ + + empty = "No research runs yet." if not rows else rows + + return f""" + + + + + Sam — Research + + + +

def render_html(history: list[dict]) -> str:
    """Render the status page listing recent research runs.

    Args:
        history: Run records, each optionally carrying ``timestamp``,
            ``query``, ``model`` and ``summary_chars``.

    Returns:
        A complete HTML document. Every value taken from *history* is
        HTML-escaped, because ``query`` and ``model`` originate from request
        bodies and the page is written to disk for later viewing.
    """
    # Function-scope import: the module's import block does not include html.
    from html import escape

    now_iso = datetime.now(timezone.utc).isoformat()

    # NOTE(review): the markup below is a minimal reconstruction. Only the
    # template's text content is available; element names, styling and any
    # scripts of the original page should be confirmed before deploying.
    row_chunks = []
    for entry in history:
        ts = str(entry.get("timestamp") or "")
        if ts:
            ts_cell = f'<time datetime="{escape(ts)}">{escape(ts)}</time>'
        else:
            ts_cell = "—"
        query = escape(str(entry.get("query", "")))
        model = escape(str(entry.get("model", "")))
        result_len = escape(str(entry.get("summary_chars", 0)))
        row_chunks.append(
            "<tr>"
            f"<td>{ts_cell}</td>"
            f"<td>{query}</td>"
            f"<td>{model}</td>"
            f"<td>{result_len} chars</td>"
            "</tr>"
        )

    if row_chunks:
        listing = "<table>\n" + "\n".join(row_chunks) + "\n</table>"
    else:
        listing = "<p>No research runs yet.</p>"

    # NOTE(review): the "home" link target is assumed to be "/" — confirm.
    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Sam — Research</title>
</head>
<body>
<h1>Sam — Research Agent</h1>
<p>Web search via SearXNG + synthesis via Ollama. POST /research to use.</p>
<p>Updated <time datetime="{now_iso}">{now_iso}</time> &nbsp;·&nbsp; <a href="/">← home</a></p>
<h2>Recent Queries</h2>
{listing}
</body>
</html>
"""
class ResearchRequest(BaseModel):
    """Request body for ``POST /research``."""

    # Search terms: sent to SearXNG and echoed into the model prompt.
    query: str
    # How many search hits are passed on to the model.
    max_results: int = 5
    # Optional extra guidance inserted into the prompt.
    instructions: str | None = None
    # Ollama model to use; OLLAMA_MODEL is used when omitted.
    model: str | None = None


@app.get("/health")
def health():
    """Report that the service is up, together with its configured backends."""
    return {
        "status": "ok",
        "agent": "sam-research",
        "searxng_url": SEARXNG_URL,
        "ollama_url": OLLAMA_URL,
        "model": OLLAMA_MODEL,
    }


def _persist_run(
    query: str,
    model: str,
    timestamp: str,
    results: list[dict],
    summary: str,
) -> None:
    """Write the Markdown report, history, status page and last-run log to disk."""
    out_dir = SITES_DIR / "sam"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Two trailing spaces before "\n" are a Markdown hard line break; with
    # fewer, the metadata lines are rendered as one run-on paragraph.
    (out_dir / "last-output.md").write_text(
        f"# Sam Research — {query}\n\n"
        f"_Query: {query}_  \n"
        f"_Model: {model}_  \n"
        f"_Results: {len(results)}_  \n"
        f"_Timestamp: {timestamp}_\n\n"
        f"---\n\n{summary}",
        encoding="utf-8",
    )

    save_history(
        {
            "timestamp": timestamp,
            "query": query,
            "model": model,
            "summary_chars": len(summary),
        }
    )
    (out_dir / "index.html").write_text(render_html(load_history()), encoding="utf-8")

    log_dir = AGENT_OS_DIR / "logs" / "sam-research"
    log_dir.mkdir(parents=True, exist_ok=True)
    last_run = {
        "agent": "sam-research",
        "timestamp": timestamp,
        "status": "success",
        "result": f"query='{query}' results={len(results)} summary={len(summary)} chars",
    }
    (log_dir / "last-run.json").write_text(json.dumps(last_run, indent=2), encoding="utf-8")


@app.post("/research")
async def research(req: ResearchRequest):
    """Search the web for the query, have the model synthesise an answer, and record the run.

    Returns:
        A dict with ``query``, ``results`` (the search hits), ``summary``
        (the model's answer), ``model`` and ``timestamp`` (UTC, ISO 8601).

    Raises:
        HTTPException: 502 when SearXNG or Ollama fails or returns a payload
            that cannot be interpreted.
    """
    model = req.model or OLLAMA_MODEL
    timestamp = datetime.now(timezone.utc).isoformat()

    logger.info(
        "research: query='%s' max_results=%s model=%s", req.query, req.max_results, model
    )

    try:
        results = await search_web(req.query, req.max_results)
    except (httpx.HTTPError, ValueError) as e:
        # ValueError: the search backend answered with a body that is not JSON.
        raise HTTPException(status_code=502, detail=f"SearXNG error: {e}") from e

    logger.info("got %d results from SearXNG", len(results))

    prompt = build_prompt(req.query, results, req.instructions)

    try:
        summary = await ollama_synthesize(prompt, model)
    except (httpx.HTTPError, ValueError, KeyError, TypeError) as e:
        # KeyError/TypeError: response JSON lacks the expected message.content.
        raise HTTPException(status_code=502, detail=f"Ollama error: {e}") from e

    logger.info("synthesis complete: %d chars", len(summary))

    try:
        _persist_run(req.query, model, timestamp, results, summary)
    except OSError:
        # The on-disk artifacts are secondary to the answer itself, so a
        # write failure is logged and the completed result is still returned.
        logger.exception("could not persist research artifacts")

    return {
        "query": req.query,
        "results": results,
        "summary": summary,
        "model": model,
        "timestamp": timestamp,
    }