diff --git a/main.py b/main.py
index f941767..77aafaa 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+import re
 from datetime import datetime, timezone
 from pathlib import Path
 
@@ -8,6 +9,9 @@ import httpx
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 
+VERSION_PATTERN = re.compile(r'\bv?\d+\.\d+[\.\d]*\b')
+VERSION_CONTEXT = re.compile(r'\b(latest|stable|current|released?|new version)\b', re.IGNORECASE)
+
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [sam] %(message)s")
 logger = logging.getLogger("sam")
 
@@ -51,6 +55,54 @@ async def search_web(query: str, max_results: int) -> list[dict]:
     ]
 
 
+def _result_has_version(result: dict) -> bool:
+    """Return True if one result pairs a version number with a release keyword."""
+    # Fall back to "" when a key is missing or maps to None, so joining cannot raise.
+    text = f"{result.get('snippet') or ''} {result.get('title') or ''}"
+    return bool(VERSION_PATTERN.search(text) and VERSION_CONTEXT.search(text))
+
+
+def _has_version(results: list[dict]) -> bool:
+    """Return True if any search result looks like it names a released version."""
+    return any(_result_has_version(r) for r in results)
+
+
+async def search_with_retry(query: str, max_results: int) -> tuple[list[dict], bool]:
+    """Search SearXNG, retrying once with a refined query if no version is found.
+
+    Returns (results, retried): at most max_results results, and whether the
+    refined query was attempted.
+    """
+    results = await search_web(query, max_results)
+    if _has_version(results):
+        return results, False
+
+    retry_query = query + " latest release version changelog"
+    logger.info(f"no version found in results — retrying: '{retry_query}'")
+    try:
+        retry_results = await search_web(retry_query, max_results)
+    except httpx.HTTPError as e:
+        # The retry is best-effort: a failure must not discard the first search.
+        logger.warning(f"retry search failed, keeping original results: {e}")
+        return results, True
+
+    # De-duplicate by URL; entries without a URL cannot be compared, so keep them.
+    seen_urls = set()
+    merged = []
+    for r in results + retry_results:
+        url = r.get("url")
+        if url and url in seen_urls:
+            continue
+        if url:
+            seen_urls.add(url)
+        merged.append(r)
+
+    # Stable sort: version-bearing hits move to the front so that truncating to
+    # max_results cannot drop exactly the results the retry was issued to find.
+    merged.sort(key=lambda r: not _result_has_version(r))
+    return merged[:max_results], True
+
+
 async def ollama_synthesize(prompt: str, model: str) -> str:
     payload = {
         "model": model,
@@ -163,11 +215,11 @@ async def research(req: ResearchRequest):
     logger.info(f"research: query='{req.query}' max_results={req.max_results} model={model}")
 
     try:
-        results = await search_web(req.query, req.max_results)
+        results, retried = await search_with_retry(req.query, req.max_results)
     except httpx.HTTPError as e:
         raise HTTPException(status_code=502, detail=f"SearXNG error: {e}")
 
-    logger.info(f"got {len(results)} results from SearXNG")
+    logger.info(f"got {len(results)} results from SearXNG (retried={retried})")
 
     prompt = build_prompt(req.query, results, req.instructions)
 
@@ -195,6 +247,7 @@ async def research(req: ResearchRequest):
         "query": req.query,
         "model": model,
         "summary_chars": len(summary),
+        "retried": retried,
     }
     save_history(history_entry)
     (out_dir / "index.html").write_text(render_html(load_history()))
@@ -214,4 +267,5 @@ async def research(req: ResearchRequest):
         "summary": summary,
         "model": model,
         "timestamp": timestamp,
+        "retried": retried,
     }