Python MCP server that exposes a web_search tool backed by a SearXNG instance. Includes tests with mocked HTTP via respx. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
120 lines
3.8 KiB
Python
import os
|
|
from urllib.parse import urlencode
|
|
|
|
import httpx
|
|
from mcp.server.fastmcp import FastMCP
|
|
|
|
# Base URL of the SearXNG instance to query; override with the SEARXNG_URL
# environment variable (defaults to a LAN address — presumably a local
# Kubernetes NodePort; confirm for your deployment).
SEARXNG_URL = os.environ.get("SEARXNG_URL", "http://192.168.50.224:30053")

# FastMCP server instance; tools registered below via @mcp.tool().
mcp = FastMCP("searxng", instructions="Web search via SearXNG")
|
def build_search_url(
    query: str,
    categories: str | None = None,
    language: str | None = None,
    pageno: int | None = None,
    time_range: str | None = None,
    safesearch: int | None = None,
) -> str:
    """Assemble the SearXNG /search URL, including only the filters supplied.

    Args:
        query: Search query string (always included as ``q``).
        categories: Comma-separated category list; skipped when empty/None.
        language: Language code; skipped when empty/None.
        pageno: Page number; included whenever not None (0 is passed through).
        time_range: Time filter; skipped when empty/None.
        safesearch: Safe-search level; included whenever not None.

    Returns:
        Fully encoded URL against the configured SEARXNG_URL base.
    """
    params: dict[str, str | int] = {"q": query, "format": "json"}

    # String filters use truthiness: an empty string counts as "not given".
    for key, text in (
        ("categories", categories),
        ("language", language),
        ("time_range", time_range),
    ):
        if text:
            params[key] = text

    # Numeric filters use an identity check so 0 remains a valid value.
    for key, number in (("pageno", pageno), ("safesearch", safesearch)):
        if number is not None:
            params[key] = number

    return f"{SEARXNG_URL}/search?{urlencode(params)}"
|
def format_results(data: dict, max_results: int = 10) -> str:
    """Format a SearXNG JSON response into readable text.

    Args:
        data: Parsed JSON body from SearXNG's /search endpoint.
        max_results: Cap on the number of result entries rendered.

    Returns:
        A readable summary containing any direct answers, up to
        ``max_results`` numbered results (title, URL, snippet, date,
        engines), and a trailing totals line. Returns
        "No results found." only when the response carries neither
        answers nor results.
    """
    parts: list[str] = []

    answers = data.get("answers", [])
    if answers:
        parts.append("Direct answers:")
        for answer in answers:
            parts.append(f" {answer}")
        parts.append("")

    results = data.get("results", [])
    if not results:
        # Fix: previously this returned "No results found." unconditionally,
        # silently discarding any direct answers collected above.
        return "\n\n".join(parts) if parts else "No results found."

    results = results[:max_results]

    for i, r in enumerate(results, 1):
        lines = [f"{i}. {r.get('title', 'No title')}", f" URL: {r.get('url', '')}"]
        content = r.get("content", "")
        if content:
            lines.append(f" {content}")
        published = r.get("publishedDate", "")
        if published:
            lines.append(f" Published: {published}")
        engines = r.get("engines", [])
        if engines:
            lines.append(f" Source: {', '.join(engines)}")
        parts.append("\n".join(lines))

    # SearXNG may omit these fields; fall back to "unknown" rather than KeyError.
    total = data.get("total_results", "unknown")
    time_taken = data.get("time_taken", "unknown")
    parts.append(f"\n({total} total results in {time_taken}s)")

    return "\n\n".join(parts)
|
@mcp.tool()
async def web_search(
    query: str,
    categories: str | None = None,
    language: str | None = None,
    pageno: int | None = None,
    time_range: str | None = None,
    safesearch: int | None = None,
    max_results: int = 10,
) -> str:
    """Search the web using SearXNG.

    Args:
        query: Search query string.
        categories: Comma-separated categories (general, images, news, videos, music, files, it, science, social media).
        language: Language code (e.g. en, de, fr).
        pageno: Page number starting at 1.
        time_range: Time filter: day, week, month, or year.
        safesearch: Safe search level: 0=off, 1=moderate, 2=strict.
        max_results: Maximum number of results to return (default 10, max 50).
    """
    # Clamp the result cap to the documented [1, 50] window.
    if max_results < 1:
        max_results = 1
    elif max_results > 50:
        max_results = 50

    search_url = build_search_url(
        query,
        categories=categories,
        language=language,
        pageno=pageno,
        time_range=time_range,
        safesearch=safesearch,
    )

    # Network failures are reported back as tool text rather than raised,
    # so the MCP client always receives a usable string.
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(search_url)
            status = response.status_code
            if status != 200:
                return f"SearXNG returned HTTP {status}: {response.text[:500]}"
            # .json() stays inside the try: a malformed body falls through
            # to the generic handler below.
            payload = response.json()
    except httpx.ConnectError as exc:
        return f"Connection error: Could not reach SearXNG at {SEARXNG_URL}. {exc}"
    except httpx.TimeoutException:
        return f"Timeout: SearXNG at {SEARXNG_URL} did not respond within 30 seconds."
    except Exception as exc:
        return f"Error querying SearXNG: {exc}"

    return format_results(payload, max_results=max_results)
|
if __name__ == "__main__":
    # stdio transport: MCP clients spawn this script as a subprocess and
    # speak the protocol over stdin/stdout.
    mcp.run(transport="stdio")