# Reconstructed from a collapsed `git show` dump of commit
# 74ab7650e392ac4d32d1655099e38c1d64de266c (Brian Hetherman, 2026-04-02,
# "Initial commit: SearXNG MCP server" — a Python MCP server exposing a
# web_search tool backed by a SearXNG instance, with respx-mocked tests.
# Co-Authored-By: Claude Opus 4.6).
#
# The dump interleaves several new files. Non-Python files are preserved
# below as comments, followed by server.py and the first half of
# test_server.py.
#
# .gitignore:
#     venv/
#     __pycache__/
#     *.pyc
#     .pytest_cache/
#
# .mcp.json:
#     {
#       "mcpServers": {
#         "searxng": {
#           "command": "C:\\Users\\bheth\\Documents\\mcp-things\\searxng-mcp\\venv\\Scripts\\python.exe",
#           "args": ["C:\\Users\\bheth\\Documents\\mcp-things\\searxng-mcp\\server.py"],
#           "env": {
#             "SEARXNG_URL": "http://192.168.50.224:30053"
#           }
#         }
#       }
#     }
#
# requirements.txt:
#     mcp
#     httpx

# --- server.py ---
import os
from urllib.parse import urlencode

import httpx
from mcp.server.fastmcp import FastMCP

# Base URL of the SearXNG instance; overridable via the SEARXNG_URL env var.
SEARXNG_URL = os.environ.get("SEARXNG_URL", "http://192.168.50.224:30053")

mcp = FastMCP("searxng", instructions="Web search via SearXNG")


def build_search_url(
    query: str,
    categories: str | None = None,
    language: str | None = None,
    pageno: int | None = None,
    time_range: str | None = None,
    safesearch: int | None = None,
) -> str:
    """Build the SearXNG search URL with query parameters.

    ``q`` and ``format=json`` are always present; every other parameter is
    encoded only when the caller actually supplied it.
    """
    params: dict[str, str | int] = {"q": query, "format": "json"}
    if categories:
        params["categories"] = categories
    if language:
        params["language"] = language
    if pageno is not None:
        params["pageno"] = pageno
    if time_range:
        params["time_range"] = time_range
    if safesearch is not None:
        params["safesearch"] = safesearch
    return f"{SEARXNG_URL}/search?{urlencode(params)}"


def format_results(data: dict, max_results: int = 10) -> str:
    """Format a SearXNG JSON response into readable text.

    Direct answers (if any) come first, then up to ``max_results`` result
    entries (title, URL, snippet, publish date, source engines), then a
    summary line with the total count and query time.

    Fixes vs. the original: direct answers are no longer discarded when the
    ``results`` list is empty, and answer lines no longer produce stray
    blank lines in the joined output.
    """
    parts: list[str] = []

    answers = data.get("answers", [])
    if answers:
        # Render all answers as a single part so the "\n\n" join below does
        # not insert blank lines between individual answers.
        parts.append("\n".join(["Direct answers:"] + [f" {a}" for a in answers]))

    results = data.get("results", [])
    if not results:
        # Previously this returned "No results found." unconditionally,
        # throwing away any direct answers collected above.
        return "\n\n".join(parts) if parts else "No results found."

    results = results[:max_results]

    for i, r in enumerate(results, 1):
        lines = [f"{i}. {r.get('title', 'No title')}", f"   URL: {r.get('url', '')}"]
        content = r.get("content", "")
        if content:
            lines.append(f"   {content}")
        published = r.get("publishedDate", "")
        if published:
            lines.append(f"   Published: {published}")
        engines = r.get("engines", [])
        if engines:
            lines.append(f"   Source: {', '.join(engines)}")
        parts.append("\n".join(lines))

    total = data.get("total_results", "unknown")
    time_taken = data.get("time_taken", "unknown")
    parts.append(f"\n({total} total results in {time_taken}s)")

    return "\n\n".join(parts)


@mcp.tool()
async def web_search(
    query: str,
    categories: str | None = None,
    language: str | None = None,
    pageno: int | None = None,
    time_range: str | None = None,
    safesearch: int | None = None,
    max_results: int = 10,
) -> str:
    """Search the web using SearXNG.

    Args:
        query: Search query string.
        categories: Comma-separated categories (general, images, news, videos, music, files, it, science, social media).
        language: Language code (e.g. en, de, fr).
        pageno: Page number starting at 1.
        time_range: Time filter: day, week, month, or year.
        safesearch: Safe search level: 0=off, 1=moderate, 2=strict.
        max_results: Maximum number of results to return (default 10, max 50).
    """
    # Clamp so a bad client value can neither zero out nor flood the output.
    max_results = min(max(1, max_results), 50)
    url = build_search_url(
        query,
        categories=categories,
        language=language,
        pageno=pageno,
        time_range=time_range,
        safesearch=safesearch,
    )
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(url)
            if response.status_code != 200:
                return f"SearXNG returned HTTP {response.status_code}: {response.text[:500]}"
            data = response.json()
    except httpx.ConnectError as e:
        return f"Connection error: Could not reach SearXNG at {SEARXNG_URL}. {e}"
    except httpx.TimeoutException:
        return f"Timeout: SearXNG at {SEARXNG_URL} did not respond within 30 seconds."
    except Exception as e:  # tool boundary: report the failure, don't crash the MCP server
        return f"Error querying SearXNG: {e}"

    return format_results(data, max_results=max_results)


if __name__ == "__main__":
    mcp.run(transport="stdio")


# --- test_server.py ---
import os
from urllib.parse import parse_qs, urlparse

import httpx
import pytest
import respx

# Set env before importing server so the default URL is predictable
os.environ["SEARXNG_URL"] = "http://test-searxng:8080"

import server  # noqa: E402


# --- build_search_url tests ---


def test_build_search_url_basic():
    url = server.build_search_url("hello world")
    parsed = urlparse(url)
    params = parse_qs(parsed.query)
    assert parsed.scheme == "http"
    assert parsed.netloc == "test-searxng:8080"
    assert parsed.path == "/search"
    assert params["q"] == ["hello world"]
    assert params["format"] == ["json"]
    # No optional params
    assert "categories" not in params
    assert "language" not in params
    assert "pageno" not in params


def test_build_search_url_all_params():
    url = server.build_search_url(
        "test query",
        categories="news,science",
        language="de",
        pageno=3,
        time_range="month",
        safesearch=2,
    )
    params = parse_qs(urlparse(url).query)
    assert params["q"] == ["test query"]
    assert params["format"] == ["json"]
    assert params["categories"] == ["news,science"]
    assert params["language"] == ["de"]
    assert params["pageno"] == ["3"]
    assert params["time_range"] == ["month"]
    assert params["safesearch"] == ["2"]


# --- format_results tests ---


SAMPLE_RESULTS = {
    "results": [
        {
            "title": "Example Page",
            "url": "https://example.com",
            "content": "This is a sample result.",
            "engines": ["google", "bing"],
            "publishedDate": "2025-01-15",
        },
        {
            "title": "Another Page",
            "url": "https://another.com",
            "content": "Another result snippet.",
            "engines": ["duckduckgo"],
        },
    ],
    "total_results": 100,
    "time_taken": 0.5,
}


def test_format_results_basic():
    output = server.format_results(SAMPLE_RESULTS)
    assert "1. Example Page" in output
    assert "URL: https://example.com" in output
    assert "This is a sample result." in output
    assert "Source: google, bing" in output
    assert "Published: 2025-01-15" in output
    assert "2. Another Page" in output
    assert "URL: https://another.com" in output
    assert "(100 total results in 0.5s)" in output


def test_format_results_with_answers():
    data = {
        "results": [{"title": "A", "url": "https://a.com", "content": "a"}],
        "answers": ["42 is the answer"],
        "total_results": 1,
        "time_taken": 0.1,
    }
    output = server.format_results(data)
    assert "Direct answers:" in output
    assert "42 is the answer" in output
    # Answers should come before results
    assert output.index("Direct answers:") < output.index("1. A")


def test_format_results_empty():
    output = server.format_results({"results": []})
    assert output == "No results found."


def test_format_results_empty_no_key():
    output = server.format_results({})
    assert output == "No results found."
# --- test_server.py (remaining tests) ---


def test_format_results_max_results():
    """Rendering stops after max_results entries."""
    payload = {
        "results": [
            {"title": f"Result {i}", "url": f"https://r{i}.com", "content": f"Content {i}"}
            for i in range(20)
        ],
        "total_results": 20,
        "time_taken": 0.3,
    }
    rendered = server.format_results(payload, max_results=5)
    assert "5. Result 4" in rendered
    assert "6." not in rendered


# --- web_search integration tests (mocked HTTP) ---


@pytest.mark.asyncio
@respx.mock
async def test_web_search_success():
    """A healthy 200 JSON response is rendered with both result titles."""
    route = respx.get("http://test-searxng:8080/search")
    route.mock(return_value=httpx.Response(200, json=SAMPLE_RESULTS))
    reply = await server.web_search("hello")
    assert "Example Page" in reply
    assert "Another Page" in reply


@pytest.mark.asyncio
@respx.mock
async def test_web_search_network_error():
    """Connection failures surface as a friendly error string naming the host."""
    route = respx.get("http://test-searxng:8080/search")
    route.mock(side_effect=httpx.ConnectError("Connection refused"))
    reply = await server.web_search("hello")
    assert "Connection error" in reply
    assert "test-searxng:8080" in reply


@pytest.mark.asyncio
@respx.mock
async def test_web_search_non_200():
    """Non-200 statuses are reported with the HTTP code."""
    route = respx.get("http://test-searxng:8080/search")
    route.mock(return_value=httpx.Response(500, text="Internal Server Error"))
    reply = await server.web_search("hello")
    assert "HTTP 500" in reply


@pytest.mark.asyncio
@respx.mock
async def test_web_search_timeout():
    """Read timeouts produce the dedicated timeout message."""
    route = respx.get("http://test-searxng:8080/search")
    route.mock(side_effect=httpx.ReadTimeout("timed out"))
    reply = await server.web_search("hello")
    assert "Timeout" in reply


@pytest.mark.asyncio
@respx.mock
async def test_web_search_max_results_clamped():
    """web_search honors the caller-supplied max_results cap."""
    payload = {
        "results": [
            {"title": f"R{i}", "url": f"https://r{i}.com", "content": f"C{i}"}
            for i in range(10)
        ],
        "total_results": 10,
        "time_taken": 0.1,
    }
    route = respx.get("http://test-searxng:8080/search")
    route.mock(return_value=httpx.Response(200, json=payload))
    reply = await server.web_search("hello", max_results=3)
    assert "3. R2" in reply
    assert "4." not in reply


def test_searxng_url_from_env():
    """The module captured the SEARXNG_URL that was set before import."""
    assert server.SEARXNG_URL == "http://test-searxng:8080"