From 30a41a4577c3aa40783398a45367746a8148abfb Mon Sep 17 00:00:00 2001 From: Aymen Hammouda Date: Tue, 28 Apr 2026 19:34:28 +0200 Subject: [PATCH 1/4] fix(docs): resolve F-01 Python 3.10-3.14 support Closes #5 --- .github/INTEGRATION-TEST.md | 8 +-- .github/RELEASE.md | 21 +++++--- .github/workflows/e2e.yml | 4 +- CONTRIBUTING.md | 2 +- README.md | 8 +-- src/mcp_server_python_docs/__main__.py | 38 ++++++++----- src/mcp_server_python_docs/detection.py | 4 +- .../ingestion/cpython_versions.py | 32 +++++++++++ .../ingestion/publish.py | 14 +++-- .../ingestion/sphinx_json.py | 45 ++++++++++++++++ src/mcp_server_python_docs/server.py | 3 +- tests/test_ci_workflows.py | 6 ++- tests/test_ingestion.py | 35 ++++++++++++ tests/test_publish.py | 54 ++++++++++++++----- 14 files changed, 221 insertions(+), 53 deletions(-) create mode 100644 src/mcp_server_python_docs/ingestion/cpython_versions.py diff --git a/.github/INTEGRATION-TEST.md b/.github/INTEGRATION-TEST.md index 4ac3585..c724c7c 100644 --- a/.github/INTEGRATION-TEST.md +++ b/.github/INTEGRATION-TEST.md @@ -13,7 +13,7 @@ Release-specific sign-off still lives in [`.github/RELEASE.md`](RELEASE.md). - `uv run pyright src/` - `uv run pytest --tb=short -q` - Local index build completed: - - `uv run mcp-server-python-docs build-index --versions 3.12,3.13` + - `uv run mcp-server-python-docs build-index --versions 3.10,3.11,3.12,3.13,3.14` - Doctor passes: - `uv run mcp-server-python-docs doctor` - Slow E2E workflow passes when preparing a release: @@ -113,7 +113,7 @@ locked. - [ ] `uvx mcp-server-python-docs --version` - Expected: prints the current package version -- [ ] `uvx mcp-server-python-docs build-index --versions 3.12,3.13` +- [ ] `uvx mcp-server-python-docs build-index --versions 3.10,3.11,3.12,3.13,3.14` - Expected: index build completes successfully - [ ] `uvx mcp-server-python-docs doctor` - Expected: all required checks pass @@ -131,8 +131,8 @@ or supported Python versions. 
- Expected: both Python 3.13 and Python 3.14 jobs start - [ ] Confirm each job installs the built wheel into a clean virtual environment - Expected: the command path is the installed `mcp-server-python-docs`, not editable source -- [ ] Confirm `build-index --versions 3.12,3.13` passes - - Expected: both versions produce content, not symbol-only fallback +- [ ] Confirm `build-index --versions 3.10,3.11,3.12,3.13,3.14` passes + - Expected: all five versions produce content, not symbol-only fallback - [ ] Confirm `doctor` and `validate-corpus` pass - Expected: corpus smoke checks include requested versions and the default version - [ ] Inspect uploaded logs if a job fails diff --git a/.github/RELEASE.md b/.github/RELEASE.md index 7faff87..7d42e4f 100644 --- a/.github/RELEASE.md +++ b/.github/RELEASE.md @@ -19,11 +19,15 @@ Before the first release, configure PyPI Trusted Publishing: ## Notes -**Python version coverage:** The release workflow builds and tests against Python 3.13 only. -Python 3.12 is covered by the CI workflow (`ci.yml`) which runs a 2x2 matrix (3.12/3.13 x -ubuntu/macos) on every push to `main`. Since tags are created from commits that have already -passed CI, 3.12 compatibility is verified before the release workflow runs. This is an accepted -trade-off to keep the release artifact pipeline simple (single Python version produces the wheel). +**Runtime coverage:** The release workflow builds and tests against Python 3.13 only. +Python 3.12 is covered by the CI workflow (`ci.yml`) which runs a 2x2 matrix +(3.12/3.13 x ubuntu/macos) on every push to `main`. Since tags are created +from commits that have already passed CI, 3.12 compatibility is verified before +the release workflow runs. This is an accepted trade-off to keep the release +artifact pipeline simple (single Python version produces the wheel). + +**Documentation coverage:** The full docs index target is Python documentation +versions 3.10 through 3.14. 
## Creating a Release @@ -106,7 +110,7 @@ Complete these steps in order. Each step has a checkbox -- do not skip ahead. First public release of mcp-server-python-docs. A read-only, version-aware MCP retrieval server over Python - standard library documentation (3.12 + 3.13). + standard library documentation (3.10 through 3.14). Installable via: uvx mcp-server-python-docs" ``` @@ -137,7 +141,7 @@ Complete these steps in order. Each step has a checkbox -- do not skip ahead. # Should print 0.1.0 # Step 2: Build index - uvx mcp-server-python-docs build-index --versions 3.12,3.13 + uvx mcp-server-python-docs build-index --versions 3.10,3.11,3.12,3.13,3.14 # Should complete successfully # Step 3: Doctor check @@ -148,7 +152,8 @@ Complete these steps in order. Each step has a checkbox -- do not skip ahead. - Run GitHub Actions workflow `Slow E2E` - Confirm Python 3.13 and Python 3.14 jobs both pass - Confirm each job installs the built wheel, runs - `build-index --versions 3.12,3.13`, `doctor`, and `validate-corpus` + `build-index --versions 3.10,3.11,3.12,3.13,3.14`, `doctor`, and + `validate-corpus` - [ ] Claude Desktop test with published package: Configure `mcpServers` with `uvx mcp-server-python-docs` and verify "what is asyncio.TaskGroup" returns a correct hit diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 91f8223..1091ba3 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -9,7 +9,7 @@ jobs: installed-build-index: name: Installed build-index (Python ${{ matrix.python-version }}) runs-on: ubuntu-latest - timeout-minutes: 60 + timeout-minutes: 120 strategy: fail-fast: false @@ -42,7 +42,7 @@ jobs: - name: Build and validate full docs index run: | set -o pipefail - .e2e-venv/bin/mcp-server-python-docs build-index --versions 3.12,3.13 \ + .e2e-venv/bin/mcp-server-python-docs build-index --versions 3.10,3.11,3.12,3.13,3.14 \ 2>&1 | tee "${RUNNER_TEMP}/build-index-${{ matrix.python-version }}.log" 
.e2e-venv/bin/mcp-server-python-docs doctor .e2e-venv/bin/mcp-server-python-docs validate-corpus diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 94e2a65..62577e9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -42,7 +42,7 @@ uv run pytest tests/test_retrieval_regression.py -q The server needs a local SQLite index before runtime validation: ```bash -uv run mcp-server-python-docs build-index --versions 3.12,3.13 +uv run mcp-server-python-docs build-index --versions 3.10,3.11,3.12,3.13,3.14 uv run mcp-server-python-docs doctor uv run mcp-server-python-docs validate-corpus ``` diff --git a/README.md b/README.md index 85f123a..cceb308 100644 --- a/README.md +++ b/README.md @@ -80,13 +80,13 @@ shell or use `python -m uv ...` as a fallback for local contributor commands. Build the local documentation index: ```bash -uvx mcp-server-python-docs build-index --versions 3.12,3.13 +uvx mcp-server-python-docs build-index --versions 3.10,3.11,3.12,3.13,3.14 ``` If you installed the package persistently, you can drop the `uvx` prefix: ```bash -mcp-server-python-docs build-index --versions 3.12,3.13 +mcp-server-python-docs build-index --versions 3.10,3.11,3.12,3.13,3.14 ``` This downloads Python's `objects.inv` files, clones CPython docs sources, runs @@ -178,7 +178,7 @@ The server currently exposes four MCP tools: Use this server when you need: - exact Python stdlib symbol resolution -- consistent version-aware answers across Python 3.12 and 3.13 +- consistent version-aware answers across Python 3.10 through 3.14 - token-efficient section retrieval from official docs - a local, read-only MCP server with a simple operational story @@ -292,7 +292,7 @@ For contributor setup and verification: Tested on macOS and Linux. Windows should work, but it is not verified on every release. -Python 3.12 and 3.13 are currently supported. +Python documentation versions 3.10 through 3.14 are currently supported. 
## License diff --git a/src/mcp_server_python_docs/__main__.py b/src/mcp_server_python_docs/__main__.py index b8fc22d..d036f74 100644 --- a/src/mcp_server_python_docs/__main__.py +++ b/src/mcp_server_python_docs/__main__.py @@ -105,7 +105,7 @@ def serve() -> None: @click.option( "--versions", required=True, - help="Comma-separated Python versions (e.g., 3.12,3.13)", + help="Comma-separated Python versions (e.g., 3.10,3.11,3.12,3.13,3.14)", ) @click.option( "--skip-content", @@ -120,6 +120,9 @@ def build_index(versions: str, skip_content: bool) -> None: import venv from pathlib import Path + from mcp_server_python_docs.ingestion.cpython_versions import ( + CPYTHON_DOCS_BUILD_CONFIG, + ) from mcp_server_python_docs.ingestion.inventory import ingest_inventory from mcp_server_python_docs.ingestion.publish import ( _version_sort_key, @@ -128,6 +131,7 @@ def build_index(versions: str, skip_content: bool) -> None: publish_index, ) from mcp_server_python_docs.ingestion.sphinx_json import ( + build_sphinx_bootstrap_requirements, build_sphinx_json_command, ingest_sphinx_json_dir, make_sphinx_json_env, @@ -142,15 +146,11 @@ def build_index(versions: str, skip_content: bool) -> None: get_readwrite_connection, ) - # Version tag mapping: CPython git tag and Sphinx constraints (INGR-C-02) - VERSION_CONFIG: dict[str, dict[str, str]] = { - "3.12": {"tag": "v3.12.13", "sphinx_pin": "sphinx~=8.2.0"}, - "3.13": {"tag": "v3.13.12", "sphinx_pin": "sphinx<9.0.0"}, - } - version_list = parse_expected_versions(versions) if not version_list: - logger.error("No valid versions specified. Example: --versions 3.13") + logger.error( + "No valid versions specified. 
Example: --versions 3.10,3.11,3.12,3.13,3.14" + ) raise SystemExit(1) # Validate version format before sorting (CR-03, WR-04) @@ -188,7 +188,7 @@ def build_index(versions: str, skip_content: bool) -> None: continue # === Content ingestion (INGR-C-01 through INGR-C-03) === - config = VERSION_CONFIG.get(version) + config = CPYTHON_DOCS_BUILD_CONFIG.get(version) if not config: logger.warning( "No CPython build config for %s, skipping content ingestion", @@ -226,9 +226,17 @@ def build_index(versions: str, skip_content: bool) -> None: ) pip_path = os.path.join(scripts_dir, "pip") - # Install Sphinx with the version pin for this CPython branch + # Install Sphinx with the version pin for this CPython branch. + # Older Sphinx releases still import pkg_resources, which + # modern venvs do not always seed by default. subprocess.run( - [pip_path, "install", config["sphinx_pin"]], + [ + pip_path, + "install", + *build_sphinx_bootstrap_requirements( + config["sphinx_pin"] + ), + ], check=True, capture_output=True, text=True, @@ -381,7 +389,10 @@ def validate_corpus(db_path: str | None) -> None: if not target.exists(): logger.error("Index not found at %s", target) - logger.error("Run: mcp-server-python-docs build-index --versions 3.13") + logger.error( + "Run: mcp-server-python-docs build-index --versions " + "3.10,3.11,3.12,3.13,3.14" + ) raise SystemExit(1) logger.info("Validating corpus at %s", target) @@ -506,7 +517,8 @@ def doctor() -> None: index_detail = str(index_path) if not index_exists: index_detail += ( - " (not found -- run: mcp-server-python-docs build-index --versions 3.13)" + " (not found -- run: mcp-server-python-docs build-index --versions " + "3.10,3.11,3.12,3.13,3.14)" ) else: size_mb = index_path.stat().st_size / (1024 * 1024) diff --git a/src/mcp_server_python_docs/detection.py b/src/mcp_server_python_docs/detection.py index e1f12bc..a7a3691 100644 --- a/src/mcp_server_python_docs/detection.py +++ b/src/mcp_server_python_docs/detection.py @@ -74,8 +74,8 @@ 
def match_to_indexed( """Match a detected version to the closest indexed version. Returns the detected version if it's in the index, otherwise None. - We don't guess — if 3.11 is detected but only 3.12/3.13 are indexed, - return None and let the normal default resolution handle it. + We don't guess -- if a detected version is not indexed, return None and + let the normal default resolution handle it. """ if detected in indexed_versions: return detected diff --git a/src/mcp_server_python_docs/ingestion/cpython_versions.py b/src/mcp_server_python_docs/ingestion/cpython_versions.py new file mode 100644 index 0000000..eed3a49 --- /dev/null +++ b/src/mcp_server_python_docs/ingestion/cpython_versions.py @@ -0,0 +1,32 @@ +"""Pinned CPython documentation build targets.""" +from __future__ import annotations + +from typing import Final, TypedDict + + +class CPythonDocsBuildConfig(TypedDict): + """Build settings for one CPython documentation release.""" + + tag: str + sphinx_pin: str + + +SUPPORTED_DOC_VERSIONS: Final[tuple[str, ...]] = ( + "3.10", + "3.11", + "3.12", + "3.13", + "3.14", +) + +SUPPORTED_DOC_VERSIONS_CSV: Final[str] = ",".join(SUPPORTED_DOC_VERSIONS) + +# CPython git tags are pinned so content builds are reproducible and do not +# drift when a maintenance branch receives new commits. 
+CPYTHON_DOCS_BUILD_CONFIG: Final[dict[str, CPythonDocsBuildConfig]] = { + "3.10": {"tag": "v3.10.20", "sphinx_pin": "sphinx==3.4.3"}, + "3.11": {"tag": "v3.11.15", "sphinx_pin": "sphinx~=7.2.0"}, + "3.12": {"tag": "v3.12.13", "sphinx_pin": "sphinx~=8.2.0"}, + "3.13": {"tag": "v3.13.13", "sphinx_pin": "sphinx<9.0.0"}, + "3.14": {"tag": "v3.14.4", "sphinx_pin": "sphinx<9.0.0"}, +} diff --git a/src/mcp_server_python_docs/ingestion/publish.py b/src/mcp_server_python_docs/ingestion/publish.py index e5b4c41..69cc058 100644 --- a/src/mcp_server_python_docs/ingestion/publish.py +++ b/src/mcp_server_python_docs/ingestion/publish.py @@ -23,6 +23,8 @@ logger = logging.getLogger(__name__) +SMOKE_SENTINEL_SYMBOL = "asyncio.run" + def _version_sort_key(version: str) -> tuple[int, ...]: """Sort dotted Python versions numerically.""" @@ -83,7 +85,7 @@ def record_ingestion_run( Args: conn: Read-write SQLite connection. source: Source identifier (e.g., 'python-docs'). - version: Version string (e.g., '3.13' or '3.12,3.13'). + version: Version string (e.g., '3.13' or '3.10,3.11,3.12,3.13,3.14'). status: Run status ('building', 'smoke_testing', 'published', 'failed'). artifact_hash: SHA256 hash of the build artifact. notes: Optional notes about the run. @@ -221,16 +223,18 @@ def run_smoke_tests( "SELECT 1 FROM symbols " "JOIN doc_sets ON doc_sets.id = symbols.doc_set_id " "WHERE doc_sets.version = ? " - "AND symbols.qualified_name = 'asyncio.TaskGroup' LIMIT 1", - (version,), + "AND symbols.qualified_name = ? 
LIMIT 1", + (version, SMOKE_SENTINEL_SYMBOL), ).fetchone() if row: messages.append( - f"OK: sentinel: asyncio.TaskGroup symbol found for version {version}" + f"OK: sentinel: {SMOKE_SENTINEL_SYMBOL} symbol found " + f"for version {version}" ) else: messages.append( - f"FAIL: sentinel: asyncio.TaskGroup symbol missing for version {version}" + f"FAIL: sentinel: {SMOKE_SENTINEL_SYMBOL} symbol missing " + f"for version {version}" ) passed = False diff --git a/src/mcp_server_python_docs/ingestion/sphinx_json.py b/src/mcp_server_python_docs/ingestion/sphinx_json.py index e7878be..e40fa75 100644 --- a/src/mcp_server_python_docs/ingestion/sphinx_json.py +++ b/src/mcp_server_python_docs/ingestion/sphinx_json.py @@ -63,6 +63,41 @@ def _mcp_json_default(self, obj): jsonimpl.SphinxJSONEncoder.default = _mcp_json_default ''' +_IMGHDR_COMPAT_MODULE = '''"""Compatibility shim for old Sphinx on Python 3.13+.""" + +from __future__ import annotations + +import os + + +tests = [] + + +def what(file, h=None): + """Return an image type for the header formats old Sphinx may ask about.""" + if h is None: + if isinstance(file, (str, bytes, os.PathLike)): + with open(file, "rb") as image_file: + h = image_file.read(32) + else: + position = file.tell() + h = file.read(32) + file.seek(position) + + for test in tests: + result = test(h, file) + if result: + return result + + if h.startswith(b"\\xff\\xd8"): + return "jpeg" + if h.startswith(b"\\x89PNG\\r\\n\\x1a\\n"): + return "png" + if h[:6] in (b"GIF87a", b"GIF89a"): + return "gif" + return None +''' + def _canonical_requirement_name(line: str) -> str | None: stripped = line.strip() @@ -109,6 +144,8 @@ def write_sphinx_json_sitecustomize(output_dir: Path) -> Path: output_dir.mkdir(parents=True, exist_ok=True) sitecustomize_path = output_dir / "sitecustomize.py" sitecustomize_path.write_text(_SPHINX_JSON_SITECUSTOMIZE, encoding="utf-8") + imghdr_path = output_dir / "imghdr.py" + imghdr_path.write_text(_IMGHDR_COMPAT_MODULE, encoding="utf-8") 
return sitecustomize_path @@ -145,6 +182,14 @@ def build_sphinx_json_command( ] +def build_sphinx_bootstrap_requirements(sphinx_pin: str) -> list[str]: + """Return packages needed before installing CPython Doc requirements.""" + return [ + "setuptools<70", + sphinx_pin, + ] + + def parse_fjson(filepath: Path) -> dict: """Load and parse a .fjson file. diff --git a/src/mcp_server_python_docs/server.py b/src/mcp_server_python_docs/server.py index 93e0cb8..c14e3a7 100644 --- a/src/mcp_server_python_docs/server.py +++ b/src/mcp_server_python_docs/server.py @@ -69,7 +69,8 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]: if not index_path.exists(): msg = ( f"No index found at {index_path}\n" - f"Run: mcp-server-python-docs build-index --versions 3.13" + "Run: mcp-server-python-docs build-index --versions " + "3.10,3.11,3.12,3.13,3.14" ) logger.error(msg) print(msg, file=sys.stderr) diff --git a/tests/test_ci_workflows.py b/tests/test_ci_workflows.py index d4fb0a0..b6e0890 100644 --- a/tests/test_ci_workflows.py +++ b/tests/test_ci_workflows.py @@ -4,6 +4,7 @@ from pathlib import Path PROJECT_ROOT = Path(__file__).parent.parent +SUPPORTED_VERSION_ARGS = "3.10,3.11,3.12,3.13,3.14" def test_slow_e2e_workflow_runs_installed_build_index() -> None: @@ -18,7 +19,10 @@ def test_slow_e2e_workflow_runs_installed_build_index() -> None: assert "uv build" in content assert "python -m venv" in content assert "python -m pip install dist/" in content - assert "mcp-server-python-docs build-index --versions 3.12,3.13" in content + assert ( + f"mcp-server-python-docs build-index --versions {SUPPORTED_VERSION_ARGS}" + in content + ) assert "mcp-server-python-docs doctor" in content assert "mcp-server-python-docs validate-corpus" in content assert "actions/upload-artifact" in content diff --git a/tests/test_ingestion.py b/tests/test_ingestion.py index 7eb2d15..0b4e668 100644 --- a/tests/test_ingestion.py +++ b/tests/test_ingestion.py @@ -16,7 +16,13 @@ import pytest 
from mcp_server_python_docs.errors import IngestionError +from mcp_server_python_docs.ingestion.cpython_versions import ( + CPYTHON_DOCS_BUILD_CONFIG, + SUPPORTED_DOC_VERSIONS, + SUPPORTED_DOC_VERSIONS_CSV, +) from mcp_server_python_docs.ingestion.sphinx_json import ( + build_sphinx_bootstrap_requirements, build_sphinx_json_command, extract_code_blocks, extract_sections, @@ -32,6 +38,20 @@ ) +class TestCPythonVersionConfig: + def test_supports_python_3_10_through_3_14(self): + assert SUPPORTED_DOC_VERSIONS == ("3.10", "3.11", "3.12", "3.13", "3.14") + assert SUPPORTED_DOC_VERSIONS_CSV == "3.10,3.11,3.12,3.13,3.14" + + def test_supported_versions_have_pinned_docs_build_config(self): + assert set(CPYTHON_DOCS_BUILD_CONFIG) == set(SUPPORTED_DOC_VERSIONS) + + for version in SUPPORTED_DOC_VERSIONS: + config = CPYTHON_DOCS_BUILD_CONFIG[version] + assert config["tag"].startswith(f"v{version}.") + assert config["sphinx_pin"].startswith("sphinx") + + class TestJsonBuildRequirements: def test_omits_html_only_sphinx_extensions(self, tmp_path): source = tmp_path / "requirements.txt" @@ -90,6 +110,16 @@ def test_writes_translation_proxy_json_patch(self, tmp_path): assert "_TranslationProxy" in content assert "SphinxJSONEncoder.default" in content + def test_writes_imghdr_compat_module(self, tmp_path): + output_dir = tmp_path / "compat" + + write_sphinx_json_sitecustomize(output_dir) + + content = (output_dir / "imghdr.py").read_text(encoding="utf-8") + assert "tests = []" in content + assert "def what" in content + assert "jpeg" in content + def test_translation_proxy_patch_stringifies_proxy_objects( self, tmp_path, monkeypatch ): @@ -145,6 +175,11 @@ def test_sphinx_json_env_sets_compat_dir_without_existing_pythonpath(self, tmp_p class TestSphinxJsonCommand: + def test_bootstrap_requirements_include_setuptools_before_sphinx(self): + requirements = build_sphinx_bootstrap_requirements("sphinx==3.4.3") + + assert requirements == ["setuptools<70", "sphinx==3.4.3"] + def 
test_build_command_uses_json_builder_and_classic_theme(self, tmp_path): sphinx_build = tmp_path / "bin" / "sphinx-build" doc_dir = tmp_path / "cpython" / "Doc" diff --git a/tests/test_publish.py b/tests/test_publish.py index 00d3073..6686ca5 100644 --- a/tests/test_publish.py +++ b/tests/test_publish.py @@ -12,7 +12,9 @@ import pytest +from mcp_server_python_docs.ingestion.cpython_versions import SUPPORTED_DOC_VERSIONS from mcp_server_python_docs.ingestion.publish import ( + SMOKE_SENTINEL_SYMBOL, atomic_swap, compute_sha256, generate_build_path, @@ -96,7 +98,7 @@ def _create_symbols_only_db(self, db_path: Path) -> None: for i in range(2100): qualified_name = f"mod{i}.func{i}" if i == 0: - qualified_name = "asyncio.TaskGroup" + qualified_name = SMOKE_SENTINEL_SYMBOL conn.execute( "INSERT INTO symbols (doc_set_id, qualified_name, normalized_name, " "module, symbol_type, uri, anchor) " @@ -115,22 +117,30 @@ def _create_symbols_only_db(self, db_path: Path) -> None: conn.commit() conn.close() - def _create_populated_db(self, db_path: Path) -> None: + def _create_populated_db( + self, + db_path: Path, + versions: tuple[str, ...] 
= ("3.12", "3.13"), + ) -> None: """Helper: create a DB with enough data to pass smoke tests.""" conn = get_readwrite_connection(db_path) bootstrap_schema(conn) doc_set_ids: dict[str, int] = {} + default_version = max( + versions, + key=lambda v: tuple(int(p) for p in v.split(".")), + ) # Insert doc_sets - for version, is_default in (("3.12", 0), ("3.13", 1)): + for version in versions: conn.execute( "INSERT INTO doc_sets (source, version, language, label, is_default, base_url) " "VALUES ('python-docs', ?, 'en', ?, ?, ?)", ( version, f"Python {version}", - is_default, + 1 if version == default_version else 0, f"https://docs.python.org/{version}/", ), ) @@ -179,10 +189,14 @@ def _create_populated_db(self, db_path: Path) -> None: conn.execute( "INSERT INTO symbols (doc_set_id, qualified_name, normalized_name, " "module, symbol_type, uri, anchor) " - "VALUES (?, 'asyncio.TaskGroup', 'asyncio_taskgroup', 'asyncio', " - "'class', 'library/asyncio-task.html#asyncio.TaskGroup', " - "'asyncio.TaskGroup')", - (doc_set_id,), + "VALUES (?, ?, ?, 'asyncio', " + "'function', 'library/asyncio-runner.html#asyncio.run', " + "'asyncio.run')", + ( + doc_set_id, + SMOKE_SENTINEL_SYMBOL, + SMOKE_SENTINEL_SYMBOL.lower(), + ), ) for i in range(2099): conn.execute( @@ -244,6 +258,22 @@ def test_pass_on_populated_db(self, tmp_path): assert passed is True assert any("OK" in m for m in messages) + def test_pass_on_supported_issue_5_version_range(self, tmp_path): + """Smoke tests accept the full supported docs version range.""" + db_path = tmp_path / "issue-5-range.db" + self._create_populated_db(db_path, versions=SUPPORTED_DOC_VERSIONS) + + passed, messages = run_smoke_tests( + db_path, expected_versions=SUPPORTED_DOC_VERSIONS + ) + + assert passed is True + assert ( + "OK: doc_sets: expected versions present: 3.10, 3.11, 3.12, 3.13, 3.14" + in messages + ) + assert "OK: doc_sets: default version is 3.14" in messages + def test_fails_when_expected_version_is_missing(self, tmp_path): 
"""Smoke tests fail when a requested build version is absent.""" db_path = tmp_path / "missing-version.db" @@ -292,11 +322,11 @@ def test_fails_when_expected_version_has_no_content(self, tmp_path): assert passed is False assert "FAIL: documents: version 3.13 has 0 rows (need >= 10)" in messages - def test_fails_when_asyncio_taskgroup_symbol_sentinel_is_missing(self, tmp_path): - """Smoke tests fail when the core asyncio.TaskGroup symbol sentinel is absent.""" + def test_fails_when_asyncio_run_symbol_sentinel_is_missing(self, tmp_path): + """Smoke tests fail when the cross-version asyncio.run sentinel is absent.""" db_path = tmp_path / "missing-sentinel-symbol.db" self._create_populated_db(db_path) - self._remove_symbol(db_path, "asyncio.TaskGroup") + self._remove_symbol(db_path, SMOKE_SENTINEL_SYMBOL) passed, messages = run_smoke_tests( db_path, expected_versions=["3.12", "3.13"] @@ -304,7 +334,7 @@ def test_fails_when_asyncio_taskgroup_symbol_sentinel_is_missing(self, tmp_path) assert passed is False assert ( - "FAIL: sentinel: asyncio.TaskGroup symbol missing for version 3.13" + f"FAIL: sentinel: {SMOKE_SENTINEL_SYMBOL} symbol missing for version 3.13" in messages ) From ee9e3c1cd3a19b210090f7d3834fb347bd688c17 Mon Sep 17 00:00:00 2001 From: Aymen Hammouda Date: Tue, 28 Apr 2026 19:45:48 +0200 Subject: [PATCH 2/4] refactor: reuse docs version helpers --- src/mcp_server_python_docs/__main__.py | 17 +++++++++++------ .../ingestion/sphinx_json.py | 7 ++++++- src/mcp_server_python_docs/server.py | 8 ++++++-- tests/test_publish.py | 6 ++---- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/mcp_server_python_docs/__main__.py b/src/mcp_server_python_docs/__main__.py index d036f74..8500896 100644 --- a/src/mcp_server_python_docs/__main__.py +++ b/src/mcp_server_python_docs/__main__.py @@ -66,6 +66,10 @@ def _consume_saved_stdout_fd() -> int: # === Now safe to import everything else === import click # noqa: E402 +from 
mcp_server_python_docs.ingestion.cpython_versions import ( # noqa: E402 + SUPPORTED_DOC_VERSIONS_CSV, +) + @click.group(invoke_without_command=True) @click.option("--version", "show_version", is_flag=True, help="Show version and exit.") @@ -105,7 +109,7 @@ def serve() -> None: @click.option( "--versions", required=True, - help="Comma-separated Python versions (e.g., 3.10,3.11,3.12,3.13,3.14)", + help=f"Comma-separated Python versions (e.g., {SUPPORTED_DOC_VERSIONS_CSV})", ) @click.option( "--skip-content", @@ -149,7 +153,8 @@ def build_index(versions: str, skip_content: bool) -> None: version_list = parse_expected_versions(versions) if not version_list: logger.error( - "No valid versions specified. Example: --versions 3.10,3.11,3.12,3.13,3.14" + "No valid versions specified. Example: --versions %s", + SUPPORTED_DOC_VERSIONS_CSV, ) raise SystemExit(1) @@ -390,8 +395,8 @@ def validate_corpus(db_path: str | None) -> None: if not target.exists(): logger.error("Index not found at %s", target) logger.error( - "Run: mcp-server-python-docs build-index --versions " - "3.10,3.11,3.12,3.13,3.14" + "Run: mcp-server-python-docs build-index --versions %s", + SUPPORTED_DOC_VERSIONS_CSV, ) raise SystemExit(1) @@ -517,8 +522,8 @@ def doctor() -> None: index_detail = str(index_path) if not index_exists: index_detail += ( - " (not found -- run: mcp-server-python-docs build-index --versions " - "3.10,3.11,3.12,3.13,3.14)" + f" (not found -- run: mcp-server-python-docs build-index --versions " + f"{SUPPORTED_DOC_VERSIONS_CSV})" ) else: size_mb = index_path.stat().st_size / (1024 * 1024) diff --git a/src/mcp_server_python_docs/ingestion/sphinx_json.py b/src/mcp_server_python_docs/ingestion/sphinx_json.py index e40fa75..a21b9d6 100644 --- a/src/mcp_server_python_docs/ingestion/sphinx_json.py +++ b/src/mcp_server_python_docs/ingestion/sphinx_json.py @@ -183,7 +183,12 @@ def build_sphinx_json_command( def build_sphinx_bootstrap_requirements(sphinx_pin: str) -> list[str]: - """Return 
packages needed before installing CPython Doc requirements.""" + """Return packages needed before installing CPython Doc requirements. + + setuptools<70 keeps ``pkg_resources`` available, which old Sphinx + releases (e.g. the 3.4.x line pinned for the Python 3.10 docs build) + still import at startup. + """ return [ "setuptools<70", sphinx_pin, diff --git a/src/mcp_server_python_docs/server.py b/src/mcp_server_python_docs/server.py index c14e3a7..a6d8f03 100644 --- a/src/mcp_server_python_docs/server.py +++ b/src/mcp_server_python_docs/server.py @@ -67,10 +67,14 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]: # Fail fast on missing index (SRVR-10) if not index_path.exists(): + from mcp_server_python_docs.ingestion.cpython_versions import ( + SUPPORTED_DOC_VERSIONS_CSV, + ) + msg = ( f"No index found at {index_path}\n" - "Run: mcp-server-python-docs build-index --versions " - "3.10,3.11,3.12,3.13,3.14" + f"Run: mcp-server-python-docs build-index --versions " + f"{SUPPORTED_DOC_VERSIONS_CSV}" ) logger.error(msg) print(msg, file=sys.stderr) diff --git a/tests/test_publish.py b/tests/test_publish.py index 6686ca5..1292089 100644 --- a/tests/test_publish.py +++ b/tests/test_publish.py @@ -15,6 +15,7 @@ from mcp_server_python_docs.ingestion.cpython_versions import SUPPORTED_DOC_VERSIONS from mcp_server_python_docs.ingestion.publish import ( SMOKE_SENTINEL_SYMBOL, + _version_sort_key, atomic_swap, compute_sha256, generate_build_path, @@ -127,10 +128,7 @@ def _create_populated_db( bootstrap_schema(conn) doc_set_ids: dict[str, int] = {} - default_version = max( - versions, - key=lambda v: tuple(int(p) for p in v.split(".")), - ) + default_version = max(versions, key=_version_sort_key) # Insert doc_sets for version in versions: From cd774989644ad0a4baa6a43f01ea4926fd4b7501 Mon Sep 17 00:00:00 2001 From: Aymen Hammouda Date: Tue, 28 Apr 2026 19:56:52 +0200 Subject: [PATCH 3/4] Address PR review cleanup --- src/mcp_server_python_docs/__main__.py | 15 
++++++--- .../ingestion/publish.py | 3 +- .../ingestion/sphinx_json.py | 28 ++++++++++++---- tests/test_ci_workflows.py | 2 ++ tests/test_ingestion.py | 33 +++++++++++++++++++ 5 files changed, 69 insertions(+), 12 deletions(-) diff --git a/src/mcp_server_python_docs/__main__.py b/src/mcp_server_python_docs/__main__.py index 8500896..35fc016 100644 --- a/src/mcp_server_python_docs/__main__.py +++ b/src/mcp_server_python_docs/__main__.py @@ -232,15 +232,20 @@ def build_index(versions: str, skip_content: bool) -> None: pip_path = os.path.join(scripts_dir, "pip") # Install Sphinx with the version pin for this CPython branch. - # Older Sphinx releases still import pkg_resources, which - # modern venvs do not always seed by default. + bootstrap_requirements = build_sphinx_bootstrap_requirements( + config["sphinx_pin"] + ) + if len(bootstrap_requirements) > 1: + logger.info( + "Installing Sphinx bootstrap packages for Python %s: %s", + version, + ", ".join(bootstrap_requirements[:-1]), + ) subprocess.run( [ pip_path, "install", - *build_sphinx_bootstrap_requirements( - config["sphinx_pin"] - ), + *bootstrap_requirements, ], check=True, capture_output=True, diff --git a/src/mcp_server_python_docs/ingestion/publish.py b/src/mcp_server_python_docs/ingestion/publish.py index 69cc058..7260cf0 100644 --- a/src/mcp_server_python_docs/ingestion/publish.py +++ b/src/mcp_server_python_docs/ingestion/publish.py @@ -14,6 +14,7 @@ from collections.abc import Iterable from datetime import datetime from pathlib import Path +from typing import Final from mcp_server_python_docs.storage.db import ( get_cache_dir, @@ -23,7 +24,7 @@ logger = logging.getLogger(__name__) -SMOKE_SENTINEL_SYMBOL = "asyncio.run" +SMOKE_SENTINEL_SYMBOL: Final[str] = "asyncio.run" def _version_sort_key(version: str) -> tuple[int, ...]: diff --git a/src/mcp_server_python_docs/ingestion/sphinx_json.py b/src/mcp_server_python_docs/ingestion/sphinx_json.py index a21b9d6..8156d6f 100644 --- 
a/src/mcp_server_python_docs/ingestion/sphinx_json.py +++ b/src/mcp_server_python_docs/ingestion/sphinx_json.py @@ -63,13 +63,14 @@ def _mcp_json_default(self, obj): jsonimpl.SphinxJSONEncoder.default = _mcp_json_default ''' -_IMGHDR_COMPAT_MODULE = '''"""Compatibility shim for old Sphinx on Python 3.13+.""" +_IMGHDR_COMPAT_MODULE = '''"""Compatibility shim for Sphinx builds that import imghdr.""" from __future__ import annotations import os +# Preserve the stdlib imghdr extension hook for old Sphinx-related extensions. tests = [] @@ -144,6 +145,8 @@ def write_sphinx_json_sitecustomize(output_dir: Path) -> Path: output_dir.mkdir(parents=True, exist_ok=True) sitecustomize_path = output_dir / "sitecustomize.py" sitecustomize_path.write_text(_SPHINX_JSON_SITECUSTOMIZE, encoding="utf-8") + # PYTHONPATH prepending makes this shim shadow stdlib imghdr on Python 3.12. + # The detected formats match the Sphinx usage we need for CPython docs builds. imghdr_path = output_dir / "imghdr.py" imghdr_path.write_text(_IMGHDR_COMPAT_MODULE, encoding="utf-8") return sitecustomize_path @@ -182,17 +185,30 @@ def build_sphinx_json_command( ] +def _sphinx_pin_needs_pkg_resources(sphinx_pin: str) -> bool: + normalized = sphinx_pin.strip().lower().replace(" ", "") + return normalized.startswith( + ( + "sphinx==3.", + "sphinx==4.", + "sphinx~=3.", + "sphinx~=4.", + "sphinx<5", + "sphinx<=4", + ) + ) + + def build_sphinx_bootstrap_requirements(sphinx_pin: str) -> list[str]: """Return packages needed before installing CPython Doc requirements. - setuptools<70 keeps ``pkg_resources`` available, which old Sphinx + setuptools<70 provides the ``pkg_resources`` module that old Sphinx releases (e.g. the 3.4.x line pinned for the Python 3.10 docs build) still import at startup. 
""" - return [ - "setuptools<70", - sphinx_pin, - ] + if _sphinx_pin_needs_pkg_resources(sphinx_pin): + return ["setuptools<70", sphinx_pin] + return [sphinx_pin] def parse_fjson(filepath: Path) -> dict: diff --git a/tests/test_ci_workflows.py b/tests/test_ci_workflows.py index b6e0890..85f7a13 100644 --- a/tests/test_ci_workflows.py +++ b/tests/test_ci_workflows.py @@ -4,6 +4,8 @@ from pathlib import Path PROJECT_ROOT = Path(__file__).parent.parent +# Keep this literal independent of SUPPORTED_DOC_VERSIONS_CSV so the test catches +# workflow drift even if the application constant changes at the same time. SUPPORTED_VERSION_ARGS = "3.10,3.11,3.12,3.13,3.14" diff --git a/tests/test_ingestion.py b/tests/test_ingestion.py index 0b4e668..3e56346 100644 --- a/tests/test_ingestion.py +++ b/tests/test_ingestion.py @@ -7,6 +7,7 @@ """ from __future__ import annotations +import io import os import runpy import shutil @@ -117,9 +118,31 @@ def test_writes_imghdr_compat_module(self, tmp_path): content = (output_dir / "imghdr.py").read_text(encoding="utf-8") assert "tests = []" in content + assert "stdlib imghdr extension hook" in content assert "def what" in content assert "jpeg" in content + def test_imghdr_compat_module_detects_sphinx_image_formats(self, tmp_path): + output_dir = tmp_path / "compat" + write_sphinx_json_sitecustomize(output_dir) + namespace = runpy.run_path(str(output_dir / "imghdr.py")) + + what = namespace["what"] + + assert what(io.BytesIO(b"\xff\xd8\xff\xe0")) == "jpeg" + assert what(io.BytesIO(b"\x89PNG\r\n\x1a\nextra")) == "png" + assert what(io.BytesIO(b"GIF89aextra")) == "gif" + + def test_imghdr_compat_module_preserves_tests_hook(self, tmp_path): + output_dir = tmp_path / "compat" + write_sphinx_json_sitecustomize(output_dir) + namespace = runpy.run_path(str(output_dir / "imghdr.py")) + + tests = namespace["tests"] + tests.append(lambda header, _file: "bmp" if header.startswith(b"BM") else None) + + assert namespace["what"](io.BytesIO(b"BMfake")) == 
"bmp" + def test_translation_proxy_patch_stringifies_proxy_objects( self, tmp_path, monkeypatch ): @@ -180,6 +203,16 @@ def test_bootstrap_requirements_include_setuptools_before_sphinx(self): assert requirements == ["setuptools<70", "sphinx==3.4.3"] + def test_bootstrap_requirements_include_setuptools_for_sphinx_4(self): + requirements = build_sphinx_bootstrap_requirements("Sphinx < 5") + + assert requirements == ["setuptools<70", "Sphinx < 5"] + + def test_bootstrap_requirements_skip_setuptools_for_modern_sphinx(self): + requirements = build_sphinx_bootstrap_requirements("sphinx~=8.2.0") + + assert requirements == ["sphinx~=8.2.0"] + def test_build_command_uses_json_builder_and_classic_theme(self, tmp_path): sphinx_build = tmp_path / "bin" / "sphinx-build" doc_dir = tmp_path / "cpython" / "Doc" From eebc2e30a27e02e2ce1d9ed5a893e1df6bae17a0 Mon Sep 17 00:00:00 2001 From: Aymen Hammouda Date: Tue, 28 Apr 2026 19:59:47 +0200 Subject: [PATCH 4/4] Fix E2E workflow env context --- .github/workflows/e2e.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 1091ba3..65c9b71 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -16,10 +16,6 @@ jobs: matrix: python-version: ["3.13", "3.14"] - env: - HOME: ${{ runner.temp }}/mcp-python-docs-home - XDG_CACHE_HOME: ${{ runner.temp }}/mcp-python-docs-cache - steps: - uses: actions/checkout@v4 @@ -40,6 +36,9 @@ jobs: .e2e-venv/bin/mcp-server-python-docs --version - name: Build and validate full docs index + env: + HOME: ${{ runner.temp }}/mcp-python-docs-home + XDG_CACHE_HOME: ${{ runner.temp }}/mcp-python-docs-cache run: | set -o pipefail .e2e-venv/bin/mcp-server-python-docs build-index --versions 3.10,3.11,3.12,3.13,3.14 \