diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 42e1f82..62fa4a1 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -36,6 +36,18 @@ jobs:
       - name: Run ruff
         run: python -m ruff check src/ tests/
 
+      # Fail before the suite starts if a real key is visible to this
+      # step's shell, so unmarked tests cannot reach the API unnoticed.
+      - name: Guard — ANTHROPIC_API_KEY must NOT be set for default suite
+        run: |
+          if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
+            echo "::error::ANTHROPIC_API_KEY is set in the default test environment."
+            echo "This can cause non-'live'-marked tests to leak real API calls."
+            echo "Either unset the key, or mark the test with @pytest.mark.live."
+            exit 1
+          fi
+        shell: bash
+
       - name: Run tests (with coverage on ubuntu x py3.11)
         run: |
           if [ "${{ matrix.os }}" = "ubuntu-latest" ] && [ "${{ matrix.python-version }}" = "3.11" ]; then
@@ -67,5 +79,15 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install -e ".[dev]"
+      # Guard repeated here so this suite also refuses to run with a real key.
+      - name: Guard — ANTHROPIC_API_KEY must NOT be set for default suite
+        run: |
+          if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
+            echo "::error::ANTHROPIC_API_KEY is set in the default test environment."
+            echo "This can cause non-'live'-marked tests to leak real API calls."
+            echo "Either unset the key, or mark the test with @pytest.mark.live."
+            exit 1
+          fi
+        shell: bash
       - name: Run tests; shim tests must skip cleanly via importorskip
         run: python -m pytest tests/ -v --tb=short
diff --git a/pyproject.toml b/pyproject.toml
index 463f172..5b0ad46 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,9 +66,12 @@ attune_help = ["templates/**/*.md", "templates/**/*.json", "demos/**/*.md"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
-addopts = "-ra"
+# A command-line -m replaces the expression below rather than combining
+# with it; add "and not live" to any custom -m unless live tests are wanted.
+addopts = "-ra -m 'not live'"
 markers = [
     "slow: tests that create real venvs or otherwise take >1s",
+    "live: opt-in tests that hit a real LLM API. Skipped by default; require ANTHROPIC_API_KEY and any test-specific env flags.",
 ]
 
 [tool.coverage.run]
diff --git a/tests/README.md b/tests/README.md
index c318048..6b9f974 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -20,19 +20,14 @@ The `slow` marker covers tests that create real venvs
 (`test_zero_dep_install.py`).
-Skip with `pytest -m "not slow"` for fast iteration.
+Skip with `pytest -m "not slow and not live"` for fast iteration; a
+command-line `-m` replaces the default `-m 'not live'` from `addopts`.
 
-## LLM mocking standard
+## LLM mocking standard, `live` marker, CI guard, cost policy
 
-attune-help itself makes no LLM calls. Cross-layer integration tests that
-*could* exercise an LLM follow the **attune-author reference pattern**:
+See **`testing-conventions.md`** in the attune workspace umbrella —
+the canonical reference (mocking pattern, `live` marker semantics, CI
+guard expectation, cost & quota policy). Applies to all four layers.
 
-- Strip `ANTHROPIC_API_KEY` via an autouse fixture so a misconfigured
-  test never reaches the network.
-- Patch `anthropic.Anthropic` at import time, not at call site.
-- Reset module-level singletons (e.g. `_RagPipeline`) between tests with
-  an autouse fixture so a leaked patch doesn't poison later tests.
-
-See `attune-author/tests/conftest.py` (`_lenient_polish_by_default`,
-`_reset_rag_pipeline`). Pass 2 of the test-strategy spec will formalize
-this into a shared `docs/testing-conventions.md` across layers.
+attune-help itself makes no LLM calls today; the `live` marker is
+registered in `pyproject.toml` so future tests have a consistent home.
 
 ## What's tested vs. not