diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ff23a81..7637988 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,6 +35,16 @@ jobs: - name: Run ruff run: python -m ruff check sidecar/ + - name: Guard — ANTHROPIC_API_KEY must NOT be set for default suite + run: | + if [ -n "${ANTHROPIC_API_KEY:-}" ]; then + echo "::error::ANTHROPIC_API_KEY is set in the default test environment." + echo "This can cause non-'live'-marked tests to leak real API calls." + echo "Either unset the key, or mark the test with @pytest.mark.live." + exit 1 + fi + shell: bash + - name: Run tests (with coverage on py3.11) run: | if [ "${{ matrix.python-version }}" = "3.11" ]; then diff --git a/pyproject.toml b/pyproject.toml index 41018cf..062a253 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,9 +96,10 @@ ignore = [ [tool.pytest.ini_options] testpaths = ["sidecar/tests"] asyncio_mode = "auto" -addopts = "-ra -m 'not e2e'" +addopts = "-ra -m 'not e2e and not live'" markers = [ "e2e: end-to-end tests against a real server (Playwright); deselected by default — opt in with ``pytest -m e2e``", + "live: opt-in tests that hit a real LLM API. Deselected by default; require ANTHROPIC_API_KEY and any test-specific env flags.", ] [tool.coverage.run] diff --git a/sidecar/tests/README.md b/sidecar/tests/README.md index b16d298..253d1a2 100644 --- a/sidecar/tests/README.md +++ b/sidecar/tests/README.md @@ -20,20 +20,16 @@ The `e2e` marker covers Playwright tests in `test_living_docs_e2e.py`. They need a real uvicorn server and a Chromium install; pass 3 of the test-strategy spec will stabilize and unify e2e workflows. -## LLM mocking standard +## LLM mocking standard, `live` marker, CI guard, cost policy -attune-gui itself makes no LLM calls — the heavy lifting is done by -`attune-author` and `attune-rag` upstream. 
Cross-layer integration tests -follow the **attune-author reference pattern**: +See **`testing-conventions.md`** in the attune workspace umbrella — +the canonical reference (mocking pattern, `live` marker semantics, CI +guard expectation, cost & quota policy). Applies to all four layers. -- Strip `ANTHROPIC_API_KEY` via an autouse fixture. -- Patch `anthropic.Anthropic` at import time, not at call site. -- Reset module-level singletons (e.g., `_PIPELINES`) between tests with - an autouse fixture. - -See `attune-author/tests/conftest.py` (`_lenient_polish_by_default`, -`_reset_rag_pipeline`). Pass 2 of the test-strategy spec will formalize -this into a shared `docs/testing-conventions.md` across layers. +attune-gui makes no LLM calls directly; the heavy lifting is done by +`attune-author` and `attune-rag` upstream. The `live` marker is +registered in `pyproject.toml` so any future opt-in tests have a +consistent home. ## Contract tests