Smart-AI-Memory · silversurfer562 · May 13, 2026 · May 13, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -35,6 +35,16 @@ jobs:
       - name: Run ruff
         run: python -m ruff check sidecar/
 
+      - name: Guard — ANTHROPIC_API_KEY must NOT be set for default suite
+        run: |  # exits 1 (failing the job before the test step) when the key is non-empty in this step's environment; NOTE(review): an `env:` set only on the later test step would not be visible here
+          if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
+            echo "::error::ANTHROPIC_API_KEY is set in the default test environment."
+            echo "This can cause non-'live'-marked tests to leak real API calls."
+            echo "Either unset the key, or mark the test with @pytest.mark.live."
+            exit 1
+          fi
+        shell: bash  # pin the interpreter explicitly instead of relying on the runner's default shell
+
       - name: Run tests (with coverage on py3.11)
         run: |
           if [ "${{ matrix.python-version }}" = "3.11" ]; then

diff --git a/pyproject.toml b/pyproject.toml
@@ -96,9 +96,10 @@ ignore = [
 [tool.pytest.ini_options]
 testpaths = ["sidecar/tests"]
 asyncio_mode = "auto"
-addopts = "-ra -m 'not e2e'"
+addopts = "-ra -m 'not e2e and not live'"  # default run deselects both opt-in markers (e2e, live)
 markers = [
     "e2e: end-to-end tests against a real server (Playwright); deselected by default — opt in with ``pytest -m e2e``",
+    "live: opt-in tests that hit a real LLM API; deselected by default — opt in with ``pytest -m live``. Require ANTHROPIC_API_KEY and any test-specific env flags.",
 ]
 
 [tool.coverage.run]

diff --git a/sidecar/tests/README.md b/sidecar/tests/README.md
@@ -20,20 +20,16 @@ The `e2e` marker covers Playwright tests in `test_living_docs_e2e.py`.
 They need a real uvicorn server and a Chromium install; pass 3 of the
 test-strategy spec will stabilize and unify e2e workflows.
 
-## LLM mocking standard
+## LLM mocking standard, `live` marker, CI guard, cost policy
 
-attune-gui itself makes no LLM calls — the heavy lifting is done by
-`attune-author` and `attune-rag` upstream. Cross-layer integration tests
-follow the **attune-author reference pattern**:
+See **`testing-conventions.md`** in the attune workspace umbrella —
+the canonical reference (mocking pattern, `live` marker semantics, CI
+guard expectation, cost & quota policy). Applies to all four layers.
 
-- Strip `ANTHROPIC_API_KEY` via an autouse fixture.
-- Patch `anthropic.Anthropic` at import time, not at call site.
-- Reset module-level singletons (e.g., `_PIPELINES`) between tests with
-  an autouse fixture.
-
-See `attune-author/tests/conftest.py` (`_lenient_polish_by_default`,
-`_reset_rag_pipeline`). Pass 2 of the test-strategy spec will formalize
-this into a shared `docs/testing-conventions.md` across layers.
+attune-gui makes no LLM calls directly; the heavy lifting is done by
+`attune-author` and `attune-rag` upstream. The `live` marker is
+registered in `pyproject.toml` so any future opt-in tests have a
+consistent home.
 
 ## Contract tests