diff --git a/.github/workflows/adk-py-test.yaml b/.github/workflows/adk-py-test.yaml
index 2650f60f..adbfa294 100644
--- a/.github/workflows/adk-py-test.yaml
+++ b/.github/workflows/adk-py-test.yaml
@@ -16,7 +16,7 @@ jobs:
       - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
 
       - name: Set up mise
-        uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3
+        uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
         with:
           cache: true
           experimental: true
diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml
index b4336914..ca7844bf 100644
--- a/.github/workflows/checks.yaml
+++ b/.github/workflows/checks.yaml
@@ -17,7 +17,7 @@ jobs:
         with:
           fetch-depth: 0 # Fetch full history for proper diff
       - name: Set up mise
-        uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3
+        uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
         with:
           cache: true
           experimental: true
@@ -33,21 +33,20 @@ jobs:
       - name: Ensure SHA pinned actions
         uses: zgosalvez/github-actions-ensure-sha-pinned-actions@70c4af2ed5282c51ba40566d026d6647852ffa3e # v5.0.1
 
-  build:
+  smoke:
     runs-on: ${{ matrix.os }}
-    timeout-minutes: 30
+    timeout-minutes: 20
 
     strategy:
       fail-fast: false
       matrix:
         python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
         os: [ubuntu-latest, windows-latest]
-        shard: [0, 1]
 
     steps:
       - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
       - name: Set up mise
-        uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3
+        uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
         with:
           cache: true
           experimental: true
@@ -59,10 +58,33 @@ jobs:
         run: |
           # This is already done by make install-dev, but we're keeping this as a separate step
           # to explicitly verify that installation works
-          mise exec python@${{ matrix.python-version }} -- python -m uv pip install -e ./py[all]
+          mise exec python@${{ matrix.python-version }} -- uv pip install -e ./py[all]
       - name: Test whether the Python SDK can be imported
         run: |
-          mise exec python@${{ matrix.python-version }} -- python -c 'import braintrust'
+          mise exec python@${{ matrix.python-version }} -- uv run --active --no-project python -c 'import braintrust'
+
+  nox:
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 30
+
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
+        os: [ubuntu-latest, windows-latest]
+        shard: [0, 1]
+
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
+      - name: Set up mise
+        uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
+        with:
+          cache: true
+          experimental: true
+          install_args: python@${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          mise exec python@${{ matrix.python-version }} -- make -C py install-dev
       - name: Run nox tests (shard ${{ matrix.shard }}/2)
         shell: bash
         run: |
@@ -75,13 +97,15 @@ jobs:
     uses: ./.github/workflows/langchain-py-test.yaml
 
   upload-wheel:
-    needs: build
+    needs:
+      - smoke
+      - nox
     runs-on: ubuntu-latest
     timeout-minutes: 10
     steps:
       - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
       - name: Set up mise
-        uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3
+        uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
         with:
           cache: true
           experimental: true
@@ -100,7 +124,8 @@ jobs:
     needs:
       - lint
       - ensure-pinned-actions
-      - build
+      - smoke
+      - nox
       - adk-py
       - langchain-py
       - upload-wheel
@@ -124,7 +149,8 @@ jobs:
 
           check_result "lint" "${{ needs.lint.result }}"
           check_result "ensure-pinned-actions" "${{ needs.ensure-pinned-actions.result }}"
-          check_result "build" "${{ needs.build.result }}"
+          check_result "smoke" "${{ needs.smoke.result }}"
+          check_result "nox" "${{ needs.nox.result }}"
           check_result "adk-py" "${{ needs.adk-py.result }}"
           check_result "langchain-py" "${{ needs.langchain-py.result }}"
           check_result "upload-wheel" "${{ needs.upload-wheel.result }}"
diff --git a/.github/workflows/langchain-py-test.yaml b/.github/workflows/langchain-py-test.yaml
index 54ac8df8..c49495f6 100644
--- a/.github/workflows/langchain-py-test.yaml
+++ b/.github/workflows/langchain-py-test.yaml
@@ -12,7 +12,7 @@ jobs:
       - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
 
       - name: Set up mise
-        uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3
+        uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
         with:
           cache: true
           experimental: true
diff --git a/.github/workflows/publish-py-sdk.yaml b/.github/workflows/publish-py-sdk.yaml
index d729cf0b..7a88c204 100644
--- a/.github/workflows/publish-py-sdk.yaml
+++ b/.github/workflows/publish-py-sdk.yaml
@@ -38,7 +38,7 @@ jobs:
           ref: ${{ github.event.inputs.ref }}
           fetch-depth: 0
       - name: Set up mise
-        uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3
+        uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
         with:
           cache: true
           experimental: true
@@ -71,7 +71,7 @@ jobs:
           ref: ${{ env.COMMIT_SHA }}
           fetch-depth: 0
       - name: Set up mise
-        uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3
+        uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
         with:
           cache: true
           experimental: true
diff --git a/.github/workflows/test-publish-py-sdk.yaml b/.github/workflows/test-publish-py-sdk.yaml
index 3e41a430..9c418ee4 100644
--- a/.github/workflows/test-publish-py-sdk.yaml
+++ b/.github/workflows/test-publish-py-sdk.yaml
@@ -42,7 +42,7 @@ jobs:
           ref: ${{ github.event.inputs.ref }}
           fetch-depth: 0
       - name: Set up mise
-        uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3
+        uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
         with:
           cache: true
           experimental: true
@@ -91,7 +91,7 @@ jobs:
           ref: ${{ env.COMMIT_SHA }}
           fetch-depth: 0
       - name: Set up mise
-        uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3
+        uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
         with:
           cache: true
           experimental: true
diff --git a/py/noxfile.py b/py/noxfile.py
index 05dcf9c1..89aa9082 100644
--- a/py/noxfile.py
+++ b/py/noxfile.py
@@ -86,8 +86,9 @@ def _pinned_python_version():
 OPENAI_VERSIONS = (LATEST, "1.77.0", "1.71", "1.91", "1.92")
 OPENAI_AGENTS_VERSIONS = (LATEST, "0.0.19")
 # litellm latest requires Python >= 3.10
-# Pin litellm because 1.82.7-1.82.8 are compromised: https://github.com/BerriAI/litellm/issues/24512
-LITELLM_VERSIONS = ("1.82.0", "1.74.0")
+# Pin litellm to a version without the 1.82.7-1.82.8 compromise and with the
+# OIDC userinfo cache key collision fix from 1.83.0+
+LITELLM_VERSIONS = ("1.83.0", "1.74.0")
 # CLI bundling started in 0.1.10 - older versions require external Claude Code installation
 CLAUDE_AGENT_SDK_VERSIONS = (LATEST, "0.1.10")
 # Keep LATEST for newest API coverage, and pin 2.4.0 to cover the 2.4 -> 2.5 breaking change
diff --git a/py/requirements-optional.txt b/py/requirements-optional.txt
index 53a423c7..0ef60446 100644
--- a/py/requirements-optional.txt
+++ b/py/requirements-optional.txt
@@ -7,7 +7,7 @@ langchain-anthropic==1.4.0
 langchain-core==1.2.22
 langchain-openai==1.1.12
 langsmith==0.7.12
-litellm==1.82.0
+litellm==1.83.0
 openai==2.24.0
 openrouter==0.7.11
 pydantic_ai==1.66.0
diff --git a/py/setup.py b/py/setup.py
index ee387304..3055a91a 100644
--- a/py/setup.py
+++ b/py/setup.py
@@ -1,4 +1,5 @@
 import os
+import sysconfig
 
 import setuptools
 
@@ -27,14 +28,24 @@
     "wrapt",
 ]
 
+is_free_threaded = bool(sysconfig.get_config_var("Py_GIL_DISABLED"))
+is_windows = os.name == "nt"
+
+# orjson is not compatible with PyPy (excluded via the marker) or free-threaded Python (excluded here).
+# NOTE(review): is_free_threaded is read when setup.py runs; prebuilt wheels may not re-evaluate it - confirm.
+performance_require = [] if is_free_threaded else ["orjson; platform_python_implementation != 'PyPy'"]
+
+# temporalio does not currently install cleanly on Windows free-threaded Python,
+# so leave the optional integration available everywhere else.
+temporal_require = [] if is_free_threaded and is_windows else ["temporalio>=1.19.0; python_version>='3.10'"]
+
 extras_require = {
-    "cli": ["boto3", "psycopg2-binary", "uv", "starlette", "uvicorn"],
+    "cli": ["boto3", "uv", "starlette", "uvicorn"],
     "doc": ["pydoc-markdown"],
     "openai-agents": ["openai-agents"],
     "otel": ["opentelemetry-api", "opentelemetry-sdk", "opentelemetry-exporter-otlp-proto-http"],
-    # orjson is not compatible with PyPy, so we exclude it for that platform
-    "performance": ["orjson; platform_python_implementation != 'PyPy'"],
-    "temporal": ["temporalio>=1.19.0; python_version>='3.10'"],
+    "performance": performance_require,
+    "temporal": temporal_require,
 }
 
 extras_require["all"] = sorted({package for packages in extras_require.values() for package in packages})
diff --git a/py/src/braintrust/integrations/pydantic_ai/patchers.py b/py/src/braintrust/integrations/pydantic_ai/patchers.py
index 0335fcd7..fd164451 100644
--- a/py/src/braintrust/integrations/pydantic_ai/patchers.py
+++ b/py/src/braintrust/integrations/pydantic_ai/patchers.py
@@ -134,14 +134,18 @@ class StreamedResponseSyncStartProducerPatcher(FunctionWrapperPatcher):
 
 class _ToolManagerExecuteFunctionToolPatcher(FunctionWrapperPatcher):
     name = "pydantic_ai.tool_manager.execute_function_tool"
-    target_module = "pydantic_ai._tool_manager"
+    # Compatibility note: pydantic_ai 1.78.0 moved ToolManager out of the
+    # private ``pydantic_ai._tool_manager`` module into
+    # ``pydantic_ai.tool_manager``. ``pydantic_ai._agent_graph.ToolManager`` is
+    # a stable alias in both the old and new layouts, so patch through it.
+    target_module = "pydantic_ai._agent_graph"
     target_path = "ToolManager._execute_function_tool_call"
     wrapper = _tool_manager_execute_function_tool_wrapper
 
 
 class _ToolManagerCallFunctionToolPatcher(FunctionWrapperPatcher):
     name = "pydantic_ai.tool_manager.call_function_tool"
-    target_module = "pydantic_ai._tool_manager"
+    target_module = "pydantic_ai._agent_graph"
     target_path = "ToolManager._call_function_tool"
     wrapper = _tool_manager_call_function_tool_wrapper
     superseded_by = (_ToolManagerExecuteFunctionToolPatcher,)
diff --git a/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py
index 8c29f8d9..38369d14 100644
--- a/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py
+++ b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py
@@ -2248,7 +2248,7 @@ def test_wrap_model_classes_is_deprecated(monkeypatch):
 
 
 def test_setup_pydantic_ai_is_idempotent_across_new_patch_points():
-    import pydantic_ai._tool_manager as tool_manager_module
+    import pydantic_ai._agent_graph as agent_graph_module
     import pydantic_ai.direct as direct_module
     from braintrust.integrations.pydantic_ai.integration import PydanticAIIntegration
     from pydantic_ai.agent.abstract import AbstractAgent
@@ -2257,15 +2257,15 @@ def test_setup_pydantic_ai_is_idempotent_across_new_patch_points():
     prepare_model = direct_module.__dict__["_prepare_model"]
     tool_method_name = (
         "_execute_function_tool_call"
-        if "_execute_function_tool_call" in tool_manager_module.ToolManager.__dict__
+        if "_execute_function_tool_call" in agent_graph_module.ToolManager.__dict__
         else "_call_function_tool"
     )
-    tool_method = tool_manager_module.ToolManager.__dict__[tool_method_name]
+    tool_method = agent_graph_module.ToolManager.__dict__[tool_method_name]
 
     assert PydanticAIIntegration.setup() is True
     assert AbstractAgent.__dict__["run"] is run
     assert direct_module.__dict__["_prepare_model"] is prepare_model
-    assert tool_manager_module.ToolManager.__dict__[tool_method_name] is tool_method
+    assert agent_graph_module.ToolManager.__dict__[tool_method_name] is tool_method
 
 
 def test_serialize_content_part_with_binary_content():