diff --git a/.github/workflows/adk-py-test.yaml b/.github/workflows/adk-py-test.yaml index 2650f60f..adbfa294 100644 --- a/.github/workflows/adk-py-test.yaml +++ b/.github/workflows/adk-py-test.yaml @@ -16,7 +16,7 @@ jobs: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Set up mise - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: cache: true experimental: true diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index b4336914..ca7844bf 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -17,7 +17,7 @@ jobs: with: fetch-depth: 0 # Fetch full history for proper diff - name: Set up mise - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: cache: true experimental: true @@ -33,21 +33,20 @@ jobs: - name: Ensure SHA pinned actions uses: zgosalvez/github-actions-ensure-sha-pinned-actions@70c4af2ed5282c51ba40566d026d6647852ffa3e # v5.0.1 - build: + smoke: runs-on: ${{ matrix.os }} - timeout-minutes: 30 + timeout-minutes: 20 strategy: fail-fast: false matrix: python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] os: [ubuntu-latest, windows-latest] - shard: [0, 1] steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Set up mise - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: cache: true experimental: true @@ -59,10 +58,33 @@ jobs: run: | # This is already done by make install-dev, but we're keeping this as a separate step # to explicitly verify that installation works - mise exec python@${{ matrix.python-version }} -- python -m uv pip install -e ./py[all] + mise exec python@${{ matrix.python-version }} -- uv pip install -e ./py[all] - name: Test whether the Python SDK can be imported run: | - mise exec python@${{ matrix.python-version }} -- python -c 'import braintrust' + mise exec python@${{ matrix.python-version }} -- uv run --active --no-project python -c 'import braintrust' + + nox: + runs-on: ${{ matrix.os }} + timeout-minutes: 30 + + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + os: [ubuntu-latest, windows-latest] + shard: [0, 1] + + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + - name: Set up mise + uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 + with: + cache: true + experimental: true + install_args: python@${{ matrix.python-version }} + - name: Install dependencies + run: | + mise exec python@${{ matrix.python-version }} -- make -C py install-dev - name: Run nox tests (shard ${{ matrix.shard }}/2) shell: bash run: | @@ -75,13 +97,15 @@ jobs: uses: ./.github/workflows/langchain-py-test.yaml upload-wheel: - needs: build + needs: + - smoke + - nox runs-on: ubuntu-latest timeout-minutes: 10 steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Set up mise - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: cache: true experimental: true @@ -100,7 +124,8 @@ jobs: needs: - lint - ensure-pinned-actions - - build + - smoke + - nox - adk-py - langchain-py - upload-wheel @@ -124,7 +149,8 @@ jobs: check_result "lint" "${{ needs.lint.result }}" check_result "ensure-pinned-actions" "${{ needs.ensure-pinned-actions.result }}" - check_result "build" "${{ needs.build.result }}" + check_result "smoke" "${{ needs.smoke.result }}" + check_result "nox" "${{ needs.nox.result }}" check_result "adk-py" "${{ needs.adk-py.result }}" check_result "langchain-py" "${{ needs.langchain-py.result }}" check_result "upload-wheel" "${{ needs.upload-wheel.result }}" diff --git a/.github/workflows/langchain-py-test.yaml b/.github/workflows/langchain-py-test.yaml index 54ac8df8..c49495f6 100644 --- a/.github/workflows/langchain-py-test.yaml +++ b/.github/workflows/langchain-py-test.yaml @@ -12,7 +12,7 @@ jobs: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Set up mise - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: cache: true experimental: true diff --git a/.github/workflows/publish-py-sdk.yaml b/.github/workflows/publish-py-sdk.yaml index d729cf0b..7a88c204 100644 --- a/.github/workflows/publish-py-sdk.yaml +++ b/.github/workflows/publish-py-sdk.yaml @@ -38,7 +38,7 @@ jobs: ref: ${{ github.event.inputs.ref }} fetch-depth: 0 - name: Set up mise - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: cache: true experimental: true @@ -71,7 +71,7 @@ jobs: ref: ${{ env.COMMIT_SHA }} fetch-depth: 0 - name: Set up mise - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: cache: true experimental: true diff --git a/.github/workflows/test-publish-py-sdk.yaml b/.github/workflows/test-publish-py-sdk.yaml index 3e41a430..9c418ee4 100644 --- a/.github/workflows/test-publish-py-sdk.yaml +++ b/.github/workflows/test-publish-py-sdk.yaml @@ -42,7 +42,7 @@ jobs: ref: ${{ github.event.inputs.ref }} fetch-depth: 0 - name: Set up mise - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: cache: true experimental: true @@ -91,7 +91,7 @@ jobs: ref: ${{ env.COMMIT_SHA }} fetch-depth: 0 - name: Set up mise - uses: jdx/mise-action@5228313ee0372e111a38da051671ca30fc5a96db # v3.6.3 + uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1 with: cache: true experimental: true diff --git a/py/noxfile.py b/py/noxfile.py index 05dcf9c1..89aa9082 100644 --- a/py/noxfile.py +++ b/py/noxfile.py @@ -86,8 +86,9 @@ def _pinned_python_version(): OPENAI_VERSIONS = (LATEST, "1.77.0", "1.71", "1.91", "1.92") OPENAI_AGENTS_VERSIONS = (LATEST, "0.0.19") # litellm latest requires Python >= 3.10 -# Pin litellm because 1.82.7-1.82.8 are compromised: https://github.com/BerriAI/litellm/issues/24512 -LITELLM_VERSIONS = ("1.82.0", "1.74.0") +# Pin litellm to a version without the 1.82.7-1.82.8 compromise and with the +# OIDC userinfo cache key collision fix from 1.83.0+ +LITELLM_VERSIONS = ("1.83.0", "1.74.0") # CLI bundling started in 0.1.10 - older versions require external Claude Code installation CLAUDE_AGENT_SDK_VERSIONS = (LATEST, "0.1.10") # Keep LATEST for newest API coverage, and pin 2.4.0 to cover the 2.4 -> 2.5 breaking change diff --git a/py/requirements-optional.txt b/py/requirements-optional.txt index 53a423c7..0ef60446 100644 --- a/py/requirements-optional.txt +++ b/py/requirements-optional.txt @@ -7,7 +7,7 @@ langchain-anthropic==1.4.0 langchain-core==1.2.22 langchain-openai==1.1.12 langsmith==0.7.12 -litellm==1.82.0 +litellm==1.83.0 openai==2.24.0 openrouter==0.7.11 pydantic_ai==1.66.0 diff --git a/py/setup.py b/py/setup.py index ee387304..3055a91a 100644 --- a/py/setup.py +++ b/py/setup.py @@ -1,4 +1,5 @@ import os +import sysconfig import setuptools @@ -27,14 +28,24 @@ "wrapt", ] +is_free_threaded = bool(sysconfig.get_config_var("Py_GIL_DISABLED")) +is_windows = os.name == "nt" + +# orjson is not compatible with PyPy or free-threaded Python, so only expose it +# for standard CPython builds where it is supported. +performance_require = [] if is_free_threaded else ["orjson; platform_python_implementation != 'PyPy'"] + +# temporalio does not currently install cleanly on Windows free-threaded Python, +# so leave the optional integration available everywhere else. +temporal_require = [] if is_free_threaded and is_windows else ["temporalio>=1.19.0; python_version>='3.10'"] + extras_require = { - "cli": ["boto3", "psycopg2-binary", "uv", "starlette", "uvicorn"], + "cli": ["boto3", "uv", "starlette", "uvicorn"], "doc": ["pydoc-markdown"], "openai-agents": ["openai-agents"], "otel": ["opentelemetry-api", "opentelemetry-sdk", "opentelemetry-exporter-otlp-proto-http"], - # orjson is not compatible with PyPy, so we exclude it for that platform - "performance": ["orjson; platform_python_implementation != 'PyPy'"], - "temporal": ["temporalio>=1.19.0; python_version>='3.10'"], + "performance": performance_require, + "temporal": temporal_require, } extras_require["all"] = sorted({package for packages in extras_require.values() for package in packages}) diff --git a/py/src/braintrust/integrations/pydantic_ai/patchers.py b/py/src/braintrust/integrations/pydantic_ai/patchers.py index 0335fcd7..fd164451 100644 --- a/py/src/braintrust/integrations/pydantic_ai/patchers.py +++ b/py/src/braintrust/integrations/pydantic_ai/patchers.py @@ -134,14 +134,18 @@ class StreamedResponseSyncStartProducerPatcher(FunctionWrapperPatcher): class _ToolManagerExecuteFunctionToolPatcher(FunctionWrapperPatcher): name = "pydantic_ai.tool_manager.execute_function_tool" - target_module = "pydantic_ai._tool_manager" + # Regression compatibility note: pydantic_ai 1.78.0 moved ToolManager out + # of the private ``pydantic_ai._tool_manager`` module into + # ``pydantic_ai.tool_manager``. ``pydantic_ai._agent_graph.ToolManager`` is + # a stable alias in both the old and new layouts, so patch that seam. + target_module = "pydantic_ai._agent_graph" target_path = "ToolManager._execute_function_tool_call" wrapper = _tool_manager_execute_function_tool_wrapper class _ToolManagerCallFunctionToolPatcher(FunctionWrapperPatcher): name = "pydantic_ai.tool_manager.call_function_tool" - target_module = "pydantic_ai._tool_manager" + target_module = "pydantic_ai._agent_graph" target_path = "ToolManager._call_function_tool" wrapper = _tool_manager_call_function_tool_wrapper superseded_by = (_ToolManagerExecuteFunctionToolPatcher,) diff --git a/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py index 8c29f8d9..38369d14 100644 --- a/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py +++ b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py @@ -2248,7 +2248,7 @@ def test_wrap_model_classes_is_deprecated(monkeypatch): def test_setup_pydantic_ai_is_idempotent_across_new_patch_points(): - import pydantic_ai._tool_manager as tool_manager_module + import pydantic_ai._agent_graph as agent_graph_module import pydantic_ai.direct as direct_module from braintrust.integrations.pydantic_ai.integration import PydanticAIIntegration from pydantic_ai.agent.abstract import AbstractAgent @@ -2257,15 +2257,15 @@ def test_setup_pydantic_ai_is_idempotent_across_new_patch_points(): prepare_model = direct_module.__dict__["_prepare_model"] tool_method_name = ( "_execute_function_tool_call" - if "_execute_function_tool_call" in tool_manager_module.ToolManager.__dict__ + if "_execute_function_tool_call" in agent_graph_module.ToolManager.__dict__ else "_call_function_tool" ) - tool_method = tool_manager_module.ToolManager.__dict__[tool_method_name] + tool_method = agent_graph_module.ToolManager.__dict__[tool_method_name] assert PydanticAIIntegration.setup() is True assert AbstractAgent.__dict__["run"] is run assert direct_module.__dict__["_prepare_model"] is prepare_model - assert tool_manager_module.ToolManager.__dict__[tool_method_name] is tool_method + assert agent_graph_module.ToolManager.__dict__[tool_method_name] is tool_method def test_serialize_content_part_with_binary_content():