From ee034b57bfce1ba2df3de42f69a4c6c836f38709 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 9 Mar 2026 21:56:57 +0000 Subject: [PATCH 1/2] add 12.9.6 release notes --- .../docs/source/release/12.9.6-notes.rst | 64 +++++++++++++++++++ .../docs/source/release/12.9.6-notes.rst | 16 +++++ .../docs/source/release/13.2.0-notes.rst | 2 +- 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 cuda_bindings/docs/source/release/12.9.6-notes.rst create mode 100644 cuda_python/docs/source/release/12.9.6-notes.rst diff --git a/cuda_bindings/docs/source/release/12.9.6-notes.rst b/cuda_bindings/docs/source/release/12.9.6-notes.rst new file mode 100644 index 0000000000..dd508ff147 --- /dev/null +++ b/cuda_bindings/docs/source/release/12.9.6-notes.rst @@ -0,0 +1,64 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +.. module:: cuda.bindings + +``cuda-bindings`` 12.9.6 Release notes +====================================== + +Highlights +---------- + +* ``cuda.bindings.nvml`` has graduated from experimental (``cuda.bindings._nvml``) + to a fully supported public module with extensive handwritten Pythonic API + coverage spanning ~170 functions across system queries, device discovery, + memory, power, clocks, utilization, thermals, NVLink, and device configuration. + (`PR #1524 `_, + `PR #1548 `_) +* Add ``nvFatbin`` bindings. + (`PR #1467 `_) +* Performance improvement: ``cuda.bindings`` now uses a faster ``enum`` + implementation, rather than the standard library's ``enum.IntEnum``. + This leads to much faster import times, and slightly faster attribute access + times. + (`PR #1581 `_) +* Multiple performance improvements cumulatively reducing Python-to-C call + overhead through faster ``void *`` conversion, faster result returning, + optimized enum-to-vector conversion, and stack-allocated small arrays. + +Bugfixes +-------- + +* Fixed an issue where the ``CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL`` attribute was + retrieved as an unsigned int, rather than a signed int. + (`PR #1336 `_) +* Fixed a use-after-free in ``_HelperInputVoidPtr`` properties when backed by + Python buffer objects. + (`PR #1629 `_) + +Miscellaneous +------------- + +* Faster ``void *`` conversion using stack-allocated buffers instead of heap + allocation. + (`PR #1616 `_) +* Faster returning of results from driver, runtime, and NVRTC bindings. + (`PR #1647 `_, + `PR #1656 `_) +* Faster conversion of enum sequences to vectors by eliminating temporary + Python objects. + (`PR #1667 `_) +* Stack-allocated small numeric arrays in driver bindings, reducing heap + allocation overhead. + (`PR #1545 `_) +* NVML bindings now use ``cuda_pathfinder`` for library discovery, consistent + with other CUDA libraries. + (`PR #1661 `_) +* ``CUDA_HOME`` is no longer required at metadata resolution time (e.g. + ``pip install --dry-run``, ``uv lock``); it is only needed at actual build time. + (`PR #1652 `_) + +Known issues +------------ + +* Updating from older versions (v12.6.2.post1 and below) via ``pip install -U cuda-python`` might not work. Please do a clean re-installation by uninstalling ``pip uninstall -y cuda-python`` followed by installing ``pip install cuda-python``. diff --git a/cuda_python/docs/source/release/12.9.6-notes.rst b/cuda_python/docs/source/release/12.9.6-notes.rst new file mode 100644 index 0000000000..86ed747cfc --- /dev/null +++ b/cuda_python/docs/source/release/12.9.6-notes.rst @@ -0,0 +1,16 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +CUDA Python 12.9.6 Release notes +================================ + +Included components +------------------- + +* `cuda.bindings 12.9.6 `_ +* `cuda.pathfinder 1.4.2 `_ + +Known issues +------------ + +* Updating from older versions (v12.6.2.post1 and below) via ``pip install -U cuda-python`` might not work. Please do a clean re-installation by uninstalling ``pip uninstall -y cuda-python`` followed by installing ``pip install cuda-python``. diff --git a/cuda_python/docs/source/release/13.2.0-notes.rst b/cuda_python/docs/source/release/13.2.0-notes.rst index 91f785a68f..a0bb3193ab 100644 --- a/cuda_python/docs/source/release/13.2.0-notes.rst +++ b/cuda_python/docs/source/release/13.2.0-notes.rst @@ -1,4 +1,4 @@ -.. SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. .. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE CUDA Python 13.2.0 Release notes From df536b404332802ad0d6b353a62ce11023191399 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 9 Mar 2026 21:57:56 +0000 Subject: [PATCH 2/2] test against 13.2 --- ci/test-matrix.yml | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index 2f8454209a..b1adcee61a 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -20,46 +20,46 @@ linux: # linux-64 - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.1.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.2.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.1.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 't4', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 't4', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } # linux-aarch64 - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.1.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.2.0', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.1.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.2.0', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest' } # special runners - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'h100', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'h100', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 't4', GPU_COUNT: '2', DRIVER: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'h100', GPU_COUNT: '2', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'h100', GPU_COUNT: '1', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 't4', GPU_COUNT: '2', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'h100', GPU_COUNT: '2', DRIVER: 'latest' } nightly: [] windows: @@ -67,20 +67,20 @@ windows: # win-64 - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtx2080', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'WDDM' } - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } + - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'WDDM' } - - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.1.1', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'WDDM' } + - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.2.0', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'WDDM' } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.1', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } + - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.2.0', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.1.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.2.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.1.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } + - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.2.0', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } nightly: []