Skip to content

Commit 214d6c7

Browse files
committed
Merge remote-tracking branch 'origin/main' into explicit-graph-construction
2 parents b78fd68 + 09069a3 commit 214d6c7

40 files changed

Lines changed: 1839 additions & 1427 deletions

.github/actions/fetch_ctk/action.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ inputs:
1414
cuda-components:
1515
description: "A list of the CTK components to install as a comma-separated list. e.g. 'cuda_nvcc,cuda_nvrtc,cuda_cudart'"
1616
required: false
17-
default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,libnvjitlink,libcufile,libnvfatbin"
17+
default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,cuda_cupti,libnvjitlink,libcufile,libnvfatbin"
1818
cuda-path:
1919
description: "where the CTK components will be installed to, relative to $PWD"
2020
required: false

.github/workflows/bandit.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,6 @@ jobs:
4242
with:
4343
args: "check --select S --ignore ${{ steps.ignore-codes.outputs.codes }} --output-format sarif --output-file results.sarif"
4444
- name: Upload SARIF file
45-
uses: github/codeql-action/upload-sarif@v4.32.4
45+
uses: github/codeql-action/upload-sarif@v4.32.5
4646
with:
4747
sarif_file: results.sarif

.github/workflows/build-wheel.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -369,7 +369,7 @@ jobs:
369369
370370
OLD_BRANCH=$(yq '.backport_branch' ci/versions.yml)
371371
OLD_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*"
372-
LATEST_PRIOR_RUN_ID=$(gh run list -b ${OLD_BRANCH} -L 1 -w "ci.yml" -s completed -R NVIDIA/cuda-python --json databaseId | jq '.[]| .databaseId')
372+
LATEST_PRIOR_RUN_ID=$(gh run list -b ${OLD_BRANCH} -L 1 -w "ci.yml" -s success -R NVIDIA/cuda-python --json databaseId | jq '.[]| .databaseId')
373373
if [[ "$LATEST_PRIOR_RUN_ID" == "" ]]; then
374374
echo "LATEST_PRIOR_RUN_ID not found!"
375375
exit 1

.github/workflows/codeql.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,13 +31,13 @@ jobs:
3131
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
3232

3333
- name: Initialize CodeQL
34-
uses: github/codeql-action/init@0ec47d036c68ae0cf94c629009b1029407111281 # v3.31.8
34+
uses: github/codeql-action/init@40f0fa95c41fede7b43f035cb47aac899ee0ba0a # v3.31.8
3535
with:
3636
languages: ${{ matrix.language }}
3737
build-mode: ${{ matrix.build-mode }}
3838
queries: security-extended
3939

4040
- name: Perform CodeQL Analysis
41-
uses: github/codeql-action/analyze@0ec47d036c68ae0cf94c629009b1029407111281 # v3.31.8
41+
uses: github/codeql-action/analyze@40f0fa95c41fede7b43f035cb47aac899ee0ba0a # v3.31.8
4242
with:
4343
category: "/language:${{matrix.language}}"

.github/workflows/coverage.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,8 @@ jobs:
9898
9999
- name: Build cuda-pathfinder
100100
run: |
101-
.venv/bin/pip install -v ./cuda_pathfinder --group test
101+
cd cuda_pathfinder
102+
../.venv/bin/pip install -v . --group test
102103
103104
- name: Build cuda-bindings
104105
run: |

.github/workflows/test-wheel-linux.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,7 @@ jobs:
151151
152152
OLD_BRANCH=${{ needs.compute-matrix.outputs.OLD_BRANCH }}
153153
OLD_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*"
154-
LATEST_PRIOR_RUN_ID=$(gh run list -b ${OLD_BRANCH} -L 1 -w "ci.yml" -s completed -R NVIDIA/cuda-python --json databaseId | jq '.[]| .databaseId')
154+
LATEST_PRIOR_RUN_ID=$(gh run list -b ${OLD_BRANCH} -L 1 -w "ci.yml" -s success -R NVIDIA/cuda-python --json databaseId | jq '.[]| .databaseId')
155155
if [[ "$LATEST_PRIOR_RUN_ID" == "" ]]; then
156156
echo "LATEST_PRIOR_RUN_ID not found!"
157157
exit 1

.github/workflows/test-wheel-windows.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ jobs:
137137
run: |
138138
$OLD_BRANCH = yq '.backport_branch' ci/versions.yml
139139
$OLD_BASENAME = "cuda-bindings-python${env:PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*"
140-
$runData = gh run list -b $OLD_BRANCH -L 1 -w "ci.yml" -s completed -R NVIDIA/cuda-python --json databaseId | ConvertFrom-Json
140+
$runData = gh run list -b $OLD_BRANCH -L 1 -w "ci.yml" -s success -R NVIDIA/cuda-python --json databaseId | ConvertFrom-Json
141141
if (-not $runData -or $runData.Length -eq 0 -or -not $runData[0].databaseId -or [string]::IsNullOrEmpty($runData[0].databaseId)) {
142142
Write-Host "LATEST_PRIOR_RUN_ID not found!"
143143
exit 1

cuda_bindings/examples/0_Introduction/clock_nvrtc_test.py

Lines changed: 46 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77
from common import common
8-
from common.helper_cuda import checkCudaErrors, findCudaDevice
8+
from common.helper_cuda import check_cuda_errors, find_cuda_device
99

1010
from cuda.bindings import driver as cuda
1111

@@ -50,8 +50,8 @@
5050
}
5151
"""
5252

53-
NUM_BLOCKS = 64
54-
NUM_THREADS = 256
53+
num_blocks = 64
54+
num_threads = 256
5555

5656

5757
def elems_to_bytes(nelems, dt):
@@ -64,52 +64,52 @@ def main():
6464
if platform.machine() == "armv7l":
6565
pytest.skip("clock_nvrtc is not supported on ARMv7")
6666

67-
timer = np.empty(NUM_BLOCKS * 2, dtype="int64")
68-
hinput = np.empty(NUM_THREADS * 2, dtype="float32")
67+
timer = np.empty(num_blocks * 2, dtype="int64")
68+
hinput = np.empty(num_threads * 2, dtype="float32")
6969

70-
for i in range(NUM_THREADS * 2):
70+
for i in range(num_threads * 2):
7171
hinput[i] = i
7272

73-
devID = findCudaDevice()
74-
with common.KernelHelper(clock_nvrtc, devID) as kernelHelper:
75-
kernel_addr = kernelHelper.getFunction(b"timedReduction")
76-
77-
dinput = checkCudaErrors(cuda.cuMemAlloc(hinput.nbytes))
78-
doutput = checkCudaErrors(cuda.cuMemAlloc(elems_to_bytes(NUM_BLOCKS, np.float32)))
79-
dtimer = checkCudaErrors(cuda.cuMemAlloc(timer.nbytes))
80-
checkCudaErrors(cuda.cuMemcpyHtoD(dinput, hinput, hinput.nbytes))
81-
82-
args = ((dinput, doutput, dtimer), (None, None, None))
83-
shared_memory_nbytes = elems_to_bytes(2 * NUM_THREADS, np.float32)
84-
85-
grid_dims = (NUM_BLOCKS, 1, 1)
86-
block_dims = (NUM_THREADS, 1, 1)
87-
88-
checkCudaErrors(
89-
cuda.cuLaunchKernel(
90-
kernel_addr,
91-
*grid_dims, # grid dim
92-
*block_dims, # block dim
93-
shared_memory_nbytes,
94-
0, # shared mem, stream
95-
args,
96-
0,
97-
)
98-
) # arguments
99-
100-
checkCudaErrors(cuda.cuCtxSynchronize())
101-
checkCudaErrors(cuda.cuMemcpyDtoH(timer, dtimer, timer.nbytes))
102-
checkCudaErrors(cuda.cuMemFree(dinput))
103-
checkCudaErrors(cuda.cuMemFree(doutput))
104-
checkCudaErrors(cuda.cuMemFree(dtimer))
105-
106-
avgElapsedClocks = 0.0
107-
108-
for i in range(NUM_BLOCKS):
109-
avgElapsedClocks += timer[i + NUM_BLOCKS] - timer[i]
110-
111-
avgElapsedClocks = avgElapsedClocks / NUM_BLOCKS
112-
print(f"Average clocks/block = {avgElapsedClocks}")
73+
dev_id = find_cuda_device()
74+
kernel_helper = common.KernelHelper(clock_nvrtc, dev_id)
75+
kernel_addr = kernel_helper.get_function(b"timedReduction")
76+
77+
dinput = check_cuda_errors(cuda.cuMemAlloc(hinput.nbytes))
78+
doutput = check_cuda_errors(cuda.cuMemAlloc(elems_to_bytes(num_blocks, np.float32)))
79+
dtimer = check_cuda_errors(cuda.cuMemAlloc(timer.nbytes))
80+
check_cuda_errors(cuda.cuMemcpyHtoD(dinput, hinput, hinput.nbytes))
81+
82+
args = ((dinput, doutput, dtimer), (None, None, None))
83+
shared_memory_nbytes = elems_to_bytes(2 * num_threads, np.float32)
84+
85+
grid_dims = (num_blocks, 1, 1)
86+
block_dims = (num_threads, 1, 1)
87+
88+
check_cuda_errors(
89+
cuda.cuLaunchKernel(
90+
kernel_addr,
91+
*grid_dims, # grid dim
92+
*block_dims, # block dim
93+
shared_memory_nbytes,
94+
0, # shared mem, stream
95+
args,
96+
0,
97+
)
98+
) # arguments
99+
100+
check_cuda_errors(cuda.cuCtxSynchronize())
101+
check_cuda_errors(cuda.cuMemcpyDtoH(timer, dtimer, timer.nbytes))
102+
check_cuda_errors(cuda.cuMemFree(dinput))
103+
check_cuda_errors(cuda.cuMemFree(doutput))
104+
check_cuda_errors(cuda.cuMemFree(dtimer))
105+
106+
avg_elapsed_clocks = 0.0
107+
108+
for i in range(num_blocks):
109+
avg_elapsed_clocks += timer[i + num_blocks] - timer[i]
110+
111+
avg_elapsed_clocks = avg_elapsed_clocks / num_blocks
112+
print(f"Average clocks/block = {avg_elapsed_clocks}")
113113

114114

115115
if __name__ == "__main__":

0 commit comments

Comments
 (0)