Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 136 additions & 22 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,37 @@
// Test success cache lives under ${WORKSPACE}/.migraphx-ci-test-success-cache (same commit + image tag skips).
// Set MIGRAPHX_CI_FORCE_TESTS=true to always run all tests.
DOCKER_IMAGE = 'rocm/migraphx-ci-jenkins-ubuntu'
DOCKER_IMAGE_ORT = 'rocm/migraphx-ci-jenkins-ubuntu-ort'

// Returns the root directory of the test-success cache, which sits inside the
// current job workspace (env.WORKSPACE).
def testSuccessCacheBase() {
    def workspaceDir = env.WORKSPACE
    return "${workspaceDir}/.migraphx-ci-test-success-cache"
}

// Reports whether the test-success cache may be used.
// The cache is on unless MIGRAPHX_CI_FORCE_TESTS (trimmed, lower-cased) is
// 'true' or '1'. The decision is also echoed to the build log for tracing.
def ciTestCacheEnabled() {
    def forceFlag = env.MIGRAPHX_CI_FORCE_TESTS?.trim()?.toLowerCase()
    def forced = (forceFlag == 'true' || forceFlag == '1')
    def cacheOn = !forced
    // #region agent log
    echo "[MIGRAPHX_CI_SKIP_TRACE] ciTestCacheEnabled: cacheBase=${testSuccessCacheBase()}, MIGRAPHX_CI_FORCE_TESTS=${forceFlag ?: '(empty)'}, enabled=${cacheOn}"
    // #endregion
    return cacheOn
}

// Turns the Jenkins job name (env.JOB_NAME) into a single path component by
// replacing every '/' with '_'.
def safeJobNameForCache() {
    def jobName = env.JOB_NAME
    return jobName.replaceAll('/', '_')
}

// Builds the path of the success marker file for one stage:
//   <cache base>/<job>/<gitCommit>/<imageTag>/<stageId>.ok
def successMarkerPath(String gitCommit, String imageTag, String stageId) {
    def markerDir = "${testSuccessCacheBase()}/${safeJobNameForCache()}/${gitCommit}/${imageTag}"
    return "${markerDir}/${stageId}.ok"
}

// Builds the directory path used to keep cached .deb packages:
//   <cache base>/<job>/debs/<gitCommit>/<imageTag>
def debCachePath(String gitCommit, String imageTag) {
    def jobCacheDir = "${testSuccessCacheBase()}/${safeJobNameForCache()}"
    return "${jobCacheDir}/debs/${gitCommit}/${imageTag}"
}

def getgputargets() {
targets="gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1201"
return targets
Expand Down Expand Up @@ -106,6 +137,8 @@ def cmake_build = { bconf ->
def rocmtest = { Map conf = [:], Closure body ->
def variant = conf.get("variant", env.STAGE_NAME)
def setup = conf.get("setup", {})
def stageCacheId = conf.get("stageCacheId", null)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should come from the `variant` variable.

def cacheImageTag = conf.get("cacheImageTag", env.IMAGE_TAG)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

imageTag is already passed in. This is redundant.


def docker_args = conf.get("docker_args", "")
def image = conf.get("image", DOCKER_IMAGE)
Expand All @@ -114,29 +147,110 @@ def rocmtest = { Map conf = [:], Closure body ->
env.CCACHE_COMPRESSLEVEL = 7
env.CCACHE_DIR = ccache
env.HSA_ENABLE_SDMA = 0

def skipTests = false
def markerPath = ''
def gitCommit = ''

gitStatusWrapper(credentialsId: "${env.migraphx_ci_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'AMDMIGraphX') {
def docker_opts
def docker_opts = ''
stage("setup ${variant}") {
sh 'printenv'
checkout scm
setup()
gitCommit = sh(returnStdout: true, script: 'git rev-parse HEAD').trim()
// #region agent log
echo "[MIGRAPHX_CI_SKIP_TRACE] setup ${variant}: stageCacheId=${stageCacheId ?: '(null)'}, cacheImageTag=${cacheImageTag ?: '(null)'}, IMAGE_TAG=${env.IMAGE_TAG ?: '(null)'}, gitCommit=${gitCommit}"
// #endregion

def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3').trim()
def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3').trim()
docker_opts = "--device=/dev/kfd --device=/dev/dri --cap-add SYS_PTRACE -v=${env.WORKSPACE}/../:/workspaces:rw,z"
docker_opts = docker_opts + " --group-add=${video_id} --group-add=${render_id} "
echo "Docker flags: ${docker_opts}"
if (!stageCacheId) {
// #region agent log
echo "[MIGRAPHX_CI_SKIP_TRACE] setup ${variant}: no stageCacheId — skip-cache logic not used for this rocmtest"
// #endregion
} else if (!ciTestCacheEnabled()) {
// #region agent log
echo "[MIGRAPHX_CI_SKIP_TRACE] setup ${variant}: cache disabled (MIGRAPHX_CI_FORCE_TESTS) — will run full tests"
// #endregion
} else {
// #region agent log
echo "[MIGRAPHX_CI_SKIP_TRACE] setup ${variant}: cache is ON — checking marker on agent filesystem"
// #endregion
markerPath = successMarkerPath(gitCommit, cacheImageTag, stageCacheId)
env.MIGRAPHX_CI_MARKER_PATH = markerPath
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These don't need to be environment variables.

env.MIGRAPHX_CI_DEB_CACHE = debCachePath(gitCommit, env.IMAGE_TAG)
// #region agent log
echo "[MIGRAPHX_CI_SKIP_TRACE] setup ${variant}: markerPath=${markerPath}"
echo "[MIGRAPHX_CI_SKIP_TRACE] setup ${variant}: debCachePath=${env.MIGRAPHX_CI_DEB_CACHE}"
// #endregion
skipTests = sh(returnStatus: true, script: 'test -f "$MIGRAPHX_CI_MARKER_PATH"') == 0
// #region agent log
echo "[MIGRAPHX_CI_SKIP_TRACE] setup ${variant}: marker file exists=${skipTests}"
// #endregion
if (skipTests && stageCacheId == 'hip_clang_release') {
def debOk = sh(returnStatus: true, script: 'for f in "$MIGRAPHX_CI_DEB_CACHE"/*.deb; do test -f "$f" && exit 0; done; exit 1') == 0
// #region agent log
echo "[MIGRAPHX_CI_SKIP_TRACE] setup ${variant}: HIP deb cache has .deb files=${debOk}"
// #endregion
if (!debOk) {
echo 'HIP Clang Release: marker exists but cached .deb missing; running full build'
skipTests = false
}
}
}

// #region agent log
echo "[MIGRAPHX_CI_SKIP_TRACE] setup ${variant}: final skipTests=${skipTests}"
// #endregion

if (skipTests) {
echo "Skipping tests for ${stageCacheId} (cached success, commit ${gitCommit})"
if (stageCacheId == 'hip_clang_release') {
sh 'mkdir -p build && cp "$MIGRAPHX_CI_DEB_CACHE"/*.deb build/'
}
} else {
setup()

withCredentials([usernamePassword(credentialsId: 'docker_test_cred', passwordVariable: 'DOCKERHUB_PASS', usernameVariable: 'DOCKERHUB_USER')]) {
sh "echo $DOCKERHUB_PASS | docker login --username $DOCKERHUB_USER --password-stdin"
sh "docker pull ${image}:${imageTag}"
def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3').trim()
def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3').trim()
docker_opts = "--device=/dev/kfd --device=/dev/dri --cap-add SYS_PTRACE -v=${env.WORKSPACE}/../:/workspaces:rw,z"
docker_opts = docker_opts + " --group-add=${video_id} --group-add=${render_id} "
echo "Docker flags: ${docker_opts}"

withCredentials([usernamePassword(credentialsId: 'docker_test_cred', passwordVariable: 'DOCKERHUB_PASS', usernameVariable: 'DOCKERHUB_USER')]) {
sh "echo $DOCKERHUB_PASS | docker login --username $DOCKERHUB_USER --password-stdin"
sh "docker pull ${image}:${imageTag}"
}
}
}

stage("build ${variant}") {
withDockerContainer(image: "${image}:${imageTag}", args: docker_opts + docker_args) {
timeout(time: 4, unit: 'HOURS') {
body()
// #region agent log
echo "[MIGRAPHX_CI_SKIP_TRACE] build ${variant}: branch=${skipTests && stageCacheId == 'hip_clang_release' ? 'hip_skip_stash' : skipTests ? 'skip_no_docker' : 'full_run'}"
// #endregion
if (skipTests && stageCacheId == 'hip_clang_release') {
stash includes: 'build/*.deb', name: 'migraphx-package'
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really bad: it relies on the names of the stages and re-adds the stash here in the code. If we need to replay the stashing, it would be better to have a flag we pass to rocmtest that does the stashing, something like rocmtest(stash: "build/*.deb").

echo 'HIP Clang Release: stashed restored .deb for downstream ONNX stage'
} else if (skipTests) {
echo "Skipping docker build/test for ${stageCacheId} (cached success)"
} else {
withDockerContainer(image: "${image}:${imageTag}", args: docker_opts + docker_args) {
timeout(time: 4, unit: 'HOURS') {
body()
}
}
if (stageCacheId && ciTestCacheEnabled()) {
env.MIGRAPHX_CI_MARKER_PATH = successMarkerPath(gitCommit, cacheImageTag, stageCacheId)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't need to be an environment variable.

if (stageCacheId == 'hip_clang_release') {
env.MIGRAPHX_CI_DEB_CACHE = debCachePath(gitCommit, env.IMAGE_TAG)
sh 'mkdir -p "$MIGRAPHX_CI_DEB_CACHE" && cp build/*.deb "$MIGRAPHX_CI_DEB_CACHE"/'
}
sh 'mkdir -p "$(dirname "$MIGRAPHX_CI_MARKER_PATH")" && touch "$MIGRAPHX_CI_MARKER_PATH" && echo "BUILD_URL=${BUILD_URL}" >> "$MIGRAPHX_CI_MARKER_PATH"'
// #region agent log
echo "[MIGRAPHX_CI_SKIP_TRACE] build ${variant}: wrote success marker and (if HIP) cached .deb"
// #endregion
} else {
// #region agent log
echo "[MIGRAPHX_CI_SKIP_TRACE] build ${variant}: full run finished but NOT writing marker (stageCacheId=${stageCacheId ?: 'null'} or cache disabled)"
// #endregion
}
}
}
Expand Down Expand Up @@ -218,7 +332,7 @@ pipeline {
}
steps {
script {
rocmtest([:]) {
rocmtest(stageCacheId: 'all_targets_release') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DMIGRAPHX_ENABLE_FPGA=On -DGPU_TARGETS='${getgputargets()}'")
}
}
Expand All @@ -231,7 +345,7 @@ pipeline {
}
steps {
script {
rocmtest([:]) {
rocmtest(stageCacheId: 'clang_asan') {
def sanitizers = "undefined,address"
def debug_flags = "-g -O2 -fno-omit-frame-pointer -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_C_API_TEST=Off -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_GPU=Off -DMIGRAPHX_ENABLE_CPU=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}'", compiler: '/usr/bin/clang++-17')
Expand All @@ -246,7 +360,7 @@ pipeline {
}
steps {
script {
rocmtest([:]) {
rocmtest(stageCacheId: 'clang_libstdcxx_debug') {
def sanitizers = "undefined"
def debug_flags = "-g -O2 -fno-omit-frame-pointer -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers} -D_GLIBCXX_DEBUG"
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_C_API_TEST=Off -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_GPU=Off -DMIGRAPHX_ENABLE_CPU=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}'", compiler: '/usr/bin/clang++-17')
Expand All @@ -261,7 +375,7 @@ pipeline {
}
steps {
script {
rocmtest([:]) {
rocmtest(stageCacheId: 'hip_clang_release') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DGPU_TARGETS='${getgputargets()}'")
stash includes: 'build/*.deb', name: 'migraphx-package'
}
Expand All @@ -275,7 +389,7 @@ pipeline {
}
steps {
script {
rocmtest([:]) {
rocmtest(stageCacheId: 'hip_clang_release_navi32') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DGPU_TARGETS='${getnavi3xtargets()}' -DMIGRAPHX_DISABLE_ONNX_TESTS=On")
}
}
Expand All @@ -288,7 +402,7 @@ pipeline {
}
steps {
script {
rocmtest([:]) {
rocmtest(stageCacheId: 'hip_clang_release_navi4x') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DGPU_TARGETS='${getnavi4xtargets()}' -DMIGRAPHX_DISABLE_ONNX_TESTS=On")
}
}
Expand All @@ -305,7 +419,7 @@ pipeline {
}
steps {
script {
rocmtest([:]) {
rocmtest(stageCacheId: 'hip_rtc_debug') {
def sanitizers = "undefined"
def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr,function -fno-sanitize-recover=${sanitizers}"
cmake_build(flags: "-DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang -DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DMIGRAPHX_USE_HIPRTC=On -DGPU_TARGETS='${getgputargets()}'", gpu_debug: '1')
Expand All @@ -332,7 +446,7 @@ pipeline {
}
steps {
script {
rocmtest([:]) {
rocmtest(stageCacheId: 'mlir_debug') {
// Note: the -fno-sanitize= is copied from upstream LLVM_UBSAN_FLAGS.
def sanitizers = "undefined"
def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr,function -fno-sanitize-recover=${sanitizers}"
Expand Down Expand Up @@ -396,7 +510,7 @@ pipeline {
}
steps {
script {
rocmtest(setup: setuppackage, docker_args: '-u root', image: DOCKER_IMAGE_ORT, imageTag: env.IMAGE_TAG_ORT) {
rocmtest(setup: setuppackage, docker_args: '-u root', image: DOCKER_IMAGE_ORT, imageTag: env.IMAGE_TAG_ORT, stageCacheId: 'onnx_runtime_tests', cacheImageTag: env.IMAGE_TAG_ORT) {
sh '''
apt install half
#ls -lR
Expand Down
47 changes: 47 additions & 0 deletions docs/Jenkins.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Jenkins CI (MIGraphX)

## Skipping tests that already passed (same commit)

When Jenkins restarts or a new build runs for the **same Git commit**, you can avoid re-running stages that already succeeded by enabling a filesystem-backed success cache.

### Environment variables

| Variable | Purpose |
|----------|---------|
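| `MIGRAPHX_CI_TEST_SUCCESS_CACHE` | Documented as the absolute path to a base directory (with **unset or empty** meaning caching is **disabled**). **Note:** the accompanying Jenkinsfile does not read this variable; it always uses `${WORKSPACE}/.migraphx-ci-test-success-cache` as the base directory, and caching is enabled unless `MIGRAPHX_CI_FORCE_TESTS` is set. Each test stage that has a stage id writes a marker file after success and skips work on a later run when the marker exists for the same commit and image tag. |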
| `MIGRAPHX_CI_FORCE_TESTS` | If `true` or `1`, ignores the cache and always runs all tests. |

### Cache key

Success is keyed by:

- Jenkins `JOB_NAME` (sanitized for paths)
- `git rev-parse HEAD` at checkout
- Docker image tag (`IMAGE_TAG` for main CI image; `IMAGE_TAG_ORT` for ONNX Runtime tests)
- A fixed stage id (e.g. `all_targets_release`, `hip_clang_release`, `onnx_runtime_tests`)

Markers are stored as:

`<cache>/<job_name>/<commit>/<image_tag>/<stage_id>.ok`

### Shared vs per-agent cache

- **Shared storage (recommended):** Point `MIGRAPHX_CI_TEST_SUCCESS_CACHE` at an NFS path (or similar) visible to **all** agents (mi100+, nogpu, Navi, onnxrt, etc.). Then a stage that passed on one machine can be skipped when another agent picks up the same commit.
- **Per-agent cache:** If the path is only local (e.g. under `/workspaces/.cache/...` on each machine), skips apply only when that **same** agent runs the same stage again for the same commit. Other agents will not see the markers.

### HIP Clang Release and ONNX Runtime

The ONNX stage consumes `.deb` packages via Jenkins **stash** from HIP Clang Release. After a controller restart, a new build has no stash. With the cache enabled:

- After a successful HIP Clang Release build, `build/*.deb` is copied to
`<cache>/<job_name>/debs/<commit>/<IMAGE_TAG>/`.
- If HIP Clang Release is skipped (marker hit), those `.deb` files are restored into `build/`, then **stashed** again so the ONNX agent can **unstash** as before.

**HIP skip + ONNX run requires** the cached `.deb` files to exist. If a marker exists but the deb directory is missing or empty, HIP Clang Release runs a full build again.

For ONNX-only skips, the marker uses `IMAGE_TAG_ORT` so ORT image changes invalidate the cache without a code change.

### Caveats

- **Flaky tests:** A stage that passed once may be skipped on retry; use `MIGRAPHX_CI_FORCE_TESTS=true` or delete the relevant marker/cache entries to force a re-run.
- **Cleanup:** Old cache entries are not TTL-pruned by the pipeline; remove them manually if disk use grows.
Loading