Skip to content
241 changes: 241 additions & 0 deletions .github/workflows/npu-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
# Builds the torch_npu wheel and runs test/npu/test_device.py.
name: NPU Build and Test

# NOTE: generic YAML 1.1 parsers read the `on` key as a boolean; GitHub's
# loader treats it as the trigger map, so it is left unquoted.
on:
  # Automatic runs are limited to changes of this workflow file itself.
  push:
    paths:
      - '.github/workflows/npu-test.yml'
  pull_request:
    paths:
      - '.github/workflows/npu-test.yml'
  # Manual runs may select a specific PyTorch nightly by build date.
  workflow_dispatch:
    inputs:
      torch_nightly_date:
        # The install step falls back to a pinned nightly (not the latest one)
        # when this input is empty, so the description says exactly that.
        description: 'PyTorch nightly 日期 (格式: YYYYMMDD,留空使用工作流中固定的默认版本)'
        required: false
        default: ''

jobs:
  # Single job: build the wheel, install it, then smoke-test it on the runner.
  build-and-test:
    name: Build and Test torch_npu
    runs-on: linux-aarch64-a3-2
    container:
      image: swr.cn-north-4.myhuaweicloud.com/frameworkptadapter/pytorch_2.11.0_a2_aarch64_builder:20260331
      # Steps run as root inside the container (a later step calls `yum install`).
      options: '--user root'
    env:
      # Kept as a quoted string so YAML does not read it as a float; the steps
      # append it to `pip` / `python` to pick the interpreter binaries.
      PYTHON_VERSION: "3.11"
      # Same reference as container.image above; printed in the step summary.
      # Keep the two values in sync.
      DOCKER_IMAGE: swr.cn-north-4.myhuaweicloud.com/frameworkptadapter/pytorch_2.11.0_a2_aarch64_builder:20260331
      # NOTE(review): presumably read by ci/build.sh to skip auditwheel repair - confirm.
      AUDITWHEEL_PLAT: "skip"

    steps:
# Fetch the repository together with all nested git submodules.
- name: Checkout
  uses: actions/checkout@v4
  with:
    submodules: 'recursive'

# Create the pip and ccache cache roots that the cache steps restore into.
- name: Setup cache directories
  run: |
    CACHE_ROOT=/github/home/.cache
    mkdir -p "${CACHE_ROOT}/pip" "${CACHE_ROOT}/ccache"
    # World-writable so any user inside the container can populate the caches.
    chmod -R 777 "${CACHE_ROOT}"

# Install ccache from the distro repositories and print its version for the log.
- name: Install ccache
  run: |
    yum install -y ccache && ccache --version

# Persist pip's download cache between runs, keyed by the Python version.
- name: Cache pip
  uses: actions/cache@v4
  with:
    key: pip-arm-py${{ env.PYTHON_VERSION }}-build-test
    # Fall back to any pip cache saved for the same Python version.
    restore-keys: |
      pip-arm-py${{ env.PYTHON_VERSION }}-
    path: /github/home/.cache/pip

# Remove the packages shipped in the image so the next step installs fresh ones.
- name: Uninstall pre-installed packages
  run: |
    PIP=pip${{ env.PYTHON_VERSION }}
    # Best effort: a failing uninstall must not fail the step.
    $PIP uninstall -y torch torchvision pyyaml setuptools auditwheel || true
    echo "Pre-installed packages uninstalled"

# Install the build prerequisites and a PyTorch nightly (CPU wheel), then
# expose the installed torch version as the step output `version`.
- name: Install PyTorch nightly
  id: install_torch
  env:
    # Hand the user-supplied input to the script through the environment
    # instead of pasting it into the script text, so it is never parsed as
    # shell code.
    TORCH_NIGHTLY_DATE: ${{ github.event.inputs.torch_nightly_date }}
  run: |
    PIP=pip${{ env.PYTHON_VERSION }}
    PYTHON=python${{ env.PYTHON_VERSION }}

    export PIP_CACHE_DIR=/github/home/.cache/pip
    $PIP install --upgrade pip

    # Base dependencies
    $PIP install pyyaml setuptools auditwheel

    if [ -n "${TORCH_NIGHTLY_DATE}" ]; then
      # Accept exactly eight digits (YYYYMMDD); reject everything else before
      # it reaches the pip requirement string.
      case "${TORCH_NIGHTLY_DATE}" in
        [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]) ;;
        *)
          echo "Invalid torch_nightly_date '${TORCH_NIGHTLY_DATE}': expected YYYYMMDD" >&2
          exit 1
          ;;
      esac
      $PIP install --pre "torch==2.12.0.dev${TORCH_NIGHTLY_DATE}" --index-url https://download.pytorch.org/whl/nightly/cpu
    else
      # No date given: install the pinned version (per requirements.txt)
      $PIP install --pre "torch==2.12.0.dev20260217" --extra-index-url https://download.pytorch.org/whl/nightly/cpu
    fi

    TORCH_VER=$($PYTHON -c "import torch; print(torch.__version__)")
    echo "version=${TORCH_VER}" >> "$GITHUB_OUTPUT"
    echo "PyTorch nightly version: ${TORCH_VER}"

# Persist the compiler cache. The key contains the commit SHA, so each commit
# gets its own entry; restore-keys falls back to a cache saved for the same
# Python and torch version.
- name: Cache ccache
  uses: actions/cache@v4
  with:
    key: ccache-arm-py${{ env.PYTHON_VERSION }}-torch${{ steps.install_torch.outputs.version }}-${{ github.sha }}
    restore-keys: |
      ccache-arm-py${{ env.PYTHON_VERSION }}-torch${{ steps.install_torch.outputs.version }}-
    path: /github/home/.cache/ccache

# Build the wheel through ci/build.sh (with ccache when available).
# Step outputs: `status` (build exit code), `wheel` (first dist/*.whl, set on
# success only) and `ccache_stats` (one-line cache statistics, ccache only).
- name: Build torch_npu wheel
  id: build
  run: |
    # Configure ccache
    if command -v ccache &> /dev/null; then
      echo "ccache found, enabling ccache"
      # Export the cache location and settings BEFORE calling ccache, so that
      # `ccache -M` / `ccache -z` act on the directory restored by the cache
      # step rather than on ccache's default directory.
      export CCACHE_DIR=/github/home/.cache/ccache
      export CCACHE_COMPRESS=1
      export CCACHE_MAXSIZE=10G
      export CCACHE_BASEDIR="${PWD}"
      ccache -M 10G
      ccache -z || true
      export CC="ccache gcc"
      export CXX="ccache g++"
      USE_CCACHE=1
    else
      echo "ccache not found, building without cache"
      USE_CCACHE=0
    fi

    # Build parameters
    echo "nproc value: $(nproc)"
    export MAX_JOBS=40
    echo "MAX_JOBS: ${MAX_JOBS}"
    export DISABLE_INSTALL_TORCHAIR=FALSE
    export BUILD_WITHOUT_SHA=1

    # Build via the ci/build.sh script; the log is kept for the artifact upload.
    bash ci/build.sh --python=${{ env.PYTHON_VERSION }} 2>&1 | tee /tmp/build.log
    # PIPESTATUS[0] is the build's exit code ($? would be tee's).
    BUILD_STATUS=${PIPESTATUS[0]}

    # ccache statistics (the pattern covers both ccache 3.x and 4.x output)
    if [ "${USE_CCACHE}" = "1" ]; then
      CCACHE_STATS=$(ccache -s | grep -E "cache hit|cache miss|cache size|hit rate" | tr '\n' ' ')
      echo "ccache_stats=${CCACHE_STATS}" >> "$GITHUB_OUTPUT"
      ccache -s
    fi

    echo "status=${BUILD_STATUS}" >> "$GITHUB_OUTPUT"
    if [ "${BUILD_STATUS}" -eq 0 ]; then
      WHL=$(ls dist/*.whl 2>/dev/null | head -1)
      echo "wheel=${WHL}" >> "$GITHUB_OUTPUT"
      echo "Build succeeded: ${WHL}"
    fi
    # Fail the step when the build failed.
    exit "${BUILD_STATUS}"

# Install the wheel produced by the build step into the job's Python.
- name: Install torch_npu wheel
  run: |
    PIP=pip${{ env.PYTHON_VERSION }}
    $PIP install dist/torch_npu*.whl
    echo "torch_npu wheel installed"

# Diagnostic only: list the Ascend install directories, or note a missing one.
# A missing directory never fails the step.
- name: Check Ascend paths
  run: |
    echo "=== Checking Ascend paths ==="
    for ascend_dir in /usr/local/Ascend /usr/local/Ascend/cann /usr/local/Ascend/nnal; do
      ls -la "${ascend_dir}/" 2>&1 || echo "${ascend_dir} not found"
    done

# Import torch and torch_npu and print version and device information.
- name: Verify NPU availability
  run: |
    # Load the CANN environment variables (missing scripts are ignored)
    source /usr/local/Ascend/cann/set_env.sh 2>/dev/null || true
    source /usr/local/Ascend/nnal/atb/set_env.sh 2>/dev/null || true

    PYTHON=python${{ env.PYTHON_VERSION }}
    # Move to the parent of the project root so torch_npu is not imported
    # from the source tree
    cd ..
    echo "=== Testing torch_npu import ==="
    $PYTHON -c "
    import torch
    print(f'torch: {torch.__version__}')
    import torch_npu
    print(f'torch_npu: {torch_npu.__version__}')
    print(f'NPU available: {torch.npu.is_available()}')
    print(f'NPU count: {torch.npu.device_count()}')
    print(f'NPU name: {torch.npu.get_device_name(0) if torch.npu.is_available() else \"N/A\"}')
    "

# Run test/npu/test_device.py with pytest. The result is written to the step
# output `status` (0 = passed, 1 = failed) and the step fails when pytest fails.
- name: Run test_device.py
  id: run_tests
  run: |
    # Load the CANN environment variables (missing scripts are ignored)
    source /usr/local/Ascend/cann/set_env.sh 2>/dev/null || true
    source /usr/local/Ascend/nnal/atb/set_env.sh 2>/dev/null || true

    PYTHON=python${{ env.PYTHON_VERSION }}

    # Each step starts in the project root; run the tests from the test directory
    cd test
    $PYTHON -m pytest npu/test_device.py -v 2>&1 | tee /tmp/test.log
    # After a pipeline, $? is tee's exit status; PIPESTATUS[0] is pytest's.
    TEST_STATUS=${PIPESTATUS[0]}

    if [ "${TEST_STATUS}" -eq 0 ]; then
      echo "status=0" >> "$GITHUB_OUTPUT"
      echo "test_device.py: PASSED"
    else
      echo "status=1" >> "$GITHUB_OUTPUT"
      echo "test_device.py: FAILED"
    fi
    # Propagate pytest's exit code so a failing test fails the job.
    exit "${TEST_STATUS}"

# Always publish the build log (even after a failure) for troubleshooting.
- name: Upload build log
  uses: actions/upload-artifact@v4
  if: always()
  with:
    path: /tmp/build.log
    name: build-log-${{ github.run_number }}
    # A missing log only produces a warning.
    if-no-files-found: warn

# Always publish the pytest log (even after a failure) for troubleshooting.
- name: Upload test log
  uses: actions/upload-artifact@v4
  if: always()
  with:
    path: /tmp/test.log
    name: test-log-${{ github.run_number }}
    # A missing log only produces a warning.
    if-no-files-found: warn

# Publish the built wheel whenever the build itself succeeded.
- name: Upload wheel artifact
  # Without a status function GitHub adds an implicit success(), which would
  # skip the upload if any later step (install, verify, tests) failed.
  # always() makes the build status the only deciding condition.
  if: always() && steps.build.outputs.status == '0'
  uses: actions/upload-artifact@v4
  with:
    name: torch_npu-wheel-${{ github.run_number }}
    path: dist/*.whl
    if-no-files-found: warn

# Append a Markdown result table to the job summary. Runs even when earlier
# steps failed so the summary is always produced.
- name: Build and Test summary
  if: always()
  env:
    # Step results are passed through the environment so their contents are
    # never parsed as shell code inside the script or the heredoc.
    BUILD_STATUS: ${{ steps.build.outputs.status }}
    BUILD_OUTCOME: ${{ steps.build.outcome }}
    TEST_STATUS: ${{ steps.run_tests.outputs.status }}
    TEST_OUTCOME: ${{ steps.run_tests.outcome }}
    CCACHE_STATS: ${{ steps.build.outputs.ccache_stats }}
    WHEEL_PATH: ${{ steps.build.outputs.wheel }}
  run: |
    # A step that never ran (outcome "skipped") is reported as skipped rather
    # than as failed.
    if [ "${BUILD_STATUS}" = "0" ]; then
      BUILD_ICON="✅ SUCCESS"
    elif [ "${BUILD_OUTCOME}" = "skipped" ]; then
      BUILD_ICON="⏭️ SKIPPED"
    else
      BUILD_ICON="❌ FAILED"
    fi

    if [ "${TEST_STATUS}" = "0" ]; then
      TEST_ICON="✅ PASSED"
    elif [ "${TEST_OUTCOME}" = "skipped" ]; then
      TEST_ICON="⏭️ SKIPPED"
    else
      TEST_ICON="❌ FAILED"
    fi

    # Footer: wheel path on success, otherwise a pointer to the build log.
    if [ "${BUILD_STATUS}" = "0" ]; then
      FOOTER="> Wheel: \`${WHEEL_PATH}\`"
    else
      FOOTER="> 查看 build-log artifact 获取详细错误信息"
    fi

    cat >> "$GITHUB_STEP_SUMMARY" << EOF
    ## NPU Build and Test

    | 项目 | 详情 |
    |------|------|
    | 执行时间 | $(date -u '+%Y-%m-%d %H:%M UTC') |
    | Docker 镜像 | \`${{ env.DOCKER_IMAGE }}\` |
    | PyTorch Nightly | \`${{ steps.install_torch.outputs.version }}\` |
    | 仓库 Commit | \`${{ github.sha }}\` |
    | ccache 统计 | ${CCACHE_STATS:-N/A} |
    | 构建结果 | ${BUILD_ICON} |
    | 测试结果 | ${TEST_ICON} |

    ${FOOTER}
    EOF
Loading