diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index b9560803e..be45dceef 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -417,7 +417,7 @@ minimaxm2.5-fp8-mi300x-vllm: - { tp: 4, conc-start: 4, conc-end: 64 } minimaxm2.5-fp8-mi325x-vllm: - image: vllm/vllm-openai-rocm:v0.16.0 + image: vllm/vllm-openai-rocm:v0.18.0 model: MiniMaxAI/MiniMax-M2.5 model-prefix: minimaxm2.5 runner: mi325x @@ -429,17 +429,17 @@ minimaxm2.5-fp8-mi325x-vllm: osl: 1024 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 512 } - isl: 1024 osl: 8192 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 } - isl: 8192 osl: 1024 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 } gptoss-fp4-mi300x-vllm: image: vllm/vllm-openai-rocm:v0.17.0 diff --git a/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh b/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh index 6c9a2ef6b..7938de9ee 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh @@ -5,6 +5,7 @@ source "$(dirname "$0")/../benchmark_lib.sh" check_env_vars \ MODEL \ TP \ + EP_SIZE \ CONC \ ISL \ OSL \ @@ -30,16 +31,22 @@ export VLLM_ROCM_USE_AITER=1 SERVER_LOG=/workspace/server.log PORT=${PORT:-8888} +if [ "$EP_SIZE" -gt 1 ]; then + EP=" --enable-expert-parallel" +else + EP=" " +fi + # Start GPU monitoring (power, temperature, clocks every second) start_gpu_monitor set -x vllm serve $MODEL --port $PORT \ --tensor-parallel-size=$TP \ +$EP \ --gpu-memory-utilization 0.95 \ --max-model-len $MAX_MODEL_LEN \ --block-size=32 \ ---disable-log-requests \ --no-enable-prefix-caching \ --trust-remote-code > $SERVER_LOG 2>&1 & diff --git 
a/perf-changelog.yaml b/perf-changelog.yaml index 6b0689cbf..3f0dac605 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1074,6 +1074,13 @@ - "dsr1-fp8-b200-sglang-mtp: v0.5.8-cu130-amd64 → v0.5.9-cu130" - "dsr1-fp8-h200-sglang: v0.5.9-cu129-amd64 → v0.5.9-cu130" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/943 + +- config-keys: - minimaxm2.5-fp8-mi325x-vllm description: - "Upgrade vLLM ROCm image from v0.16.0 to v0.18.0" - "Replace TP4 with TP8/EP8; extend conc range to 4-512 (1024/1024) and 4-256 (other shapes); drop removed --disable-log-requests flag" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/953 - config-keys: - kimik2.5-fp4-mi355x-vllm