From 0c649bbaec37b2ce85c89c0eab948666f9244e74 Mon Sep 17 00:00:00 2001 From: zhutaoyu Date: Fri, 27 Mar 2026 06:23:35 +0000 Subject: [PATCH 1/6] add minimax mi325 --- .github/configs/amd-master.yaml | 16 ++++++++-------- perf-changelog.yaml | 8 ++++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index fa48ee1be..57de9f123 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -392,7 +392,7 @@ minimaxm2.5-fp8-mi355x-vllm: - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 } minimaxm2.5-fp8-mi300x-vllm: - image: vllm/vllm-openai-rocm:v0.16.0 + image: vllm/vllm-openai-rocm:v0.18.0 model: MiniMaxAI/MiniMax-M2.5 model-prefix: minimaxm2.5 runner: mi300x @@ -404,20 +404,20 @@ minimaxm2.5-fp8-mi300x-vllm: osl: 1024 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 512 } - isl: 1024 osl: 8192 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 } - isl: 8192 osl: 1024 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 } minimaxm2.5-fp8-mi325x-vllm: - image: vllm/vllm-openai-rocm:v0.16.0 + image: vllm/vllm-openai-rocm:v0.18.0 model: MiniMaxAI/MiniMax-M2.5 model-prefix: minimaxm2.5 runner: mi325x @@ -429,17 +429,17 @@ minimaxm2.5-fp8-mi325x-vllm: osl: 1024 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 512 } - isl: 1024 osl: 8192 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 } - isl: 8192 osl: 1024 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 4, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 } gptoss-fp4-mi300x-vllm: image: vllm/vllm-openai-rocm:v0.17.0 diff --git a/perf-changelog.yaml b/perf-changelog.yaml index b85245458..9690b3a48 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1085,3 +1085,11 @@ - "Triton Fused Moe Tuning https://github.com/vllm-project/vllm/pull/35093" - "Add --max-num-seqs 256, remove --disable-log-requests" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/950 + +- config-keys: + - minimaxm2.5-fp8-mi300x-vllm + - minimaxm2.5-fp8-mi325x-vllm + description: + - "Upgrade vLLM ROCm image from v0.16.0 to v0.18.0" + - "Replace TP4 with TP8/EP8, add conc range 4-256" + pr-link: TBD From c49fa9d416a780b5f4a06643f0d47d58d894d235 Mon Sep 17 00:00:00 2001 From: zhutaoyu Date: Fri, 27 Mar 2026 06:29:28 +0000 Subject: [PATCH 2/6] update minimax mi300x/mi325x perf-changelog pr-link Co-Authored-By: Claude Opus 4.6 --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 9690b3a48..45c3b3310 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1092,4 +1092,4 @@ description: - "Upgrade vLLM ROCm image from v0.16.0 to v0.18.0" - "Replace TP4 with TP8/EP8, add conc range 4-256" - pr-link: TBD + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/953 From 19c735aa276950db82f6a8b658c6cd45c13d0255 Mon Sep 17 00:00:00 2001 From: zhutaoyu Date: Fri, 27 Mar 2026 06:35:39 +0000 Subject: [PATCH 3/6] only minimax mi325 --- benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh | 3 ++- benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh | 3 ++- perf-changelog.yaml | 1 - 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh b/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh index 4dfaf6b80..8b811f2f5 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh @@ -5,6 +5,7 @@ source "$(dirname "$0")/../benchmark_lib.sh" check_env_vars \ MODEL \ TP \ + EP_SIZE \ CONC \ ISL \ OSL \ @@ -34,10 +35,10 @@ start_gpu_monitor set -x vllm serve $MODEL --port $PORT \ --tensor-parallel-size=$TP \ +$EP \ --gpu-memory-utilization 0.95 \ --max-model-len $MAX_MODEL_LEN \ --block-size=32 \ ---disable-log-requests \ --trust-remote-code > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh b/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh index e5d404036..ca9617084 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh @@ -5,6 +5,7 @@ source "$(dirname "$0")/../benchmark_lib.sh" check_env_vars \ MODEL \ TP \ + EP_SIZE \ CONC \ ISL \ OSL \ @@ -36,10 +37,10 @@ start_gpu_monitor set -x vllm serve $MODEL --port $PORT \ --tensor-parallel-size=$TP \ +$EP \ --gpu-memory-utilization 0.95 \ --max-model-len $MAX_MODEL_LEN \ --block-size=32 \ ---disable-log-requests \ --trust-remote-code > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 45c3b3310..ab55e40cc 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1087,7 +1087,6 @@ pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/950 - config-keys: - - minimaxm2.5-fp8-mi300x-vllm - minimaxm2.5-fp8-mi325x-vllm description: - "Upgrade vLLM ROCm image from v0.16.0 to v0.18.0" From f05ab17b8c3308f2f595b972e231d9d309975ea8 Mon Sep 17 00:00:00 2001 From: zhutaoyu Date: Fri, 27 Mar 2026 06:54:32 +0000 Subject: [PATCH 4/6] fix --- benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh | 6 ++++++ benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh b/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh index 8b811f2f5..cebbf72a0 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh @@ -29,6 +29,12 @@ export VLLM_ROCM_USE_AITER=1 SERVER_LOG=/workspace/server.log PORT=${PORT:-8888} +if [ "$EP_SIZE" -gt 1 ]; then + EP=" --enable-expert-parallel" +else + EP=" " +fi + # Start GPU monitoring (power, temperature, clocks every second) start_gpu_monitor diff --git a/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh b/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh index ca9617084..cb90d21fa 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh @@ -31,6 +31,12 @@ export VLLM_ROCM_USE_AITER=1 SERVER_LOG=/workspace/server.log PORT=${PORT:-8888} +if [ "$EP_SIZE" -gt 1 ]; then + EP=" --enable-expert-parallel" +else + EP=" " +fi + # Start GPU monitoring (power, temperature, clocks every second) start_gpu_monitor From aebc624c0a88629939cf4160453541b0c12a760d Mon Sep 17 00:00:00 2001 From: zhutaoyu Date: Fri, 27 Mar 2026 08:54:57 +0000 Subject: [PATCH 5/6] fix --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 347c27101..7ddbc4750 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1022,7 +1022,7 @@ - "Config concurrency: 32-256" - "update image to vllm/vllm-openai-rocm:v0.18.0" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/927 - + - config-keys: - gptoss-fp4-mi325x-vllm - minimaxm2.5-fp8-mi325x-vllm From b0b54d71a0dbd178cb350734a00dd163b7137552 Mon Sep 17 00:00:00 2001 From: zhutaoyu Date: Fri, 27 Mar 2026 15:03:45 +0000 Subject: [PATCH 6/6] remove mi300x configs --- .github/configs/amd-master.yaml | 8 ++++---- benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh | 9 +-------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index d6d958576..be45dceef 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -392,7 +392,7 @@ minimaxm2.5-fp8-mi355x-vllm: - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 } minimaxm2.5-fp8-mi300x-vllm: - image: vllm/vllm-openai-rocm:v0.18.0 + image: vllm/vllm-openai-rocm:v0.16.0 model: MiniMaxAI/MiniMax-M2.5 model-prefix: minimaxm2.5 runner: mi300x @@ -404,17 +404,17 @@ minimaxm2.5-fp8-mi300x-vllm: osl: 1024 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 8, ep: 8, conc-start: 4, conc-end: 512 } + - { tp: 4, conc-start: 4, conc-end: 64 } - isl: 1024 osl: 8192 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 } + - { tp: 4, conc-start: 4, conc-end: 64 } - isl: 8192 osl: 1024 search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 } + - { tp: 4, conc-start: 4, conc-end: 64 } minimaxm2.5-fp8-mi325x-vllm: image: vllm/vllm-openai-rocm:v0.18.0 diff --git a/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh b/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh index cebbf72a0..4dfaf6b80 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_mi300x.sh @@ -5,7 +5,6 @@ source "$(dirname "$0")/../benchmark_lib.sh" check_env_vars \ MODEL \ TP \ - EP_SIZE \ CONC \ ISL \ OSL \ @@ -29,22 +28,16 @@ export VLLM_ROCM_USE_AITER=1 SERVER_LOG=/workspace/server.log PORT=${PORT:-8888} -if [ "$EP_SIZE" -gt 1 ]; then - EP=" --enable-expert-parallel" -else - EP=" " -fi - # Start GPU monitoring (power, temperature, clocks every second) start_gpu_monitor set -x vllm serve $MODEL --port $PORT \ --tensor-parallel-size=$TP \ -$EP \ --gpu-memory-utilization 0.95 \ --max-model-len $MAX_MODEL_LEN \ --block-size=32 \ +--disable-log-requests \ --trust-remote-code > $SERVER_LOG 2>&1 & SERVER_PID=$!