From b4bd4005fe19e5cfb5d8ad6b0ca92cc8bc7f6035 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sat, 28 Mar 2026 05:32:42 +0000 Subject: [PATCH] Disable prefix caching for qwen3.5 & glm5 AMD benchmark scripts Add --disable-radix-cache to SGLang server launch commands for: - qwen3.5_bf16_mi300x.sh - qwen3.5_bf16_mi325x.sh - qwen3.5_bf16_mi355x.sh - qwen3.5_fp8_mi300x.sh - qwen3.5_fp8_mi325x.sh - qwen3.5_fp8_mi355x.sh - glm5_fp8_mi355x.sh Closes #968 Co-authored-by: functionstackx --- benchmarks/single_node/glm5_fp8_mi355x.sh | 3 ++- benchmarks/single_node/qwen3.5_bf16_mi300x.sh | 3 ++- benchmarks/single_node/qwen3.5_bf16_mi325x.sh | 3 ++- benchmarks/single_node/qwen3.5_bf16_mi355x.sh | 3 ++- benchmarks/single_node/qwen3.5_fp8_mi300x.sh | 3 ++- benchmarks/single_node/qwen3.5_fp8_mi325x.sh | 3 ++- benchmarks/single_node/qwen3.5_fp8_mi355x.sh | 3 ++- 7 files changed, 14 insertions(+), 7 deletions(-) diff --git a/benchmarks/single_node/glm5_fp8_mi355x.sh b/benchmarks/single_node/glm5_fp8_mi355x.sh index ee11463ce..690298986 100755 --- a/benchmarks/single_node/glm5_fp8_mi355x.sh +++ b/benchmarks/single_node/glm5_fp8_mi355x.sh @@ -44,7 +44,8 @@ python3 -m sglang.launch_server \ --mem-fraction-static 0.85 \ --model-loader-extra-config '{"enable_multithread_load": true, "num_threads": 8}' \ --nsa-prefill-backend tilelang \ - --nsa-decode-backend tilelang > $SERVER_LOG 2>&1 & + --nsa-decode-backend tilelang \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_bf16_mi300x.sh b/benchmarks/single_node/qwen3.5_bf16_mi300x.sh index ea10647d6..1c14ab021 100755 --- a/benchmarks/single_node/qwen3.5_bf16_mi300x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi300x.sh @@ -31,7 +31,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_bf16_mi325x.sh b/benchmarks/single_node/qwen3.5_bf16_mi325x.sh index ea10647d6..1c14ab021 100644 --- a/benchmarks/single_node/qwen3.5_bf16_mi325x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi325x.sh @@ -31,7 +31,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh index f77390707..90163ccfd 100755 --- a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh @@ -30,7 +30,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_fp8_mi300x.sh b/benchmarks/single_node/qwen3.5_fp8_mi300x.sh index 0640a20ab..83006ca95 100755 --- a/benchmarks/single_node/qwen3.5_fp8_mi300x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi300x.sh @@ -32,7 +32,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_fp8_mi325x.sh b/benchmarks/single_node/qwen3.5_fp8_mi325x.sh index 0640a20ab..83006ca95 100755 --- a/benchmarks/single_node/qwen3.5_fp8_mi325x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi325x.sh @@ -32,7 +32,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh index f77390707..90163ccfd 100644 --- a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh @@ -30,7 +30,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$!