diff --git a/benchmarks/single_node/glm5_fp8_mi355x.sh b/benchmarks/single_node/glm5_fp8_mi355x.sh index ee11463ce..690298986 100755 --- a/benchmarks/single_node/glm5_fp8_mi355x.sh +++ b/benchmarks/single_node/glm5_fp8_mi355x.sh @@ -44,7 +44,8 @@ python3 -m sglang.launch_server \ --mem-fraction-static 0.85 \ --model-loader-extra-config '{"enable_multithread_load": true, "num_threads": 8}' \ --nsa-prefill-backend tilelang \ - --nsa-decode-backend tilelang > $SERVER_LOG 2>&1 & + --nsa-decode-backend tilelang \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_bf16_mi300x.sh b/benchmarks/single_node/qwen3.5_bf16_mi300x.sh index ea10647d6..1c14ab021 100755 --- a/benchmarks/single_node/qwen3.5_bf16_mi300x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi300x.sh @@ -31,7 +31,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_bf16_mi325x.sh b/benchmarks/single_node/qwen3.5_bf16_mi325x.sh index ea10647d6..1c14ab021 100644 --- a/benchmarks/single_node/qwen3.5_bf16_mi325x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi325x.sh @@ -31,7 +31,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh index f77390707..90163ccfd 100755 --- a/benchmarks/single_node/qwen3.5_bf16_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_bf16_mi355x.sh @@ -30,7 +30,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_fp8_mi300x.sh b/benchmarks/single_node/qwen3.5_fp8_mi300x.sh index 0640a20ab..83006ca95 100755 --- a/benchmarks/single_node/qwen3.5_fp8_mi300x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi300x.sh @@ -32,7 +32,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_fp8_mi325x.sh b/benchmarks/single_node/qwen3.5_fp8_mi325x.sh index 0640a20ab..83006ca95 100755 --- a/benchmarks/single_node/qwen3.5_fp8_mi325x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi325x.sh @@ -32,7 +32,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh index f77390707..90163ccfd 100644 --- a/benchmarks/single_node/qwen3.5_fp8_mi355x.sh +++ b/benchmarks/single_node/qwen3.5_fp8_mi355x.sh @@ -30,7 +30,8 @@ python3 -m sglang.launch_server \ --port $PORT \ --tensor-parallel-size $TP \ --trust-remote-code \ - --mem-fraction-static 0.8 > $SERVER_LOG 2>&1 & + --mem-fraction-static 0.8 \ + --disable-radix-cache > $SERVER_LOG 2>&1 & SERVER_PID=$!