diff --git a/configs/inference/amd-vllm.json b/configs/inference/amd-vllm.json index dcde3557f..318338024 100644 --- a/configs/inference/amd-vllm.json +++ b/configs/inference/amd-vllm.json @@ -6,9 +6,9 @@ "engine_images": [ { "compute_type": "gpu", - "image": "opencsghq/amd-vllm:rocm7.0.0_vllm_0.19.0", - "driver_version": "7.0", - "engine_version": "0.19.0" + "image": "opencsghq/amd-vllm:rocm7.2.1_vllm_0.20.1", + "driver_version": "7.2.1", + "engine_version": "0.20.1" } ], "engine_args": [ @@ -370,13 +370,18 @@ "Phi4ForCausalLMV", "SarvamMLAForCausalLM", "SarvamMoEForCausalLM", - "TeleChat3ForCausalLM" + "TeleChat3ForCausalLM", + "DeepseekV4ForCausalLM", + "Rnj1ForCausalLM", + "HYV3ForCausalLM", + "Ministral3ForCausalLM" ], "tool_call_parsers": { "DeepseekR1ForCausalLM": "deepseek_r1", "DeepseekV3ForCausalLM": "deepseek_v3", "DeepseekV31ForCausalLM": "deepseek_v31", "DeepseekV32ForCausalLM": "deepseek_v32", + "DeepseekV4ForCausalLM": "deepseek_v4", "Ernie4_5ForCausalLM": "ernie45", "Glm4ForCausalLM": "glm47", "Glm4MoeForCausalLM": "glm45", diff --git a/configs/inference/vllm.json b/configs/inference/vllm.json index c0aacba25..c527d0fcd 100644 --- a/configs/inference/vllm.json +++ b/configs/inference/vllm.json @@ -6,9 +6,9 @@ "engine_images": [ { "compute_type": "gpu", - "image": "opencsghq/vllm:v0.19.0", + "image": "opencsghq/vllm:v0.20.1", "driver_version": "13.0", - "engine_version": "v0.19.0", + "engine_version": "v0.20.1", "extra_archs": [ "AXK1ForCausalLM", "ApertusForCausalLM", @@ -180,7 +180,11 @@ "UltravoxModel", "VoxtralForConditionalGeneration", "VoxtralRealtimeGeneration", - "WhisperForConditionalGeneration" + "WhisperForConditionalGeneration", + "DeepseekV4ForCausalLM", + "Rnj1ForCausalLM", + "HYV3ForCausalLM", + "Ministral3ForCausalLM" ] }, { @@ -393,13 +397,18 @@ "TeleChat2ForCausalLM", "TeleFLMForCausalLM", "XverseForCausalLM", - "Zamba2ForCausalLM" + "Zamba2ForCausalLM", + "DeepseekV4ForCausalLM", + "Rnj1ForCausalLM", + "HYV3ForCausalLM", + "Ministral3ForCausalLM" ], "tool_call_parsers": { "DeepseekR1ForCausalLM": "deepseek_r1", "DeepseekV3ForCausalLM": "deepseek_v3", "DeepseekV31ForCausalLM": "deepseek_v31", "DeepseekV32ForCausalLM": "deepseek_v32", + "DeepseekV4ForCausalLM": "deepseek_v4", "Ernie4_5ForCausalLM": "ernie45", "Glm4ForCausalLM": "glm47", "Glm4MoeForCausalLM": "glm45", diff --git a/docker/inference/Dockerfile.vllm b/docker/inference/Dockerfile.vllm index 3454d9aa2..a26695d3a 100644 --- a/docker/inference/Dockerfile.vllm +++ b/docker/inference/Dockerfile.vllm @@ -1,4 +1,4 @@ -FROM vllm/vllm-openai:v0.19.0-cu130 +FROM vllm/vllm-openai:v0.20.1 RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple RUN pip install --no-cache-dir csghub-sdk==0.7.3 ray supervisor RUN sed -i 's|http://archive.ubuntu.com/ubuntu|http://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list && \ diff --git a/docker/inference/Dockerfile.vllm-amd b/docker/inference/Dockerfile.vllm-amd index 0467a596f..52eb4a02c 100644 --- a/docker/inference/Dockerfile.vllm-amd +++ b/docker/inference/Dockerfile.vllm-amd @@ -1,4 +1,4 @@ -FROM vllm/vllm-openai-rocm:v0.19.0 +FROM vllm/vllm-openai-rocm:v0.20.1 RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple RUN pip install --no-cache-dir csghub-sdk==0.7.3 ray supervisor RUN sed -i 's|http://archive.ubuntu.com/ubuntu|http://mirrors.aliyun.com/ubuntu|g' /etc/apt/sources.list && \ diff --git a/docker/inference/README.md b/docker/inference/README.md index 51901e6cc..0193a1ef8 100644 --- a/docker/inference/README.md +++ b/docker/inference/README.md @@ -12,15 +12,15 @@ echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAM ```bash export BUILDX_NO_DEFAULT_ATTESTATIONS=1 -# For vllm: opencsg-registry.cn-beijing.cr.aliyuncs.com/opencsghq/vllm:v0.8.5 -export IMAGE_TAG=v0.17.0 +# For vllm: opencsg-registry.cn-beijing.cr.aliyuncs.com/opencsghq/vllm:v0.20.1 +export IMAGE_TAG=v0.20.1 docker buildx build --platform linux/amd64,linux/arm64 \ -t ${OPENCSG_ACR}/opencsghq/vllm:${IMAGE_TAG} \ -f Dockerfile.vllm \ --push . -# For amd-vllm: opencsg-registry.cn-beijing.cr.aliyuncs.com/opencsghq/amd-vllm:rocm7.0.0_vllm_0.19.0 -export IMAGE_TAG=rocm7.0.0_vllm_0.19.0 +# For amd-vllm: opencsg-registry.cn-beijing.cr.aliyuncs.com/opencsghq/amd-vllm:rocm7.2.1_vllm_0.20.1 +export IMAGE_TAG=rocm7.2.1_vllm_0.20.1 docker buildx build --platform linux/amd64 \ -t ${OPENCSG_ACR}/opencsghq/amd-vllm:${IMAGE_TAG} \ -f Dockerfile.vllm-amd \