diff --git a/docker/Dockerfile.gpu b/docker/Dockerfile.gpu
index 161aa87..cd3096c 100644
--- a/docker/Dockerfile.gpu
+++ b/docker/Dockerfile.gpu
@@ -11,8 +11,10 @@ ENV DEBIAN_FRONTEND=noninteractive \
     TOKENIZERS_PARALLELISM=false
 
 # System packages (if needed, keep minimal)
-RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates && \
-    apt-get clean && \
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    git \
+    && apt-get clean && \
     rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 # Upgrade pip
@@ -31,7 +33,10 @@ RUN pip install "vllm>=0.11.0"
 
 RUN pip install "transformers>=4.52.0"
 
-RUN pip install "llmcompressor>=0.8.0"
+# Install llmcompressor from specific commit that includes _update_mamba_mask fix
+# Commit 4cfc0e6217c263cb7450cbf95764de4a1fbffab8 (Oct 14, 2025)
+# This fix is not yet in any release (latest is 0.8.1 from Oct 8, 2025)
+RUN pip install git+https://github.com/vllm-project/llm-compressor.git@4cfc0e6217c263cb7450cbf95764de4a1fbffab8
 
 # Install llama.cpp for GGUF quantization support
 ARG LLAMA_CPP_VERSION=b6945