From 3f1950e2734092a5f30d69912d54817e5277c3f4 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 6 Apr 2026 10:20:19 +0300 Subject: [PATCH 1/2] convert : set "add bos" == True for Gemma 4 --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7ba6f6a7425..c1737bb2c31 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -7472,7 +7472,7 @@ def set_vocab(self): special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True) special_vocab.add_to_gguf(self.gguf_writer) self.gguf_writer.add_add_space_prefix(False) - self.gguf_writer.add_add_bos_token(False) # already added via the chat template + self.gguf_writer.add_add_bos_token(True) def set_gguf_parameters(self): super().set_gguf_parameters() From 4e19abc52b275f547d2b9968095cc599c6e2e2e2 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 6 Apr 2026 12:18:47 +0300 Subject: [PATCH 2/2] cont : handle old GGUFs --- src/llama-vocab.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index cb55b46b721..75dbaa91ee4 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2325,6 +2325,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { if (ml.get_key(LLM_KV_TOKENIZER_ADD_SEP, temp, false)) { add_sep = temp; } + + // workaround for Gemma 4 + // ref: https://github.com/ggml-org/llama.cpp/pull/21500 + if (pre_type == LLAMA_VOCAB_PRE_TYPE_GEMMA4 && !add_bos) { + add_bos = true; + + LLAMA_LOG_WARN("%s: override '%s' to 'true' for Gemma4\n", __func__, kv(LLM_KV_TOKENIZER_ADD_BOS).c_str()); + } } // auto-detect special tokens by text