From d32969d346d3f3e1584b720f3672f47c438faab1 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Mon, 19 Jan 2026 10:34:42 -0600 Subject: [PATCH 1/9] initial commit for branch --- convert_hf_to_gguf.py | 6 +++++- convert_hf_to_gguf_update.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 464ecbaab91..c7164026777 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1255,6 +1255,9 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "6c81ce329e0802883b22eabab0d3fa48357337ef1ecb45443828bf1f6254833f": # ref: https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B res = "exaone-moe" + if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267": + # ref: https://huggingface.co/zai-org/GLM-4.7-Flash + res = "glm-4.7-flash" if res is None: logger.warning("\n") @@ -7458,7 +7461,8 @@ def prepare_tensors(self): "DeepseekV3ForCausalLM", "KimiVLForConditionalGeneration", "YoutuForCausalLM", - "YoutuVLForConditionalGeneration" + "YoutuVLForConditionalGeneration", + "Glm4MoeLiteForCausalLM" ) class DeepseekV2Model(TextModel): model_arch = gguf.MODEL_ARCH.DEEPSEEK2 diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index aa9843ea17f..460198be692 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -148,6 +148,7 @@ class TOKENIZER_TYPE(IntEnum): {"name": "youtu", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Youtu-LLM-2B", }, {"name": "solar-open", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", }, {"name": "exaone-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", }, + {"name": "glm-4.7-flash", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", }, ] # some models are known to be broken upstream, so we will skip them as exceptions From 38e4882469a57eab0d022b1ae1de7bdb33eec4be Mon Sep 17 00:00:00 2001 From: ddh0 Date: Mon, 19 Jan 2026 11:01:07 -0600 Subject: [PATCH 2/9] add glm-4.7-flash, move tokenizer hash --- convert_hf_to_gguf_update.py | 3 +-- src/llama-vocab.cpp | 4 ++++ src/llama-vocab.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 460198be692..1a8fbd5bb3e 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -148,7 +148,6 @@ class TOKENIZER_TYPE(IntEnum): {"name": "youtu", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Youtu-LLM-2B", }, {"name": "solar-open", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", }, {"name": "exaone-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", }, - {"name": "glm-4.7-flash", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", }, ] # some models are known to be broken upstream, so we will skip them as exceptions @@ -171,9 +170,9 @@ class TOKENIZER_TYPE(IntEnum): {"name": "grok-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"}, # jina-v2-de variants {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"}, + {"name": "glm-4.7-flash", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"}, ] - def download_file_with_auth(url, token, save_path): headers = {"Authorization": f"Bearer {token}"} if token else None response = sess.get(url, headers=headers) diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index a23950d007c..f332dbe7e56 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2041,6 +2041,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { tokenizer_pre == "solar-open") { pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN; clean_spaces = false; + } else if ( + tokenizer_pre == "glm-4.7-flash") { + pre_type = LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH; + clean_spaces = false; } else { throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str())); } diff --git a/src/llama-vocab.h b/src/llama-vocab.h index 28c3a82b91e..20f94102a04 100644 --- a/src/llama-vocab.h +++ b/src/llama-vocab.h @@ -54,6 +54,7 @@ enum llama_vocab_pre_type { LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN = 43, LLAMA_VOCAB_PRE_TYPE_YOUTU = 44, LLAMA_VOCAB_PRE_TYPE_EXAONE_MOE = 45, + LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH = 46, }; struct LLM_KV; From eb630d4a408cd585c65428da2ba93760c35091c5 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Mon, 19 Jan 2026 11:22:17 -0600 Subject: [PATCH 3/9] use `glm4` pretok --- convert_hf_to_gguf.py | 2 +- convert_hf_to_gguf_update.py | 2 +- src/llama-vocab.cpp | 4 ---- src/llama-vocab.h | 1 - 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index c7164026777..894839aa95a 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1257,7 +1257,7 @@ def get_vocab_base_pre(self, tokenizer) -> str: res = "exaone-moe" if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267": # ref: https://huggingface.co/zai-org/GLM-4.7-Flash - res = "glm-4.7-flash" + res = "glm4" if res is None: logger.warning("\n") diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 1a8fbd5bb3e..2d3883fb408 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -170,7 +170,7 @@ class TOKENIZER_TYPE(IntEnum): {"name": "grok-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"}, # jina-v2-de variants {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"}, - {"name": "glm-4.7-flash", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"}, + {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"}, ] def download_file_with_auth(url, token, save_path): diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index f332dbe7e56..a23950d007c 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2041,10 +2041,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { tokenizer_pre == "solar-open") { pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN; clean_spaces = false; - } else if ( - tokenizer_pre == "glm-4.7-flash") { - pre_type = LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH; - clean_spaces = false; } else { throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str())); } diff --git a/src/llama-vocab.h b/src/llama-vocab.h index 20f94102a04..28c3a82b91e 100644 --- a/src/llama-vocab.h +++ b/src/llama-vocab.h @@ -54,7 +54,6 @@ enum llama_vocab_pre_type { LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN = 43, LLAMA_VOCAB_PRE_TYPE_YOUTU = 44, LLAMA_VOCAB_PRE_TYPE_EXAONE_MOE = 45, - LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH = 46, }; struct LLM_KV; From c64f9e003cb607420d624e498969fe70b046d786 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Mon, 19 Jan 2026 12:26:06 -0600 Subject: [PATCH 4/9] silence flake8 E302 (CI) --- convert_hf_to_gguf_update.py | 1 + 1 file changed, 1 insertion(+) diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 2d3883fb408..2811f7f884a 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -173,6 +173,7 @@ class TOKENIZER_TYPE(IntEnum): {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"}, ] + def download_file_with_auth(url, token, save_path): headers = {"Authorization": f"Bearer {token}"} if token else None response = sess.get(url, headers=headers) From 354e2b525a662224379005d0e6ad764aec3779cf Mon Sep 17 00:00:00 2001 From: ddh0 Date: Mon, 19 Jan 2026 12:29:51 -0600 Subject: [PATCH 5/9] apply review feedback --- convert_hf_to_gguf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 894839aa95a..5d0aa617592 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1078,6 +1078,9 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df": # ref: https://huggingface.co/aari1995/German_Semantic_V3 res = "jina-v2-de" + if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267": + # ref: https://huggingface.co/zai-org/GLM-4.7-Flash + res = "glm4" if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5": # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B res = "llama-bpe" @@ -1255,9 +1258,6 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "6c81ce329e0802883b22eabab0d3fa48357337ef1ecb45443828bf1f6254833f": # ref: https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B res = "exaone-moe" - if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267": - # ref: https://huggingface.co/zai-org/GLM-4.7-Flash - res = "glm4" if res is None: logger.warning("\n") @@ -7462,7 +7462,7 @@ def prepare_tensors(self): "KimiVLForConditionalGeneration", "YoutuForCausalLM", "YoutuVLForConditionalGeneration", - "Glm4MoeLiteForCausalLM" + "Glm4MoeLiteForCausalLM", ) class DeepseekV2Model(TextModel): model_arch = gguf.MODEL_ARCH.DEEPSEEK2 From 27961dbb9c3539dd9af6ee2b38e8cc1286710d52 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 19 Jan 2026 21:43:04 +0100 Subject: [PATCH 6/9] add <|user|> as eog --- src/llama-vocab.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index a23950d007c..0766e326287 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2423,6 +2423,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "_" || t.first == "<|end_of_text|>" || t.first == "" // smoldocling + || t.first == "<|user|>" // glm-4.7-lite ) { special_eog_ids.insert(t.second); if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { From 4d9befc540a0f2107cccfce1b28a3a644d67c80b Mon Sep 17 00:00:00 2001 From: ddh0 Date: Mon, 19 Jan 2026 14:53:17 -0600 Subject: [PATCH 7/9] also add EOG `<|observation|>` --- src/llama-vocab.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 0766e326287..afc6bb35d0f 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2424,6 +2424,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "<|end_of_text|>" || t.first == "" // smoldocling || t.first == "<|user|>" // glm-4.7-lite + || t.first == "<|observation|>" // glm-4.7-lite ) { special_eog_ids.insert(t.second); if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { From 9ce75e411bea17bff544bb711b45d33f21812a78 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 19 Jan 2026 22:03:39 +0100 Subject: [PATCH 8/9] revert llama-vocab --- src/llama-vocab.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index afc6bb35d0f..a23950d007c 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2423,8 +2423,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "_" || t.first == "<|end_of_text|>" || t.first == "" // smoldocling - || t.first == "<|user|>" // glm-4.7-lite - || t.first == "<|observation|>" // glm-4.7-lite ) { special_eog_ids.insert(t.second); if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { From a15dbefebba98f5f53d57c9f16c96440264101a0 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 19 Jan 2026 22:06:19 +0100 Subject: [PATCH 9/9] inherit vocab from glm4 --- convert_hf_to_gguf.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 5d0aa617592..becbad046d7 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -7462,7 +7462,6 @@ def prepare_tensors(self): "KimiVLForConditionalGeneration", "YoutuForCausalLM", "YoutuVLForConditionalGeneration", - "Glm4MoeLiteForCausalLM", ) class DeepseekV2Model(TextModel): model_arch = gguf.MODEL_ARCH.DEEPSEEK2 @@ -8450,6 +8449,32 @@ def prepare_tensors(self): raise ValueError(f"Unprocessed experts: {experts}") +@ModelBase.register("Glm4MoeLiteForCausalLM") +class Glm4MoeLiteModel(DeepseekV2Model): + model_arch = gguf.MODEL_ARCH.DEEPSEEK2 + + # copied from Glm4MoeModel + def set_vocab(self): + from transformers import AutoTokenizer + + tokenizer = AutoTokenizer.from_pretrained(self.dir_model) + special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True) + tokens, toktypes, tokpre = self.get_vocab_base() + self.gguf_writer.add_tokenizer_model("gpt2") + self.gguf_writer.add_tokenizer_pre(tokpre) + self.gguf_writer.add_token_list(tokens) + self.gguf_writer.add_token_types(toktypes) + + # Special tokens + # Note: Using <|endoftext|> (151329) for eot causes endless generation + special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["[gMASK]"]) # 151331 + special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"]) # 151336 + special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<|endoftext|>"]) # 151329 + special_vocab._set_special_token("eom", tokenizer.get_added_vocab()["<|observation|>"]) # 151338 + + special_vocab.add_to_gguf(self.gguf_writer) + + @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration") class ChatGLMModel(TextModel): model_arch = gguf.MODEL_ARCH.CHATGLM