From d32969d346d3f3e1584b720f3672f47c438faab1 Mon Sep 17 00:00:00 2001
From: ddh0 <chemist-mulches-39@icloud.com>
Date: Mon, 19 Jan 2026 10:34:42 -0600
Subject: [PATCH 1/9] initial commit for branch

---
 convert_hf_to_gguf.py        | 6 +++++-
 convert_hf_to_gguf_update.py | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 464ecbaab91..c7164026777 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1255,6 +1255,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "6c81ce329e0802883b22eabab0d3fa48357337ef1ecb45443828bf1f6254833f":
             # ref: https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B
             res = "exaone-moe"
+        if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267":
+            # ref: https://huggingface.co/zai-org/GLM-4.7-Flash
+            res = "glm-4.7-flash"
 
         if res is None:
             logger.warning("\n")
@@ -7458,7 +7461,8 @@ def prepare_tensors(self):
     "DeepseekV3ForCausalLM",
     "KimiVLForConditionalGeneration",
     "YoutuForCausalLM",
-    "YoutuVLForConditionalGeneration"
+    "YoutuVLForConditionalGeneration",
+    "Glm4MoeLiteForCausalLM"
 )
 class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index aa9843ea17f..460198be692 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -148,6 +148,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "youtu",            "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Youtu-LLM-2B", },
     {"name": "solar-open",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", },
     {"name": "exaone-moe",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", },
+    {"name": "glm-4.7-flash",    "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", },
 ]
 
 # some models are known to be broken upstream, so we will skip them as exceptions

From 38e4882469a57eab0d022b1ae1de7bdb33eec4be Mon Sep 17 00:00:00 2001
From: ddh0 <chemist-mulches-39@icloud.com>
Date: Mon, 19 Jan 2026 11:01:07 -0600
Subject: [PATCH 2/9] add glm-4.7-flash, move tokenizer hash

---
 convert_hf_to_gguf_update.py | 3 +--
 src/llama-vocab.cpp          | 4 ++++
 src/llama-vocab.h            | 1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 460198be692..1a8fbd5bb3e 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -148,7 +148,6 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "youtu",            "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Youtu-LLM-2B", },
     {"name": "solar-open",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", },
     {"name": "exaone-moe",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", },
-    {"name": "glm-4.7-flash",    "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", },
 ]
 
 # some models are known to be broken upstream, so we will skip them as exceptions
@@ -171,9 +170,9 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "grok-2",    "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"},
     # jina-v2-de variants
     {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"},
+    {"name": "glm-4.7-flash", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"},
 ]
 
-
 def download_file_with_auth(url, token, save_path):
     headers = {"Authorization": f"Bearer {token}"} if token else None
     response = sess.get(url, headers=headers)
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index a23950d007c..f332dbe7e56 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2041,6 +2041,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "solar-open") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN;
                 clean_spaces = false;
+            } else if (
+                tokenizer_pre == "glm-4.7-flash") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH;
+                clean_spaces = false;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 28c3a82b91e..20f94102a04 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -54,6 +54,7 @@ enum llama_vocab_pre_type {
     LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN      = 43,
     LLAMA_VOCAB_PRE_TYPE_YOUTU           = 44,
     LLAMA_VOCAB_PRE_TYPE_EXAONE_MOE      = 45,
+    LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH   = 46,
 };
 
 struct LLM_KV;

From eb630d4a408cd585c65428da2ba93760c35091c5 Mon Sep 17 00:00:00 2001
From: ddh0 <chemist-mulches-39@icloud.com>
Date: Mon, 19 Jan 2026 11:22:17 -0600
Subject: [PATCH 3/9] use `glm4` pretok

---
 convert_hf_to_gguf.py        | 2 +-
 convert_hf_to_gguf_update.py | 2 +-
 src/llama-vocab.cpp          | 4 ----
 src/llama-vocab.h            | 1 -
 4 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index c7164026777..894839aa95a 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1257,7 +1257,7 @@ def get_vocab_base_pre(self, tokenizer) -> str:
             res = "exaone-moe"
         if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267":
             # ref: https://huggingface.co/zai-org/GLM-4.7-Flash
-            res = "glm-4.7-flash"
+            res = "glm4"
 
         if res is None:
             logger.warning("\n")
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 1a8fbd5bb3e..2d3883fb408 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -170,7 +170,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "grok-2",    "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"},
     # jina-v2-de variants
     {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"},
-    {"name": "glm-4.7-flash", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"},
+    {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"},
 ]
 
 def download_file_with_auth(url, token, save_path):
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index f332dbe7e56..a23950d007c 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2041,10 +2041,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "solar-open") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN;
                 clean_spaces = false;
-            } else if (
-                tokenizer_pre == "glm-4.7-flash") {
-                pre_type = LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH;
-                clean_spaces = false;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 20f94102a04..28c3a82b91e 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -54,7 +54,6 @@ enum llama_vocab_pre_type {
     LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN      = 43,
     LLAMA_VOCAB_PRE_TYPE_YOUTU           = 44,
     LLAMA_VOCAB_PRE_TYPE_EXAONE_MOE      = 45,
-    LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH   = 46,
 };
 
 struct LLM_KV;

From c64f9e003cb607420d624e498969fe70b046d786 Mon Sep 17 00:00:00 2001
From: ddh0 <chemist-mulches-39@icloud.com>
Date: Mon, 19 Jan 2026 12:26:06 -0600
Subject: [PATCH 4/9] silence flake8 E302 (CI)

---
 convert_hf_to_gguf_update.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 2d3883fb408..2811f7f884a 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -173,6 +173,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"},
 ]
 
+
 def download_file_with_auth(url, token, save_path):
     headers = {"Authorization": f"Bearer {token}"} if token else None
     response = sess.get(url, headers=headers)

From 354e2b525a662224379005d0e6ad764aec3779cf Mon Sep 17 00:00:00 2001
From: ddh0 <chemist-mulches-39@icloud.com>
Date: Mon, 19 Jan 2026 12:29:51 -0600
Subject: [PATCH 5/9] apply review feedback

---
 convert_hf_to_gguf.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 894839aa95a..5d0aa617592 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1078,6 +1078,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df":
             # ref: https://huggingface.co/aari1995/German_Semantic_V3
             res = "jina-v2-de"
+        if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267":
+            # ref: https://huggingface.co/zai-org/GLM-4.7-Flash
+            res = "glm4"
         if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
             res = "llama-bpe"
@@ -1255,9 +1258,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "6c81ce329e0802883b22eabab0d3fa48357337ef1ecb45443828bf1f6254833f":
             # ref: https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B
             res = "exaone-moe"
-        if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267":
-            # ref: https://huggingface.co/zai-org/GLM-4.7-Flash
-            res = "glm4"
 
         if res is None:
             logger.warning("\n")
@@ -7462,7 +7462,7 @@ def prepare_tensors(self):
     "KimiVLForConditionalGeneration",
     "YoutuForCausalLM",
     "YoutuVLForConditionalGeneration",
-    "Glm4MoeLiteForCausalLM"
+    "Glm4MoeLiteForCausalLM",
 )
 class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2

From 27961dbb9c3539dd9af6ee2b38e8cc1286710d52 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Mon, 19 Jan 2026 21:43:04 +0100
Subject: [PATCH 6/9] add <|user|> as eog

---
 src/llama-vocab.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index a23950d007c..0766e326287 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2423,6 +2423,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "_<EOT>"
                     || t.first == "<|end_of_text|>"
                     || t.first == "<end_of_utterance>" // smoldocling
+                    || t.first == "<|user|>" // glm-4.7-lite
                ) {
                 special_eog_ids.insert(t.second);
                 if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {

From 4d9befc540a0f2107cccfce1b28a3a644d67c80b Mon Sep 17 00:00:00 2001
From: ddh0 <chemist-mulches-39@icloud.com>
Date: Mon, 19 Jan 2026 14:53:17 -0600
Subject: [PATCH 7/9] also add EOG `<|observation|>`

---
 src/llama-vocab.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 0766e326287..afc6bb35d0f 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2424,6 +2424,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|end_of_text|>"
                     || t.first == "<end_of_utterance>" // smoldocling
                     || t.first == "<|user|>" // glm-4.7-lite
+                    || t.first == "<|observation|>" // glm-4.7-lite
                ) {
                 special_eog_ids.insert(t.second);
                 if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {

From 9ce75e411bea17bff544bb711b45d33f21812a78 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Mon, 19 Jan 2026 22:03:39 +0100
Subject: [PATCH 8/9] revert llama-vocab

---
 src/llama-vocab.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index afc6bb35d0f..a23950d007c 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2423,8 +2423,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "_<EOT>"
                     || t.first == "<|end_of_text|>"
                     || t.first == "<end_of_utterance>" // smoldocling
-                    || t.first == "<|user|>" // glm-4.7-lite
-                    || t.first == "<|observation|>" // glm-4.7-lite
                ) {
                 special_eog_ids.insert(t.second);
                 if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {

From a15dbefebba98f5f53d57c9f16c96440264101a0 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Mon, 19 Jan 2026 22:06:19 +0100
Subject: [PATCH 9/9] inherit vocab from glm4

---
 convert_hf_to_gguf.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 5d0aa617592..becbad046d7 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -7462,7 +7462,6 @@ def prepare_tensors(self):
     "KimiVLForConditionalGeneration",
     "YoutuForCausalLM",
     "YoutuVLForConditionalGeneration",
-    "Glm4MoeLiteForCausalLM",
 )
 class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2
@@ -8450,6 +8449,32 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@ModelBase.register("Glm4MoeLiteForCausalLM")
+class Glm4MoeLiteModel(DeepseekV2Model):
+    model_arch = gguf.MODEL_ARCH.DEEPSEEK2  # same arch as the DeepseekV2Model base; this subclass only replaces set_vocab
+
+    # Vocab setup copied from Glm4MoeModel: base vocab via get_vocab_base(), special tokens assigned explicitly below
+    def set_vocab(self):
+        from transformers import AutoTokenizer  # function-scope import
+
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        # Special tokens: ids are looked up by token text in the tokenizer's added vocab (trailing numbers are the expected ids)
+        # Note: Using <|endoftext|> (151329) for eot causes endless generation, so <|user|> is used as eot instead
+        special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["[gMASK]"])  # 151331
+        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])  # 151336
+        special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<|endoftext|>"])  # 151329
+        special_vocab._set_special_token("eom", tokenizer.get_added_vocab()["<|observation|>"])  # 151338
+
+        special_vocab.add_to_gguf(self.gguf_writer)  # emit the special-vocab data collected above through the GGUF writer
+
+
 @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(TextModel):
     model_arch = gguf.MODEL_ARCH.CHATGLM