Skip to content

Commit c8ac02f

Browse files
authored
requirements : update transformers to 5.5.1 (ggml-org#21617)
* requirements : update transformers to 5.5.0 This commit updates the transformers dependency to version 5.5.0. The motivation for this is that transformers 5.5.0 includes support for Gemma4 and is required to be able to convert Gemma4 models. This is also causing issues for users of gguf-my-repo. Refs: https://huggingface.co/spaces/ggml-org/gguf-my-repo/discussions/202 * fix huggingface_hub version * set version of transformers to 5.5.0 * convert : add ty ignore directives to convert_hf_to_gguf.py This commit adds `ty: ignore` directives to transformers tokenizer fields/methods to avoid type check errors. There might be better ways to handle this, and perhaps this can be done in a follow-up commit. The motivation for this is that it looks like in transformers 5.5.0 AutoTokenizer.from_pretrained can return generic tokenizer types or None, and the type checker now produces an error when the conversion script accesses fields like tokenizer.vocab. * convert : add ty ignore to suppress type check errors * convert : remove incorrect type ignores * convert : fix remaining python checks I was running a newer version of ty locally, but I've switched to version 0.0.26, which is what CI uses, and I was then able to reproduce the errors. Sorry about the noise. * update transformers version to 5.5.1
1 parent 4ef9301 commit c8ac02f

12 files changed

Lines changed: 108 additions & 108 deletions

convert_hf_to_gguf.py

Lines changed: 83 additions & 83 deletions
Large diffs are not rendered by default.

convert_hf_to_gguf_update.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def get_existing_models(convert_py):
296296
except Exception as e:
297297
raise OSError(f"Error loading tokenizer for model {name}.") from e
298298

299-
chktok = tokenizer.encode(CHK_TXT)
299+
chktok = tokenizer.encode(CHK_TXT) # ty: ignore[unresolved-attribute]
300300
chkhsh = sha256(str(chktok).encode()).hexdigest()
301301

302302
logger.info(f"model: {name}")
@@ -468,7 +468,7 @@ def get_vocab_base_pre(self, tokenizer) -> str:
468468

469469
with open(f"models/ggml-vocab-{name}.gguf.out", "w") as f:
470470
for text in tests:
471-
res = tokenizer.encode(text, add_special_tokens=False)
471+
res = tokenizer.encode(text, add_special_tokens=False) # ty: ignore[unresolved-attribute]
472472
for r in res:
473473
f.write(f" {r}")
474474
f.write("\n")

convert_lora_to_gguf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ def set_gguf_parameters(self):
402402
# the invocation string includes the "<|start_of_turn|>"
403403
# token, but the adapters themselves were trained to
404404
# activate _after_ that first token, so we drop it here.
405-
alora_invocation_tokens = tokenizer(invocation_string)["input_ids"][1:]
405+
alora_invocation_tokens = tokenizer(invocation_string)["input_ids"][1:] # ty: ignore[call-non-callable]
406406
if alora_invocation_tokens:
407407
logger.debug("GGUF KV: %s = %s", gguf.Keys.Adapter.ALORA_INVOCATION_TOKENS, alora_invocation_tokens)
408408
self.gguf_writer.add_key_value(

examples/model-conversion/scripts/causal/run-casual-gen-embeddings-org.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,10 @@
5353
print(f"Model name: {model_name}")
5454

5555
prompt = "Hello world today"
56-
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
56+
input_ids = tokenizer(prompt, return_tensors="pt").input_ids # ty: ignore[call-non-callable]
5757
print(f"Input tokens: {input_ids}")
5858
print(f"Input text: {repr(prompt)}")
59-
print(f"Tokenized: {tokenizer.convert_ids_to_tokens(input_ids[0])}")
59+
print(f"Tokenized: {tokenizer.convert_ids_to_tokens(input_ids[0])}") # ty: ignore[unresolved-attribute]
6060

6161
with torch.no_grad():
6262
outputs = model(input_ids, output_hidden_states=True)
@@ -92,7 +92,7 @@
9292

9393
# Print embeddings per token in the requested format
9494
print("\nToken embeddings:")
95-
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
95+
tokens = tokenizer.convert_ids_to_tokens(input_ids[0]) # ty: ignore[unresolved-attribute]
9696
for i, embedding in enumerate(token_embeddings):
9797
# Format: show first few values, ..., then last few values
9898
if len(embedding) > 10:

examples/model-conversion/scripts/utils/semantic_check.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,8 @@ def main():
207207
else:
208208
model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True)
209209

210-
encoded = tokenizer(prompt, return_tensors="pt")
211-
tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0])
210+
encoded = tokenizer(prompt, return_tensors="pt") # ty: ignore[call-non-callable]
211+
tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0]) # ty: ignore[unresolved-attribute]
212212
n_tokens = len(tokens)
213213
print(f"n_tokens: {n_tokens}");
214214
print(f"hidden_size: {model.config.hidden_size}")

gguf-py/gguf/vocab.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ def __init__(self, base_path: Path):
543543
cache_dir=base_path,
544544
local_files_only=True,
545545
)
546-
assert self.tokenizer.is_fast # assume tokenizer.json is used
546+
assert self.tokenizer.is_fast # assume tokenizer.json is used # ty: ignore[unresolved-attribute]
547547

548548
# Initialize lists and dictionaries for added tokens
549549
self.added_tokens_list = []
@@ -552,30 +552,30 @@ def __init__(self, base_path: Path):
552552

553553
# Process added tokens
554554
for tok, tokidx in sorted(
555-
self.tokenizer.get_added_vocab().items(), key=lambda x: x[1]
555+
self.tokenizer.get_added_vocab().items(), key=lambda x: x[1] # ty: ignore[unresolved-attribute]
556556
):
557557
# Only consider added tokens that are not in the base vocabulary
558-
if tokidx >= self.tokenizer.vocab_size:
558+
if tokidx >= self.tokenizer.vocab_size: # ty: ignore[unresolved-attribute]
559559
self.added_tokens_list.append(tok)
560560
self.added_tokens_dict[tok] = tokidx
561561
self.added_tokens_ids.add(tokidx)
562562

563563
# Store special tokens and their IDs
564564
self.specials = {
565-
tok: self.tokenizer.get_vocab()[tok]
566-
for tok in self.tokenizer.all_special_tokens
565+
tok: self.tokenizer.get_vocab()[tok] # ty: ignore[unresolved-attribute]
566+
for tok in self.tokenizer.all_special_tokens # ty: ignore[unresolved-attribute]
567567
}
568-
self.special_ids = set(self.tokenizer.all_special_ids)
568+
self.special_ids = set(self.tokenizer.all_special_ids) # ty: ignore[unresolved-attribute]
569569

570570
# Set vocabulary sizes
571-
self.vocab_size_base = self.tokenizer.vocab_size
571+
self.vocab_size_base = self.tokenizer.vocab_size # ty: ignore[unresolved-attribute]
572572
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
573573

574574
self.fname_tokenizer = fname_tokenizer
575575

576576
def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
577577
reverse_vocab = {
578-
id: encoded_tok for encoded_tok, id in self.tokenizer.get_vocab().items()
578+
id: encoded_tok for encoded_tok, id in self.tokenizer.get_vocab().items() # ty: ignore[unresolved-attribute]
579579
}
580580

581581
for token_id in range(self.vocab_size_base):
@@ -616,7 +616,7 @@ def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
616616
yield text.encode("utf-8"), score, toktype
617617

618618
def has_newline_token(self):
619-
return "<0x0A>" in self.tokenizer.vocab or "\n" in self.tokenizer.vocab
619+
return "<0x0A>" in self.tokenizer.vocab or "\n" in self.tokenizer.vocab # ty: ignore[unresolved-attribute]
620620

621621
def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
622622
yield from self.hf_tokens()

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ classifiers = [
1818
python = ">=3.9"
1919
numpy = "^1.25.0"
2020
sentencepiece = ">=0.1.98,<0.3.0"
21-
transformers = ">=4.35.2,<5.0.0"
21+
transformers = "==5.5.1"
2222
protobuf = ">=4.21.0,<5.0.0"
2323
gguf = { path = "./gguf-py" }
2424
torch = { version = "^2.2.0", source = "pytorch" }
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
numpy~=1.26.4
22
sentencepiece>=0.1.98,<0.3.0
33

4-
transformers>=4.57.1,<5.0.0
4+
transformers==5.5.1
55

66
gguf>=0.1.0
77
protobuf>=4.21.0,<5.0.0

requirements/requirements-tool_bench.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
aiohttp~=3.9.3
22
pytest~=8.3.3
3-
huggingface_hub>=0.34.0,<1.0
3+
huggingface_hub>=1.5.0,<2.0
44
matplotlib~=3.10.0
55
numpy~=1.26.4
66
openai~=2.14.0

tests/test-tokenizer-0.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
lines = f.readlines()
2020
s = ''.join(lines)
2121
t_start = time.time()
22-
res = tokenizer.encode(s, add_special_tokens=False)
22+
res = tokenizer.encode(s, add_special_tokens=False) # ty: ignore[unresolved-attribute]
2323
t_end = time.time()
2424
print('\nmain : tokenized in', "{:.3f}".format(1000.0 * (t_end - t_start)), 'ms (py)') # noqa: NP100
2525
with open(fname_out, 'w', encoding='utf-8') as f:

0 commit comments

Comments
 (0)