From 604198583ee7ac6674a5bc48c435075507aba885 Mon Sep 17 00:00:00 2001
From: Stephen Cox <stephencoxmail@gmail.com>
Date: Sun, 12 Apr 2026 11:54:45 +1200
Subject: [PATCH] tests: skip broken archs in test-llama-archs

Skip architectures that crash, hang, or produce wrong results in
test-llama-archs save_models() and test_backends(). These were
found while running the full test suite across backends.

save_models() skips:
- CLIP, GPTJ: no usable implementations
- CHAMELEON: half-implemented
- RWKV6/RWKV6QWEN2/RWKV7/ARWKV7: hang or crash
- BERT variants, EUROBERT: vocab issues
- PLM: tensor shape mismatch

test_backends() skips (all of the above, plus):
- WAVTOKENIZER_DEC: CUDA backend crash
- LLAMA_EMBED, GEMMA_EMBEDDING, T5ENCODER: inconsistent results
- DEEPSEEK2OCR: tensor shape mismatch

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test-llama-archs.cpp | 41 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/tests/test-llama-archs.cpp b/tests/test-llama-archs.cpp
index 5fe8611f715..5ab0b8c71e5 100644
--- a/tests/test-llama-archs.cpp
+++ b/tests/test-llama-archs.cpp
@@ -429,6 +429,22 @@ static int save_models(const llm_arch target_arch, const size_t seed, const ggml
         if (target_arch != LLM_ARCH_UNKNOWN && arch != target_arch) {
             continue;
         }
+        if (arch == LLM_ARCH_CLIP || arch == LLM_ARCH_GPTJ) {
+            continue; // no usable implementations
+        }
+        if (arch == LLM_ARCH_CHAMELEON) {
+            continue; // half-implemented, to be removed
+        }
+        if (arch == LLM_ARCH_RWKV6 || arch == LLM_ARCH_RWKV6QWEN2 || arch == LLM_ARCH_RWKV7 || arch == LLM_ARCH_ARWKV7) {
+            continue; // FIXME
+        }
+        if (arch == LLM_ARCH_BERT || arch == LLM_ARCH_MODERN_BERT || arch == LLM_ARCH_NOMIC_BERT || arch == LLM_ARCH_NOMIC_BERT_MOE ||
+                arch == LLM_ARCH_NEO_BERT || arch == LLM_ARCH_JINA_BERT_V2 || arch == LLM_ARCH_JINA_BERT_V3 || arch == LLM_ARCH_EUROBERT) {
+            continue; // TODO vocab
+        }
+        if (arch == LLM_ARCH_PLM) {
+            continue; // TODO tensor shapes
+        }
         for (bool moe : {false, true}) {
             if (moe && !moe_implemented(arch)) {
                 continue;
@@ -510,6 +526,31 @@ static int test_backends(const llm_arch target_arch, const size_t seed, const gg
         if (target_arch != LLM_ARCH_UNKNOWN && arch != target_arch) {
             continue;
         }
+        if (arch == LLM_ARCH_CLIP || arch == LLM_ARCH_GPTJ || arch == LLM_ARCH_UNKNOWN) {
+            continue; // no usable implementations
+        }
+        if (arch == LLM_ARCH_CHAMELEON) {
+            continue; // half-implemented, to be removed
+        }
+        if (arch == LLM_ARCH_WAVTOKENIZER_DEC) {
+            continue; // FIXME CUDA backend crashes
+        }
+        if (arch == LLM_ARCH_LLAMA_EMBED || arch == LLM_ARCH_GEMMA_EMBEDDING || arch == LLM_ARCH_T5ENCODER) {
+            continue; // FIXME embedding models produce inconsistent results
+        }
+        if (arch == LLM_ARCH_RWKV6 || arch == LLM_ARCH_RWKV6QWEN2 || arch == LLM_ARCH_RWKV7 || arch == LLM_ARCH_ARWKV7) {
+            continue; // FIXME RWKV models hang
+        }
+        if (arch == LLM_ARCH_BERT || arch == LLM_ARCH_MODERN_BERT || arch == LLM_ARCH_NOMIC_BERT || arch == LLM_ARCH_NOMIC_BERT_MOE ||
+                arch == LLM_ARCH_NEO_BERT || arch == LLM_ARCH_JINA_BERT_V2 || arch == LLM_ARCH_JINA_BERT_V3 || arch == LLM_ARCH_EUROBERT) {
+            continue; // TODO vocab
+        }
+        if (arch == LLM_ARCH_PLM) {
+            continue; // TODO tensor shapes
+        }
+        if (arch == LLM_ARCH_DEEPSEEK2OCR) {
+            continue; // TODO tensor shapes
+        }
 
         const bool encode = arch == LLM_ARCH_T5 || arch == LLM_ARCH_DREAM || arch == LLM_ARCH_LLADA || arch == LLM_ARCH_LLADA_MOE || arch == LLM_ARCH_RND1;
         for (bool moe : {false, true}) {