Skip to content

Commit 44eda65

Browse files
committed
Minor update to qwen3-tts; upload models.
1 parent 307d077 commit 44eda65

2 files changed

Lines changed: 36 additions & 0 deletions

File tree

models/qwen_tts.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -801,6 +801,10 @@ namespace chatllm::qwen::tts
801801
auto layer = new Qwen3TTSDecoderLayer(ctx, hidden_size, num_attention_heads, intermediate_size,
802802
num_key_value_heads, max_length, head_dim);
803803

804+
layer->set_id(i);
805+
if (ctx->dtype == ggml::type::GGML_TYPE_F16)
806+
layer->mlp.set_prec(ggml::prec::GGML_PREC_F32);
807+
804808
auto allocator = ctx->get_allocator();
805809
auto buf = allocator->alloc(layer->get_cache_size(), BackendBufAllocator::Usage::Matrix);
806810
layer->set_cache_buffer(buf);

scripts/models.json

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3785,6 +3785,22 @@
37853785
}
37863786
}
37873787
},
3788+
"nanbeige4.1": {
3789+
"brief": "Nanbeige4.1-3B is built upon Nanbeige4-3B-Base and represents an enhanced iteration of our previous reasoning model, Nanbeige4-3B-Thinking-2511.",
3790+
"default": "3b",
3791+
"license": "Apache License 2.0",
3792+
"variants": {
3793+
"3b": {
3794+
"default": "q8",
3795+
"quantized": {
3796+
"q8": {
3797+
"size": 4182500896,
3798+
"url": "chatllm_quantized_nanbeige/nanbeige4.1-3b.bin"
3799+
}
3800+
}
3801+
}
3802+
}
3803+
},
37883804
"glm-4.6v": {
37893805
"brief": "GLM-4.6V series model includes two versions: GLM-4.6V (106B), a foundation model designed for cloud and high-performance cluster scenarios, and GLM-4.6V-Flash (9B), a lightweight model optimized for local deployment and low-latency applications.",
37903806
"default": "flash",
@@ -4032,6 +4048,10 @@
40324048
"q8": {
40334049
"size": 1234409040,
40344050
"url": "chatllm_quantized_qwen3/qwen3-tts-12hz-0.6b-base.bin"
4051+
},
4052+
"f16": {
4053+
"size": 2174932560,
4054+
"url": "chatllm_quantized_qwen3/qwen3-tts-12hz-0.6b-base-f16.bin"
40354055
}
40364056
}
40374057
},
@@ -4041,6 +4061,10 @@
40414061
"q8": {
40424062
"size": 1216672496,
40434063
"url": "chatllm_quantized_qwen3/qwen3-tts-12hz-0.6b-customvoice.bin"
4064+
},
4065+
"f16": {
4066+
"size": 2157196016,
4067+
"url": "chatllm_quantized_qwen3/qwen3-tts-12hz-0.6b-customvoice-f16.bin"
40444068
}
40454069
}
40464070
},
@@ -4050,6 +4074,10 @@
40504074
"q8": {
40514075
"size": 2314950384,
40524076
"url": "chatllm_quantized_qwen3/qwen3-tts-12hz-1.7b-base.bin"
4077+
},
4078+
"f16": {
4079+
"size": 4203124464,
4080+
"url": "chatllm_quantized_qwen3/qwen3-tts-12hz-1.7b-base-f16.bin"
40534081
}
40544082
}
40554083
},
@@ -4059,6 +4087,10 @@
40594087
"q8": {
40604088
"size": 2290918288,
40614089
"url": "chatllm_quantized_qwen3/qwen3-tts-12hz-1.7b-customvoice.bin"
4090+
},
4091+
"f16": {
4092+
"size": 4179092368,
4093+
"url": "chatllm_quantized_qwen3/qwen3-tts-12hz-1.7b-customvoice-f16.bin"
40624094
}
40634095
}
40644096
},

0 commit comments

Comments
 (0)