diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ad6f9a8..fc7b8b57 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -403,6 +403,8 @@ set(dsnote_lib_sources ${sources_dir}/f5_engine.hpp ${sources_dir}/kokoro_engine.cpp ${sources_dir}/kokoro_engine.hpp + ${sources_dir}/qwen3tts_engine.cpp + ${sources_dir}/qwen3tts_engine.hpp ${sources_dir}/wl_clipboard.cpp ${sources_dir}/wl_clipboard.hpp ) diff --git a/config/models.json b/config/models.json index 54a56e48..8fe452cb 100644 --- a/config/models.json +++ b/config/models.json @@ -36174,6 +36174,55 @@ "engine": "tts_kokoro", "lang_id": "zh" }, + { + "name": "Français (Qwen3-TTS Chelsie Female)", + "id": "fr_qwen3_chelsie", + "engine": "tts_qwen3", + "lang_id": "fr", + "speaker": "Chelsie" + }, + { + "name": "Français (Qwen3-TTS Serena Female)", + "id": "fr_qwen3_serena", + "engine": "tts_qwen3", + "lang_id": "fr", + "speaker": "Serena" + }, + { + "name": "Français (Qwen3-TTS Aria Female)", + "id": "fr_qwen3_aria", + "engine": "tts_qwen3", + "lang_id": "fr", + "speaker": "Aria" + }, + { + "name": "Français (Qwen3-TTS Ethan Male)", + "id": "fr_qwen3_ethan", + "engine": "tts_qwen3", + "lang_id": "fr", + "speaker": "Ethan" + }, + { + "name": "Français (Qwen3-TTS Aidan Male)", + "id": "fr_qwen3_aidan", + "engine": "tts_qwen3", + "lang_id": "fr", + "speaker": "Aidan" + }, + { + "name": "English (Qwen3-TTS Chelsie Female)", + "id": "en_qwen3_chelsie", + "engine": "tts_qwen3", + "lang_id": "en", + "speaker": "Chelsie" + }, + { + "name": "English (Qwen3-TTS Ethan Male)", + "id": "en_qwen3_ethan", + "engine": "tts_qwen3", + "lang_id": "en", + "speaker": "Ethan" + }, { "name": "English (Parler-TTS)", "id": "en_parler", diff --git a/src/dsnote_app.cpp b/src/dsnote_app.cpp index 8a5bcf7e..2834f66d 100644 --- a/src/dsnote_app.cpp +++ b/src/dsnote_app.cpp @@ -4729,6 +4729,15 @@ bool dsnote_app::feature_kokoro_gpu() const { feature_available("kokoro-tts-hip", false); } +bool dsnote_app::feature_qwen3_tts() const { + return feature_available("qwen3-tts", false); +} + +bool dsnote_app::feature_qwen3_gpu() const { + return feature_available("qwen3-tts-cuda", false) || + feature_available("qwen3-tts-hip", false); +} + bool dsnote_app::feature_punctuator() const { return feature_available("punctuator", false); } @@ -4902,6 +4911,8 @@ QVariantList dsnote_app::features_availability() { feature_available("kokoro-tts-ja", false), /*tts_kokoro_zh=*/ feature_available("kokoro-tts-zh", false), + /*tts_qwen3=*/ + feature_available("qwen3-tts", false), /*stt_fasterwhisper=*/ feature_available("faster-whisper-stt", false), /*stt_ds=*/feature_available("coqui-stt", false), diff --git a/src/dsnote_app.h b/src/dsnote_app.h index db303c21..9f9e661e 100644 --- a/src/dsnote_app.h +++ b/src/dsnote_app.h @@ -71,6 +71,8 @@ X(f5_gpu) \ X(kokoro_tts) \ X(kokoro_gpu) \ + X(qwen3_tts) \ + X(qwen3_gpu) \ X(punctuator) \ X(diacritizer_he) \ X(translator) \ diff --git a/src/models_manager.cpp b/src/models_manager.cpp index edb20718..0861f092 100644 --- a/src/models_manager.cpp +++ b/src/models_manager.cpp @@ -146,6 +146,7 @@ QDebug operator<<(QDebug d, models_manager::feature_flags flags) { if (flags & models_manager::engine_tts_parler) d << "engine-tts-parler, "; if (flags & models_manager::engine_tts_f5) d << "engine-tts-f5, "; if (flags & models_manager::engine_tts_kokoro) d << "engine-tts-kokoro, "; + if (flags & models_manager::engine_tts_qwen3) d << "engine-tts-qwen3, "; if (flags & models_manager::engine_other) d << "engine-other, "; if (flags & models_manager::hw_openvino) d << "hw-openvino, "; if (flags & models_manager::stt_intermediate_results) @@ -212,6 +213,9 @@ QDebug operator<<(QDebug d, models_manager::model_engine_t engine) { case models_manager::model_engine_t::tts_kokoro: d << "tts-kokoro"; break; + case models_manager::model_engine_t::tts_qwen3: + d << "tts-qwen3"; + break; case models_manager::model_engine_t::mnt_bergamot: d << "mnt-bergamot"; break; @@ -261,6 +265,7 @@ QDebug operator<<(QDebug d, if (models_availability.tts_kokoro) d << "tts_kokoro,"; if (models_availability.tts_kokoro_ja) d << "tts_kokoro_ja,"; if (models_availability.tts_kokoro_zh) d << "tts_kokoro_zh,"; + if (models_availability.tts_qwen3) d << "tts_qwen3,"; if (models_availability.stt_fasterwhisper) d << "stt_fasterwhisper,"; if (models_availability.stt_ds) d << "stt_ds,"; if (models_availability.stt_vosk) d << "stt_vosk,"; @@ -1575,6 +1580,7 @@ bool models_manager::is_modelless_engine(model_engine_t engine) { switch (engine) { case model_engine_t::tts_espeak: case model_engine_t::tts_sam: + case model_engine_t::tts_qwen3: return true; case model_engine_t::stt_ds: case model_engine_t::stt_vosk: @@ -1611,6 +1617,7 @@ bool models_manager::is_ignore_on_sfos(model_engine_t engine, case model_engine_t::tts_parler: case model_engine_t::tts_f5: case model_engine_t::tts_kokoro: + case model_engine_t::tts_qwen3: case model_engine_t::tts_coqui: return true; case model_engine_t::stt_april: @@ -1719,6 +1726,7 @@ models_manager::model_role_t models_manager::role_of_engine( case model_engine_t::tts_parler: case model_engine_t::tts_f5: case model_engine_t::tts_kokoro: + case model_engine_t::tts_qwen3: return model_role_t::tts; case model_engine_t::mnt_bergamot: return model_role_t::mnt; @@ -1752,6 +1760,7 @@ models_manager::model_engine_t models_manager::engine_from_name( if (name == QStringLiteral("tts_parler")) return model_engine_t::tts_parler; if (name == QStringLiteral("tts_f5")) return model_engine_t::tts_f5; if (name == QStringLiteral("tts_kokoro")) return model_engine_t::tts_kokoro; + if (name == QStringLiteral("tts_qwen3")) return model_engine_t::tts_qwen3; if (name == QStringLiteral("mnt_bergamot")) return model_engine_t::mnt_bergamot; @@ -1894,6 +1903,7 @@ models_manager::feature_flags models_manager::add_new_feature( case feature_flags::engine_tts_parler: case feature_flags::engine_tts_f5: case feature_flags::engine_tts_kokoro: + case feature_flags::engine_tts_qwen3: case feature_flags::engine_mnt: case feature_flags::engine_other: if (existing_features & feature_flags::engine_stt_ds || @@ -2087,6 +2097,16 @@ models_manager::feature_flags models_manager::add_implicit_feature_flags( existing_features, score == 0 ? feature_flags::low_quality : feature_flags::high_quality); break; + case model_engine_t::tts_qwen3: + existing_features = + add_new_feature(existing_features, + feature_flags::engine_tts_qwen3) | + add_new_feature(existing_features, + feature_flags::medium_processing); + existing_features = add_new_feature( + existing_features, score == 0 ? feature_flags::low_quality + : feature_flags::high_quality); + break; case model_engine_t::tts_rhvoice: existing_features = add_new_feature(existing_features, @@ -2387,6 +2407,11 @@ auto models_manager::extract_models( qDebug() << "ignoring kokoro model:" << model_id; continue; } + if (!models_availability->tts_qwen3 && + engine == model_engine_t::tts_qwen3) { + qDebug() << "ignoring qwen3 model:" << model_id; + continue; + } if (!models_availability->tts_rhvoice && engine == model_engine_t::tts_rhvoice) { qDebug() << "ignoring rhvoice model:" << model_id; @@ -2793,6 +2818,7 @@ QString models_manager::file_name_from_id(const QString& id, case model_engine_t::tts_parler: case model_engine_t::tts_f5: case model_engine_t::tts_kokoro: + case model_engine_t::tts_qwen3: case model_engine_t::mnt_bergamot: return id; } @@ -2990,6 +3016,11 @@ void models_manager::update_models_using_availability_internal() { return; } } + if (!m_models_availability->tts_qwen3 && + pair.second.engine == model_engine_t::tts_qwen3) { + pair.second.disabled = true; + return; + } if (pair.second.engine == model_engine_t::tts_mimic3) { if (!m_models_availability->tts_mimic3 || (!m_models_availability->tts_mimic3_de && diff --git a/src/models_manager.h b/src/models_manager.h index 328cd11d..eda076ab 100644 --- a/src/models_manager.h +++ b/src/models_manager.h @@ -62,6 +62,7 @@ class models_manager : public QObject, public singleton { tts_parler, tts_f5, tts_kokoro, + tts_qwen3, mnt_bergamot }; friend QDebug operator<<(QDebug d, model_engine_t engine); @@ -91,6 +92,7 @@ class models_manager : public QObject, public singleton { engine_tts_parler = 1U << 18U, engine_tts_f5 = 1U << 19U, engine_tts_kokoro = 1U << 20U, + engine_tts_qwen3 = 1U << 21U, engine_mnt = 1U << 23U, engine_other = 1U << 24U, generic_end = engine_other, @@ -189,6 +191,7 @@ class models_manager : public QObject, public singleton { bool tts_kokoro = false; bool tts_kokoro_ja = false; bool tts_kokoro_zh = false; + bool tts_qwen3 = false; bool stt_fasterwhisper = false; bool stt_ds = false; bool stt_vosk = false; diff --git a/src/py_tools.cpp b/src/py_tools.cpp index a41190b6..67bbb476 100644 --- a/src/py_tools.cpp +++ b/src/py_tools.cpp @@ -41,7 +41,8 @@ std::ostream& operator<<(std::ostream& os, << ", whisperspeech-tts=" << availability.whisperspeech_tts << ", parler-tts=" << availability.parler_tts << ", f5-tts=" << availability.f5_tts - << ", kokoro-tts=" << availability.f5_tts + << ", kokoro-tts=" << availability.kokoro_tts + << ", qwen3-tts=" << availability.qwen3_tts << ", transformers=" << availability.transformers << ", unikud=" << availability.unikud << ", gruut_de=" << availability.gruut_de @@ -81,6 +82,7 @@ libs_availability_t libs_availability(libs_scan_type_t scan_type, availability.kokoro_tts = true; availability.kokoro_ja = true; availability.kokoro_zh = true; + availability.qwen3_tts = true; } availability.faster_whisper = true; availability.mimic3_tts = true; @@ -226,6 +228,14 @@ libs_availability_t libs_availability(libs_scan_type_t scan_type, LOGD("kokoro tts check py error: " << err.what()); } + try { + LOGD("checking: qwen3 tts"); + py::module_::import("qwen_tts"); + availability.qwen3_tts = true; + } catch (const std::exception& err) { + LOGD("qwen3 tts check py error: " << err.what()); + } + try { LOGD("checking: faster-whisper"); py::module_::import("faster_whisper"); diff --git a/src/py_tools.hpp b/src/py_tools.hpp index 7ac76b51..05f8a05a 100644 --- a/src/py_tools.hpp +++ b/src/py_tools.hpp @@ -32,6 +32,7 @@ struct libs_availability_t { bool parler_tts = false; bool f5_tts = false; bool kokoro_tts = false; + bool qwen3_tts = false; bool transformers = false; bool unikud = false; bool gruut_de = false; diff --git a/src/qwen3tts_engine.cpp b/src/qwen3tts_engine.cpp new file mode 100644 index 00000000..538edf56 --- /dev/null +++ b/src/qwen3tts_engine.cpp @@ -0,0 +1,182 @@ +/* Copyright (C) 2025 Michal Kosciesza + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include "qwen3tts_engine.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "cpu_tools.hpp" +#include "logger.hpp" +#include "py_executor.hpp" + +using namespace pybind11::literals; + +qwen3tts_engine::qwen3tts_engine(config_t config, callbacks_t call_backs) + : tts_engine{std::move(config), std::move(call_backs)} { + if ((cpu_tools::cpuinfo().feature_flags & + cpu_tools::feature_flags_t::avx) == 0) { + LOGE("avx not supported but qwen3tts engine needs it"); + throw std::runtime_error( + "failed to init qwen3tts engine: avx not supported"); + } +} + +qwen3tts_engine::~qwen3tts_engine() { + LOGD("qwen3tts dtor"); + + stop(); +} + +void qwen3tts_engine::stop() { + tts_engine::stop(); + + auto task = py_executor::instance()->execute([&]() { + try { + m_model.reset(); + + // release mem + py::module_::import("gc").attr("collect")(); + py::module_::import("torch").attr("cuda").attr("empty_cache")(); + } catch (const std::exception& err) { + LOGE("py error: " << err.what()); + } + return std::any{}; + }); + + if (task) task->get(); + + LOGD("qwen3tts stopped"); +} + +void qwen3tts_engine::create_model() { + auto task = py_executor::instance()->execute([&]() { + auto use_cuda = + m_config.use_gpu && + (py_executor::instance()->libs_availability->torch_cuda || + py_executor::instance()->libs_availability->torch_hip); + + LOGD("using device: " << (use_cuda ? "cuda" : "cpu") << " " + << m_config.gpu_device.id); + + m_device_str = use_cuda ? "cuda" : "cpu"; + + try { + auto qwen_tts = py::module_::import("qwen_tts"); + auto torch = py::module_::import("torch"); + + // Use local model path if available, otherwise use HF Hub name + auto model_path = m_config.model_files.model_path; + if (model_path.empty() || model_path == ".") { + model_path = "Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice"; + } + + LOGD("loading qwen3tts model from: " << model_path); + + auto dtype = use_cuda ? torch.attr("float16") + : torch.attr("float32"); + + auto cache_dir = m_config.cache_dir; + if (cache_dir.empty()) { + m_model = qwen_tts.attr("QwenTTS")( + "model_path"_a = model_path, "device"_a = m_device_str, + "dtype"_a = dtype); + } else { + m_model = qwen_tts.attr("QwenTTS")( + "model_path"_a = model_path, "device"_a = m_device_str, + "dtype"_a = dtype, "cache_dir"_a = cache_dir); + } + + } catch (const std::exception& err) { + LOGE("py error: " << err.what()); + return false; + } + return true; + }); + + if (!task || !std::any_cast(task->get())) + LOGE("failed to create qwen3tts model"); + else + LOGD("qwen3tts model created"); +} + +bool qwen3tts_engine::model_created() const { return m_model.has_value(); } + +bool qwen3tts_engine::encode_speech_impl(const std::string& text, + unsigned int speed, + const std::string& out_file) { + auto speech_speed = std::clamp(speed, 1U, 20U) / 10.0; + + auto task = py_executor::instance()->execute([&]() { + try { + // Determine speaker from config, default to French female voice + auto speaker = m_config.speaker_id; + if (speaker.empty()) { + speaker = "Chelsie"; + } + + LOGD("qwen3tts generating speech with speaker: " + << speaker << ", speed: " << speech_speed); + + // Use generate_custom_voice for preset speakers + auto result = m_model->attr("generate_custom_voice")( + text, "speaker"_a = speaker, "speed"_a = speech_speed); + + // result is a dict with 'audio' (numpy array) and 'sample_rate' + py::array_t + audio_arr = result.attr("__getitem__")("audio") + .attr("squeeze")() + .attr("cpu")() + .attr("numpy")(); + + auto result_sample_rate = + result.attr("__getitem__")("sample_rate").cast(); + + auto buffer = audio_arr.request(); + + std::ofstream os{out_file, std::ios::binary}; + os.seekp(sizeof(wav_header)); + auto data_start = os.tellp(); + + for (ssize_t i = 0; i < buffer.size; ++i) { + if (is_shutdown()) throw std::runtime_error{"engine shutdown"}; + + // convert f32 to s16 sample format + auto sample = static_cast( + std::clamp(static_cast(buffer.ptr)[i], -1.0F, + 1.0F) * + 32767.0F); + os.write(reinterpret_cast(&sample), 2); + } + + auto data_size = os.tellp() - data_start; + + os.seekp(0); + write_wav_header(result_sample_rate, sizeof(int16_t), 1, + data_size / sizeof(int16_t), os); + + } catch (const std::exception& err) { + LOGE("py error: " << err.what()); + unlink(out_file.c_str()); + return false; + } + + LOGD("voice synthesized successfully"); + return true; + }); + + return task && std::any_cast(task->get()); +} + +bool qwen3tts_engine::model_supports_speed() const { return true; } diff --git a/src/qwen3tts_engine.hpp b/src/qwen3tts_engine.hpp new file mode 100644 index 00000000..90cc3b9f --- /dev/null +++ b/src/qwen3tts_engine.hpp @@ -0,0 +1,41 @@ +/* Copyright (C) 2025 Michal Kosciesza + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef QWEN3TTS_ENGINE_HPP +#define QWEN3TTS_ENGINE_HPP + +#undef slots +#include +#include +#define slots Q_SLOTS + +#include +#include + +#include "tts_engine.hpp" + +namespace py = pybind11; + +class qwen3tts_engine : public tts_engine { + public: + qwen3tts_engine(config_t config, callbacks_t call_backs); + ~qwen3tts_engine() override; + + private: + static const int s_sample_rate = 24000; + std::optional m_model; + std::string m_device_str; + + bool model_created() const final; + bool model_supports_speed() const final; + void create_model() final; + bool encode_speech_impl(const std::string& text, unsigned int speed, + const std::string& out_file) final; + void stop(); +}; + +#endif // QWEN3TTS_ENGINE_HPP diff --git a/src/settings.cpp b/src/settings.cpp index 6b8409c7..3d142b16 100644 --- a/src/settings.cpp +++ b/src/settings.cpp @@ -1547,6 +1547,11 @@ void settings::scan_hw_devices(unsigned int hw_feature_flags) { 0; bool disable_kokoro_hip = (hw_feature_flags & hw_feature_flags_t::hw_feature_tts_kokoro_hip) == 0; + bool disable_qwen3_cuda = + (hw_feature_flags & hw_feature_flags_t::hw_feature_tts_qwen3_cuda) == + 0; + bool disable_qwen3_hip = + (hw_feature_flags & hw_feature_flags_t::hw_feature_tts_qwen3_hip) == 0; auto result = gpu_tools::available_devices( /*cuda=*/hw_scan_cuda(), @@ -1577,7 +1582,7 @@ void settings::scan_hw_devices(unsigned int hw_feature_flags) { if (disable_fasterwhisper_cuda && disable_whispercpp_cuda && disable_coqui_cuda && disable_whisperspeech_cuda && disable_parler_cuda && disable_f5_cuda && - disable_kokoro_cuda) + disable_kokoro_cuda && disable_qwen3_cuda) return; auto item = QStringLiteral("%1, %2, %3") @@ -1596,13 +1601,15 @@ void settings::scan_hw_devices(unsigned int hw_feature_flags) { if (!disable_f5_cuda) m_f5_gpu_devices.push_back(item); if (!disable_kokoro_cuda) m_kokoro_gpu_devices.push_back(item); + if (!disable_qwen3_cuda) + m_qwen3_gpu_devices.push_back(item); break; } case gpu_tools::api_t::rocm: { if (disable_fasterwhisper_hip && disable_whispercpp_hip && disable_coqui_hip && disable_whisperspeech_hip && disable_parler_hip && disable_f5_hip && - disable_kokoro_hip) + disable_kokoro_hip && disable_qwen3_hip) return; auto item = QStringLiteral("%1, %2, %3") @@ -1620,6 +1627,8 @@ void settings::scan_hw_devices(unsigned int hw_feature_flags) { if (!disable_f5_hip) m_f5_gpu_devices.push_back(item); if (!disable_kokoro_hip) m_kokoro_gpu_devices.push_back(item); + if (!disable_qwen3_hip) + m_qwen3_gpu_devices.push_back(item); m_rocm_gpu_versions.push_back( QString::fromStdString(device.platform_name)); break; diff --git a/src/settings.h b/src/settings.h index d3a80189..135e82fe 100644 --- a/src/settings.h +++ b/src/settings.h @@ -74,7 +74,8 @@ X(whisperspeech, true) \ X(parler, true) \ X(f5, true) \ - X(kokoro, true) + X(kokoro, true) \ + X(qwen3, true) // id, action-name, description, default-key-combination, trigger-on-deactivate #define HOTKEY_TABLE \ @@ -601,6 +602,8 @@ class settings : public QSettings, public singleton { hw_feature_tts_f5_hip = 1U << 14U, hw_feature_tts_kokoro_cuda = 1U << 15U, hw_feature_tts_kokoro_hip = 1U << 16U, + hw_feature_tts_qwen3_cuda = 1U << 17U, + hw_feature_tts_qwen3_hip = 1U << 18U, hw_feature_all = hw_feature_stt_whispercpp_cuda | hw_feature_stt_whispercpp_hip | hw_feature_stt_whispercpp_openvino | @@ -612,7 +615,8 @@ class settings : public QSettings, public singleton { hw_feature_tts_whisperspeech_hip | hw_feature_tts_parler_cuda | hw_feature_tts_parler_hip | hw_feature_tts_f5_cuda | hw_feature_tts_f5_hip | hw_feature_tts_kokoro_cuda | - hw_feature_tts_kokoro_hip + hw_feature_tts_kokoro_hip | hw_feature_tts_qwen3_cuda | + hw_feature_tts_qwen3_hip }; friend QDebug operator<<(QDebug d, hw_feature_flags_t hw_feature_flags); diff --git a/src/speech_service.cpp b/src/speech_service.cpp index 99371916..5fe43733 100644 --- a/src/speech_service.cpp +++ b/src/speech_service.cpp @@ -30,6 +30,7 @@ #include "gpu_tools.hpp" #include "kokoro_engine.hpp" #include "media_compressor.hpp" +#include "qwen3tts_engine.hpp" #include "mic_source.h" #include "mimic3_engine.hpp" #include "module_tools.hpp" @@ -1733,6 +1734,10 @@ QString speech_service::restart_tts_engine(const QString &model_id, models_manager::model_engine_t::tts_kokoro && type != typeid(kokoro_engine)) return true; + if (model_config->tts->engine == + models_manager::model_engine_t::tts_qwen3 && + type != typeid(qwen3tts_engine)) + return true; if (m_tts_engine->model_files() != config.model_files) return true; @@ -1865,6 +1870,10 @@ QString speech_service::restart_tts_engine(const QString &model_id, m_tts_engine = std::make_unique( std::move(config), std::move(call_backs)); break; + case models_manager::model_engine_t::tts_qwen3: + m_tts_engine = std::make_unique( + std::move(config), std::move(call_backs)); + break; case models_manager::model_engine_t::ttt_hftc: case models_manager::model_engine_t::ttt_tashkeel: case models_manager::model_engine_t::ttt_unikud: @@ -3035,6 +3044,9 @@ QVariantMap speech_service::features_availability() { "kokoro-tts-zh", QVariantList{py_availability->kokoro_tts && py_availability->kokoro_zh, "Kokoro TTS " + tr("Chinese")}); + m_features_availability.insert( + "qwen3-tts", + QVariantList{py_availability->qwen3_tts, "Qwen3 TTS"}); #ifdef ARCH_X86_64 auto has_cuda = gpu_tools::has_cuda_runtime(); auto has_cudnn = gpu_tools::has_cudnn(); @@ -3135,6 +3147,25 @@ QVariantMap speech_service::features_availability() { if (tts_kokoro_hip) hw_feature_flags |= settings::hw_feature_flags_t::hw_feature_tts_kokoro_hip; + + bool tts_qwen3_cuda = + py_availability->qwen3_tts && py_availability->torch_cuda; + bool tts_qwen3_hip = + py_availability->qwen3_tts && py_availability->torch_hip; + m_features_availability.insert( + "qwen3-tts-cuda", + QVariantList{tts_qwen3_cuda, + "Qwen3 TTS CUDA " + tr("HW acceleration")}); + m_features_availability.insert( + "qwen3-tts-hip", + QVariantList{tts_qwen3_hip, + "Qwen3 TTS ROCm " + tr("HW acceleration")}); + if (tts_qwen3_cuda) + hw_feature_flags |= + settings::hw_feature_flags_t::hw_feature_tts_qwen3_cuda; + if (tts_qwen3_hip) + hw_feature_flags |= + settings::hw_feature_flags_t::hw_feature_tts_qwen3_hip; #endif m_features_availability.insert( "coqui-tts-ja", QVariantList{py_availability->coqui_tts && @@ -3311,6 +3342,7 @@ QVariantMap speech_service::features_availability() { /*tts_kokoro=*/py_availability->kokoro_tts, /*tts_kokoro_ja=*/py_availability->kokoro_ja, /*tts_kokoro_zh=*/py_availability->kokoro_zh, + /*tts_qwen3=*/py_availability->qwen3_tts, /*stt_fasterwhisper=*/py_availability->faster_whisper, /*stt_ds=*/stt_ds, /*stt_vosk=*/stt_vosk, @@ -3348,6 +3380,10 @@ QVariantMap speech_service::features_availability() { "kokoro-gpu-devices", variant_list_from_list( settings::instance()->kokoro_gpu_devices())); + m_features_availability.insert( + "qwen3-gpu-devices", + variant_list_from_list( + settings::instance()->qwen3_gpu_devices())); m_features_availability.insert( "addon-flags",