Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,8 @@ set(dsnote_lib_sources
${sources_dir}/f5_engine.hpp
${sources_dir}/kokoro_engine.cpp
${sources_dir}/kokoro_engine.hpp
${sources_dir}/qwen3tts_engine.cpp
${sources_dir}/qwen3tts_engine.hpp
${sources_dir}/wl_clipboard.cpp
${sources_dir}/wl_clipboard.hpp
)
Expand Down
49 changes: 49 additions & 0 deletions config/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -36174,6 +36174,55 @@
"engine": "tts_kokoro",
"lang_id": "zh"
},
{
"name": "Français (Qwen3-TTS Chelsie Female)",
"id": "fr_qwen3_chelsie",
"engine": "tts_qwen3",
"lang_id": "fr",
"speaker": "Chelsie"
},
{
"name": "Français (Qwen3-TTS Serena Female)",
"id": "fr_qwen3_serena",
"engine": "tts_qwen3",
"lang_id": "fr",
"speaker": "Serena"
},
{
"name": "Français (Qwen3-TTS Aria Female)",
"id": "fr_qwen3_aria",
"engine": "tts_qwen3",
"lang_id": "fr",
"speaker": "Aria"
},
{
"name": "Français (Qwen3-TTS Ethan Male)",
"id": "fr_qwen3_ethan",
"engine": "tts_qwen3",
"lang_id": "fr",
"speaker": "Ethan"
},
{
"name": "Français (Qwen3-TTS Aidan Male)",
"id": "fr_qwen3_aidan",
"engine": "tts_qwen3",
"lang_id": "fr",
"speaker": "Aidan"
},
{
"name": "English (Qwen3-TTS Chelsie Female)",
"id": "en_qwen3_chelsie",
"engine": "tts_qwen3",
"lang_id": "en",
"speaker": "Chelsie"
},
{
"name": "English (Qwen3-TTS Ethan Male)",
"id": "en_qwen3_ethan",
"engine": "tts_qwen3",
"lang_id": "en",
"speaker": "Ethan"
},
{
"name": "English (Parler-TTS)",
"id": "en_parler",
Expand Down
11 changes: 11 additions & 0 deletions src/dsnote_app.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4729,6 +4729,15 @@ bool dsnote_app::feature_kokoro_gpu() const {
feature_available("kokoro-tts-hip", false);
}

// Reports the availability of the "qwen3-tts" feature as returned by
// feature_available(). The second argument (false) is forwarded unchanged;
// its meaning is defined by feature_available(), which is not visible here.
bool dsnote_app::feature_qwen3_tts() const {
    const bool qwen3_available = feature_available("qwen3-tts", false);
    return qwen3_available;
}

// Returns true when either GPU flavour of the Qwen3 TTS feature is reported
// by feature_available(): "qwen3-tts-cuda" is queried first, and
// "qwen3-tts-hip" is only queried when the CUDA flavour is not available.
bool dsnote_app::feature_qwen3_gpu() const {
    if (feature_available("qwen3-tts-cuda", false)) {
        return true;
    }
    if (feature_available("qwen3-tts-hip", false)) {
        return true;
    }
    return false;
}

// Reports the availability of the "punctuator" feature as returned by
// feature_available(). The second argument (false) is forwarded unchanged;
// its meaning is defined by feature_available(), which is not visible here.
bool dsnote_app::feature_punctuator() const {
    const bool punctuator_available = feature_available("punctuator", false);
    return punctuator_available;
}
Expand Down Expand Up @@ -4902,6 +4911,8 @@ QVariantList dsnote_app::features_availability() {
feature_available("kokoro-tts-ja", false),
/*tts_kokoro_zh=*/
feature_available("kokoro-tts-zh", false),
/*tts_qwen3=*/
feature_available("qwen3-tts", false),
/*stt_fasterwhisper=*/
feature_available("faster-whisper-stt", false),
/*stt_ds=*/feature_available("coqui-stt", false),
Expand Down
2 changes: 2 additions & 0 deletions src/dsnote_app.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@
X(f5_gpu) \
X(kokoro_tts) \
X(kokoro_gpu) \
X(qwen3_tts) \
X(qwen3_gpu) \
X(punctuator) \
X(diacritizer_he) \
X(translator) \
Expand Down
31 changes: 31 additions & 0 deletions src/models_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ QDebug operator<<(QDebug d, models_manager::feature_flags flags) {
if (flags & models_manager::engine_tts_parler) d << "engine-tts-parler, ";
if (flags & models_manager::engine_tts_f5) d << "engine-tts-f5, ";
if (flags & models_manager::engine_tts_kokoro) d << "engine-tts-kokoro, ";
if (flags & models_manager::engine_tts_qwen3) d << "engine-tts-qwen3, ";
if (flags & models_manager::engine_other) d << "engine-other, ";
if (flags & models_manager::hw_openvino) d << "hw-openvino, ";
if (flags & models_manager::stt_intermediate_results)
Expand Down Expand Up @@ -212,6 +213,9 @@ QDebug operator<<(QDebug d, models_manager::model_engine_t engine) {
case models_manager::model_engine_t::tts_kokoro:
d << "tts-kokoro";
break;
case models_manager::model_engine_t::tts_qwen3:
d << "tts-qwen3";
break;
case models_manager::model_engine_t::mnt_bergamot:
d << "mnt-bergamot";
break;
Expand Down Expand Up @@ -261,6 +265,7 @@ QDebug operator<<(QDebug d,
if (models_availability.tts_kokoro) d << "tts_kokoro,";
if (models_availability.tts_kokoro_ja) d << "tts_kokoro_ja,";
if (models_availability.tts_kokoro_zh) d << "tts_kokoro_zh,";
if (models_availability.tts_qwen3) d << "tts_qwen3,";
if (models_availability.stt_fasterwhisper) d << "stt_fasterwhisper,";
if (models_availability.stt_ds) d << "stt_ds,";
if (models_availability.stt_vosk) d << "stt_vosk,";
Expand Down Expand Up @@ -1575,6 +1580,7 @@ bool models_manager::is_modelless_engine(model_engine_t engine) {
switch (engine) {
case model_engine_t::tts_espeak:
case model_engine_t::tts_sam:
case model_engine_t::tts_qwen3:
return true;
case model_engine_t::stt_ds:
case model_engine_t::stt_vosk:
Expand Down Expand Up @@ -1611,6 +1617,7 @@ bool models_manager::is_ignore_on_sfos(model_engine_t engine,
case model_engine_t::tts_parler:
case model_engine_t::tts_f5:
case model_engine_t::tts_kokoro:
case model_engine_t::tts_qwen3:
case model_engine_t::tts_coqui:
return true;
case model_engine_t::stt_april:
Expand Down Expand Up @@ -1719,6 +1726,7 @@ models_manager::model_role_t models_manager::role_of_engine(
case model_engine_t::tts_parler:
case model_engine_t::tts_f5:
case model_engine_t::tts_kokoro:
case model_engine_t::tts_qwen3:
return model_role_t::tts;
case model_engine_t::mnt_bergamot:
return model_role_t::mnt;
Expand Down Expand Up @@ -1752,6 +1760,7 @@ models_manager::model_engine_t models_manager::engine_from_name(
if (name == QStringLiteral("tts_parler")) return model_engine_t::tts_parler;
if (name == QStringLiteral("tts_f5")) return model_engine_t::tts_f5;
if (name == QStringLiteral("tts_kokoro")) return model_engine_t::tts_kokoro;
if (name == QStringLiteral("tts_qwen3")) return model_engine_t::tts_qwen3;
if (name == QStringLiteral("mnt_bergamot"))
return model_engine_t::mnt_bergamot;

Expand Down Expand Up @@ -1894,6 +1903,7 @@ models_manager::feature_flags models_manager::add_new_feature(
case feature_flags::engine_tts_parler:
case feature_flags::engine_tts_f5:
case feature_flags::engine_tts_kokoro:
case feature_flags::engine_tts_qwen3:
case feature_flags::engine_mnt:
case feature_flags::engine_other:
if (existing_features & feature_flags::engine_stt_ds ||
Expand Down Expand Up @@ -2087,6 +2097,16 @@ models_manager::feature_flags models_manager::add_implicit_feature_flags(
existing_features, score == 0 ? feature_flags::low_quality
: feature_flags::high_quality);
break;
case model_engine_t::tts_qwen3:
existing_features =
add_new_feature(existing_features,
feature_flags::engine_tts_qwen3) |
add_new_feature(existing_features,
feature_flags::medium_processing);
existing_features = add_new_feature(
existing_features, score == 0 ? feature_flags::low_quality
: feature_flags::high_quality);
break;
case model_engine_t::tts_rhvoice:
existing_features =
add_new_feature(existing_features,
Expand Down Expand Up @@ -2387,6 +2407,11 @@ auto models_manager::extract_models(
qDebug() << "ignoring kokoro model:" << model_id;
continue;
}
if (!models_availability->tts_qwen3 &&
engine == model_engine_t::tts_qwen3) {
qDebug() << "ignoring qwen3 model:" << model_id;
continue;
}
if (!models_availability->tts_rhvoice &&
engine == model_engine_t::tts_rhvoice) {
qDebug() << "ignoring rhvoice model:" << model_id;
Expand Down Expand Up @@ -2793,6 +2818,7 @@ QString models_manager::file_name_from_id(const QString& id,
case model_engine_t::tts_parler:
case model_engine_t::tts_f5:
case model_engine_t::tts_kokoro:
case model_engine_t::tts_qwen3:
case model_engine_t::mnt_bergamot:
return id;
}
Expand Down Expand Up @@ -2990,6 +3016,11 @@ void models_manager::update_models_using_availability_internal() {
return;
}
}
if (!m_models_availability->tts_qwen3 &&
pair.second.engine == model_engine_t::tts_qwen3) {
pair.second.disabled = true;
return;
}
if (pair.second.engine == model_engine_t::tts_mimic3) {
if (!m_models_availability->tts_mimic3 ||
(!m_models_availability->tts_mimic3_de &&
Expand Down
3 changes: 3 additions & 0 deletions src/models_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class models_manager : public QObject, public singleton<models_manager> {
tts_parler,
tts_f5,
tts_kokoro,
tts_qwen3,
mnt_bergamot
};
friend QDebug operator<<(QDebug d, model_engine_t engine);
Expand Down Expand Up @@ -91,6 +92,7 @@ class models_manager : public QObject, public singleton<models_manager> {
engine_tts_parler = 1U << 18U,
engine_tts_f5 = 1U << 19U,
engine_tts_kokoro = 1U << 20U,
engine_tts_qwen3 = 1U << 21U,
engine_mnt = 1U << 23U,
engine_other = 1U << 24U,
generic_end = engine_other,
Expand Down Expand Up @@ -189,6 +191,7 @@ class models_manager : public QObject, public singleton<models_manager> {
bool tts_kokoro = false;
bool tts_kokoro_ja = false;
bool tts_kokoro_zh = false;
bool tts_qwen3 = false;
bool stt_fasterwhisper = false;
bool stt_ds = false;
bool stt_vosk = false;
Expand Down
12 changes: 11 additions & 1 deletion src/py_tools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ std::ostream& operator<<(std::ostream& os,
<< ", whisperspeech-tts=" << availability.whisperspeech_tts
<< ", parler-tts=" << availability.parler_tts
<< ", f5-tts=" << availability.f5_tts
<< ", kokoro-tts=" << availability.f5_tts
<< ", kokoro-tts=" << availability.kokoro_tts
<< ", qwen3-tts=" << availability.qwen3_tts
<< ", transformers=" << availability.transformers
<< ", unikud=" << availability.unikud
<< ", gruut_de=" << availability.gruut_de
Expand Down Expand Up @@ -81,6 +82,7 @@ libs_availability_t libs_availability(libs_scan_type_t scan_type,
availability.kokoro_tts = true;
availability.kokoro_ja = true;
availability.kokoro_zh = true;
availability.qwen3_tts = true;
}
availability.faster_whisper = true;
availability.mimic3_tts = true;
Expand Down Expand Up @@ -226,6 +228,14 @@ libs_availability_t libs_availability(libs_scan_type_t scan_type,
LOGD("kokoro tts check py error: " << err.what());
}

try {
LOGD("checking: qwen3 tts");
py::module_::import("qwen_tts");
availability.qwen3_tts = true;
} catch (const std::exception& err) {
LOGD("qwen3 tts check py error: " << err.what());
}

try {
LOGD("checking: faster-whisper");
py::module_::import("faster_whisper");
Expand Down
1 change: 1 addition & 0 deletions src/py_tools.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct libs_availability_t {
bool parler_tts = false;
bool f5_tts = false;
bool kokoro_tts = false;
bool qwen3_tts = false;
bool transformers = false;
bool unikud = false;
bool gruut_de = false;
Expand Down
Loading