Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

Kitten TTS is an open-source realistic text-to-speech model with just 15 million parameters, designed for lightweight deployment and high-quality voice synthesis.

> **UI and enhancements by Muhammad Umer**

*Currently in developer preview*

[Join our discord](https://discord.com/invite/VJ86W4SURW)
Expand Down Expand Up @@ -59,7 +61,28 @@ pip install https://github.com/KittenML/KittenTTS/releases/download/0.8/kittentt



### Basic Usage
```

### Graphical Interface (Windows)

We have provided a native, modern UI designed specifically for Windows!
To use the UI:
1. Double-click **`Run_KittenTTS.bat`** from your File Explorer.
2. On first run the script creates a Python virtual environment (`venv`), installs or updates the dependencies, and then opens the KittenTTS Desktop app.
3. Type your text, select your voice and model, and click **Generate & Play ▶**!

### Terminal CLI Usage

You can also use KittenTTS directly from your terminal/command line!
```bash
# Synthesize speech and save it to out.wav
kittentts "Hello! I am speaking to you directly from the terminal" --voice Jasper --output out.wav

# View all dynamically loaded voices
kittentts --list-voices
```

### Basic Python Usage

```python
from kittentts import KittenTTS
Expand Down
38 changes: 38 additions & 0 deletions Run_KittenTTS.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
@echo off
rem Launcher for the KittenTTS desktop UI.
rem Creates a local virtual environment on first run, installs the package in
rem editable mode, and then starts the kittentts.ui module.

rem Keep the environment changes made by activate.bat out of the calling shell.
setlocal

rem All paths below (venv\, "pip install -e .") are relative to the repository
rem root, so switch to the folder that contains this script instead of relying
rem on the caller's working directory.
cd /d "%~dp0"
if errorlevel 1 (
    echo [ERROR] Could not switch to the script directory.
    pause
    exit /b 1
)

echo ==============================================
echo KittenTTS Graphical Interface
echo ==============================================
echo.

:: Check if virtual environment exists
if not exist "venv\Scripts\python.exe" (
    echo [INFO] Virtual environment not found. Setting it up now...
    python -m venv venv
    if errorlevel 1 (
        echo [ERROR] Failed to create virtual environment. Ensure Python is installed and in PATH.
        pause
        exit /b 1
    )
)

:: Activate the environment and install/update dependencies quietly
echo [INFO] Updating dependencies...
call "venv\Scripts\activate.bat"
if errorlevel 1 (
    echo [ERROR] Failed to activate virtual environment.
    pause
    exit /b 1
)

"venv\Scripts\python.exe" -m pip install -e . -q
if errorlevel 1 (
    echo [ERROR] Initialization failed while installing dependencies.
    pause
    exit /b 1
)

:: Run the UI application
echo [INFO] Launching UI...
"venv\Scripts\python.exe" -m kittentts.ui

rem Capture the UI's exit code before "pause" overwrites ERRORLEVEL.
set "UI_EXIT=%errorlevel%"
if not "%UI_EXIT%"=="0" echo [ERROR] The UI exited with code %UI_EXIT%.

pause
exit /b %UI_EXIT%
10 changes: 0 additions & 10 deletions example.py

This file was deleted.

45 changes: 45 additions & 0 deletions kittentts/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import argparse
import sys
from kittentts import KittenTTS

def main(argv=None):
    """Run the KittenTTS command-line interface.

    Parses the command line, loads the requested model, and then either
    lists the voices the model exposes (``--list-voices``) or synthesizes
    the given text into an audio file.

    Args:
        argv: Optional list of argument strings, without the program name.
            When ``None`` (the default) argparse reads ``sys.argv[1:]``, so
            existing callers that invoke ``main()`` are unaffected.

    Raises:
        SystemExit: With code 2 on a usage error (raised by argparse), code 1
            when the model cannot be loaded or generation raises
            ``ValueError``, and code 0 after ``--list-voices`` completes.
    """
    parser = argparse.ArgumentParser(description="KittenTTS: High-quality, ultra-lightweight Text-to-Speech")
    parser.add_argument("text", nargs="?", help="The text to synthesize into speech")
    parser.add_argument("--voice", default="Jasper", help="Voice to use (e.g., Bella, Jasper, Luna). Default: Jasper")
    parser.add_argument("--model", default="KittenML/kitten-tts-nano-0.1", help="Model ID to use. Default: KittenML/kitten-tts-nano-0.1")
    parser.add_argument("--output", default="output.wav", help="Output audio file path. Default: output.wav")
    parser.add_argument("--speed", type=float, default=1.0, help="Speech speed (1.0 = normal). Default: 1.0")
    parser.add_argument("--list-voices", action="store_true", help="List available voices for the model and exit")

    args = parser.parse_args(argv)

    # "text" is declared optional so that --list-voices can be used alone;
    # enforce it manually for the synthesis path, before any model is loaded.
    if not args.list_voices and not args.text:
        parser.error("The 'text' argument is required unless --list-voices is specified.")

    # Initialize model
    print(f"Loading model: {args.model}...")
    model = KittenTTS(model_name=args.model)

    if model.model is None:
        # Say why we are exiting instead of terminating silently.
        print(f"Error: could not load model '{args.model}'.", file=sys.stderr)
        sys.exit(1)

    if args.list_voices:
        aliases = model.model.voice_aliases
        # Voices that already have an alias are not repeated under "Native".
        aliased_targets = set(aliases.values())

        print("\n=== Available Voices ===")
        print("Aliases:")
        for alias in aliases:
            print(f" - {alias}")
        print("\nNative Voices:")
        for voice in model.available_voices:
            if voice not in aliased_targets:
                print(f" - {voice}")
        sys.exit(0)

    print(f"Generating audio with voice '{args.voice}'...")
    try:
        model.generate_to_file(args.text, args.output, voice=args.voice, speed=args.speed)
    except ValueError as err:
        # Report the failure on stderr rather than swallowing it.
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
67 changes: 38 additions & 29 deletions kittentts/get_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,37 +64,46 @@ def download_from_huggingface(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=
Returns:
KittenTTS_1_Onnx: Instantiated model ready for use
"""
# Download config file first
config_path = hf_hub_download(
repo_id=repo_id,
filename="config.json",
cache_dir=cache_dir
)

# Load config
with open(config_path, 'r') as f:
config = json.load(f)
try:
# Download config file first
config_path = hf_hub_download(
repo_id=repo_id,
filename="config.json",
cache_dir=cache_dir
)

# Load config
with open(config_path, 'r') as f:
config = json.load(f)

if config.get("type") not in ["ONNX1", "ONNX2"]:
raise ValueError("Unsupported model type.")
if config.get("type") not in ["ONNX1", "ONNX2"]:
print(f"Error: Unsupported model type '{config.get('type')}'.")
return None

# Download model and voices files based on config
model_path = hf_hub_download(
repo_id=repo_id,
filename=config["model_file"],
cache_dir=cache_dir
)

voices_path = hf_hub_download(
repo_id=repo_id,
filename=config["voices"],
cache_dir=cache_dir
)

# Instantiate and return model
model = KittenTTS_1_Onnx(model_path=model_path, voices_path=voices_path, speed_priors=config.get("speed_priors", {}) , voice_aliases=config.get("voice_aliases", {}))

return model
# Download model and voices files based on config
model_path = hf_hub_download(
repo_id=repo_id,
filename=config["model_file"],
cache_dir=cache_dir
)

voices_path = hf_hub_download(
repo_id=repo_id,
filename=config["voices"],
cache_dir=cache_dir
)

# Instantiate and return model
model = KittenTTS_1_Onnx(model_path=model_path, voices_path=voices_path, speed_priors=config.get("speed_priors", {}) , voice_aliases=config.get("voice_aliases", {}))

return model

except Exception as e:
print(f"\n❌ Failed to load model '{repo_id}'.")
print(f"Error details: {str(e)}")
print("Please check your internet connection or verify the model name is correct.")
print("Example valid models: 'KittenML/kitten-tts-nano-0.1', 'KittenML/kitten-tts-mini-0.8'\n")
return None


def get_model(repo_id="KittenML/kitten-tts-nano-0.1", cache_dir=None):
Expand Down
37 changes: 28 additions & 9 deletions kittentts/onnx_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,23 +95,42 @@ def __init__(self, model_path="kitten_tts_nano_preview.onnx", voices_path="voice
self.text_cleaner = TextCleaner()
self.speed_priors = speed_priors

# Available voices
self.available_voices = [
'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f',
'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f'
]
self.voice_aliases = voice_aliases
# Available voices dynamically loaded from the .npz file
self.available_voices = list(self.voices.keys())

# Default fallback aliases, can be extended by config
default_aliases = {
'Bella': 'expr-voice-2-f',
'Jasper': 'expr-voice-2-m',
'Luna': 'expr-voice-3-f',
'Bruno': 'expr-voice-3-m',
'Rosie': 'expr-voice-4-f',
'Hugo': 'expr-voice-4-m',
'Kiki': 'expr-voice-5-f',
'Leo': 'expr-voice-5-m'
}
self.voice_aliases = default_aliases
self.voice_aliases.update(voice_aliases)


self.preprocessor = TextPreprocessor()

def _prepare_inputs(self, text: str, voice: str, speed: float = 1.0) -> dict:
"""Prepare ONNX model inputs from text and voice parameters."""
# Try to resolve alias if necessary
if voice in self.voice_aliases:
voice = self.voice_aliases[voice]

if voice not in self.available_voices:
raise ValueError(f"Voice '{voice}' not available. Choose from: {self.available_voices}")

# Check if the requested voice exists in the dynamic voice dictionary
if voice not in self.voices:
fallback = list(self.aliases.keys())[0] if hasattr(self, 'aliases') and len(self.aliases) > 0 else (self.available_voices[0] if self.available_voices else None)
error_msg = f"\n❌ Voice '{voice}' not found."
error_msg += f"\n👉 Available native voices: {self.available_voices}"
error_msg += f"\n👉 Available voice aliases: {list(self.voice_aliases.keys())}"
if fallback:
error_msg += f"\nPlease try using a valid voice like '{fallback}'."
print(error_msg)
raise ValueError(f"Voice '{voice}' not available.")
if voice in self.speed_priors:
speed = speed * self.speed_priors[voice]

Expand Down
Loading