diff --git a/README.md b/README.md
index 88fa577..9e2569f 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,7 @@ Try Kitten TTS directly in your browser on [Hugging Face Spaces](https://hugging
 pip install https://github.com/KittenML/KittenTTS/releases/download/0.8.1/kittentts-0.8.1-py3-none-any.whl
 ```
 
-### Basic Usage
+### API Usage
 
 ```python
 from kittentts import KittenTTS
@@ -85,6 +85,12 @@ import soundfile as sf
 sf.write("output.wav", audio, 24000)
 ```
 
+### CLI Usage
+
+```bash
+kittentts --output output.wav --text "This high quality TTS model works without a GPU"
+```
+
 ### Advanced Usage
 
 ```python
diff --git a/kittentts/__main__.py b/kittentts/__main__.py
new file mode 100644
index 0000000..73c7da6
--- /dev/null
+++ b/kittentts/__main__.py
@@ -0,0 +1,86 @@
+"""Command-line interface for Kitten TTS (``kittentts`` / ``python -m kittentts``)."""
+
+import argparse
+import datetime
+import io
+import sys
+import time
+
+# Voice names accepted by the ``--voice`` option.
+voices = [
+    "expr-voice-2-m",
+    "expr-voice-2-f",
+    "expr-voice-3-m",
+    "expr-voice-3-f",
+    "expr-voice-4-m",
+    "expr-voice-4-f",
+    "expr-voice-5-m",
+    "expr-voice-5-f",
+]
+
+# Sample rate handed to soundfile for every write, kept in one place.
+SAMPLE_RATE = 24000
+
+
+def run(*, model: str, voice: str, output: str, text: str, speed: float = 1.0) -> datetime.timedelta:
+    """Synthesize *text* and write the resulting audio to *output*.
+
+    Args:
+        model: Model identifier passed to ``KittenTTS``.
+        voice: Voice name forwarded to ``KittenTTS.generate``.
+        output: Destination file path, or ``"-"`` to write WAV data to stdout.
+        text: Text to synthesize.
+        speed: Speech speed forwarded to ``KittenTTS.generate``.
+
+    Returns:
+        Time spent generating and writing the audio. Constructing the
+        model happens before the timer starts and is not included.
+    """
+    from kittentts import KittenTTS
+    import soundfile as sf
+
+    m = KittenTTS(model)
+    # perf_counter is monotonic, so the measured duration cannot be skewed
+    # by wall-clock adjustments the way datetime.now() differences can.
+    t0 = time.perf_counter()
+    audio = m.generate(text, voice=voice, speed=speed)
+    if output == "-":
+        # sf requires a seekable buffer for writing.
+        bio = io.BytesIO()
+        sf.write(bio, audio, SAMPLE_RATE, format="WAV", subtype="PCM_16")
+        sys.stdout.buffer.write(bio.getvalue())
+        # Push the WAV bytes out of the buffered stream before the timer stops.
+        sys.stdout.buffer.flush()
+    else:
+        sf.write(output, audio, SAMPLE_RATE)
+    return datetime.timedelta(seconds=time.perf_counter() - t0)
+
+
+def main() -> None:
+    """Parse command-line arguments, run synthesis and report the timing on stderr."""
+    ap = argparse.ArgumentParser(prog="kittentts", description="Run Kitten TTS model")
+    ap.add_argument("--model", default="KittenML/kitten-tts-nano-0.1", help="Model to use")
+    ap.add_argument("--text", required=True, help="Text to synthesize")
+    ap.add_argument("--voice", default="expr-voice-2-f", help="Voice to use", choices=voices)
+    ap.add_argument("--speed", type=float, default=1.0, help="Speech speed (1.0 = normal)")
+    ap.add_argument("--output", help="Output audio file (- for stdout; use with care)")
+
+    args = ap.parse_args()
+
+    if not args.output:
+        # No --output given: derive a timestamped file name (colons replaced by dashes).
+        ts = datetime.datetime.now().isoformat(timespec="seconds").replace(":", "-")
+        args.output = f"{args.voice}-{ts}.wav"
+
+    gen_time = run(
+        model=args.model,
+        voice=args.voice,
+        output=args.output,
+        text=args.text,
+        speed=args.speed,
+    )
+    print(f"Generated audio in {gen_time}, saved to {args.output}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index 8a60547..6bcf4fc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,6 +28,9 @@ dependencies = [
     "huggingface_hub",
 ]
 
+[project.scripts]
+kittentts = "kittentts.__main__:main"
+
 [project.urls]
 Homepage = "https://github.com/kittenml/kittentts"
 Repository = "https://github.com/kittenml/kittentts"
diff --git a/setup.py b/setup.py
index 5ff10c6..e870b24 100644
--- a/setup.py
+++ b/setup.py
@@ -43,4 +43,9 @@
         "Bug Reports": "https://github.com/kittenml/kittentts/issues",
         "Source": "https://github.com/kittenml/kittentts",
     },
+    entry_points={
+        "console_scripts": [
+            "kittentts=kittentts.__main__:main",
+        ],
+    },
 )