Misaki is a multilingual grapheme-to-phoneme (G2P) engine running on Pyodide/WebAssembly. This project provides pre-built wheels for spaCy, Misaki, and various language-specific dependencies compiled for Pyodide.
- Multi-language support: English, Chinese, Japanese, Vietnamese, Korean
- Browser-based: Runs entirely in the browser via Pyodide
- Interactive demo: Web interface for testing all supported languages
- spaCy integration: Full spaCy NLP pipeline support
- Start a local HTTP server:
  python -m http.server 8000
- Open index.html in your browser to access the interactive G2P tester.
The demo is automatically configured to work with GitHub Pages:
- Enable GitHub Pages in your repository:
  - Go to Settings → Pages
  - Set Source to "GitHub Actions"
- Push your code to the main or master branch:
  git add .
  git commit -m "Deploy to GitHub Pages"
  git push origin main
- Access your demo at:
  https://[username].github.io/[repository-name]/
The GitHub Actions workflow (.github/workflows/deploy.yml) will automatically deploy your site.
All required wheel files should be served from the wheelhouse/pyodide-cp312/ directory. Ensure you have built the necessary packages using the build scripts provided.
import micropip
# Install spaCy and dependencies
await micropip.install("http://localhost:8000/murmurhash-1.0.12-cp312-cp312-pyodide_2024_0_wasm32.whl")
await micropip.install("http://localhost:8000/blis-1.2.0-cp312-cp312-pyodide_2024_0_wasm32.whl")
await micropip.install("http://localhost:8000/srsly-2.5.1-cp312-cp312-pyodide_2024_0_wasm32.whl")
await micropip.install("http://localhost:8000/cymem-2.0.11-cp312-cp312-pyodide_2024_0_wasm32.whl")
await micropip.install("http://localhost:8000/preshed-3.0.9-cp312-cp312-pyodide_2024_0_wasm32.whl")
await micropip.install("http://localhost:8000/thinc-8.3.4-cp312-cp312-pyodide_2024_0_wasm32.whl")
await micropip.install("http://localhost:8000/spacy-3.8.6-cp312-cp312-pyodide_2024_0_wasm32.whl")
await micropip.install("http://localhost:8000/en_core_web_sm-3.8.0-py3-none-any.whl")
# Use spaCy
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("The quick brown fox jumps over the lazy dog.")
for token in doc:
    print(token.text, token.pos_, token.tag_)
Output:
The DET DT
quick ADJ JJ
brown ADJ JJ
fox NOUN NN
jumps VERB VBZ
over ADP IN
the DET DT
lazy ADJ JJ
dog NOUN NN
. PUNCT .
await micropip.install('http://localhost:8000/docopt-0.6.2-py2.py3-none-any.whl')
await micropip.install('num2words')
await micropip.install('misaki')
from misaki import en
g2p = en.G2P(trf=False, british=False, fallback=None)
text = '[Misaki](/misˈɑki/) is a G2P engine designed for [Kokoro](/kˈOkəɹO/) models.'
phonemes, tokens = g2p(text)
print(phonemes)
Output:
misˈɑki ɪz ɐ ʤˈitəpˈi ˈɛnʤən dəzˈInd fɔɹ kˈOkəɹO mˈɑdᵊlz.
await micropip.install('ordered_set')
await micropip.install('pypinyin')
await micropip.install('cn2an')
await micropip.install('http://localhost:8000/jieba-0.42.1-py3-none-any.whl')
await micropip.install('misaki')
from misaki import zh
g2p = zh.ZHG2P()
text = "你好世界"
result = g2p(text)
print(result)
Output:
('ni↓xau↓ ʂɨ↘ʨje↘', None)
await micropip.install('http://localhost:8000/pyopenjtalk-0.4.1-cp312-cp312-pyodide_2024_0_wasm32.whl')
await micropip.install('http://localhost:8000/fugashi-1.5.3.dev0+g1576b66a2d20260328-cp312-cp312-pyodide_2024_0_wasm32.whl')
await micropip.install('http://localhost:8000/unidic_lite-1.0.8-py3-none-any.whl')
await micropip.install('http://localhost:8000/mojimoji-0.0.13-cp312-cp312-pyodide_2024_0_wasm32.whl')
await micropip.install('jaconv')
await micropip.install('misaki')
from misaki import ja
jp = ja.JAG2P()
text = "これはテストです。"
result = jp(text)
print(result)
Output:
('koɾe βa tesɨto desɨ.', None)
await micropip.install('http://localhost:8000/docopt-0.6.2-py2.py3-none-any.whl')
await micropip.install('num2words')
await micropip.install('http://localhost:8000/underthesea_core-3.3.0-cp312-cp312-pyodide_2024_0_wasm32.whl')
await micropip.install('https://files.pythonhosted.org/packages/40/31/6b65514ff282b27638847e1618b3826d92d6d2551ee0dffaad3b8f046075/vietnam_number-1.0.6-py3-none-any.whl')
await micropip.install('https://files.pythonhosted.org/packages/86/3e/3a523bdd24510288b1b850428e01172116a29268378b1da9a8d0b894a115/click-8.1.0-py3-none-any.whl')
await micropip.install('https://files.pythonhosted.org/packages/bf/0e/c68adf10adda05f28a6ed7b9f4cd7b8e07f641b44af88ba72d9c89e4de7a/typer-0.9.0-py3-none-any.whl')
await micropip.install('underthesea')
await micropip.install('viphoneme')
await micropip.install('misaki')
from misaki import vi
vt = vi.VIG2P()
text = "Tôi đang học xử lý ngôn ngữ tự nhiên"
phonemes, tokens = vt(text)
print(phonemes)
Output:
toj1 daŋ1 hɔk͡p6 sɯ4 li5 ŋon1 ŋɯ3 tɯ6 ɲiən1
await micropip.install('http://localhost:8000/docopt-0.6.2-py2.py3-none-any.whl')
await micropip.install('num2words')
await micropip.install('http://localhost:8000/python_mecab_ko-1.3.7-cp312-cp312-pyodide_2024_0_wasm32.whl')
await micropip.install('jamo')
await micropip.install('nltk')
await micropip.install('misaki')
from misaki import ko
import pyodide_http
pyodide_http.patch_all()
import nltk
nltk.download('cmudict')
text = "안녕하세요, 저는 학생입니다."
kt = ko.KOG2P()
phonemes = kt(text)
print(phonemes)
./build_mecab_wasm.sh
MECAB_WASM_PREFIX="$PWD/build-wasm/install" PACKAGES_TO_BUILD="python_mecab_ko" ./build_pyodide_cp312_from_tarballs.sh
See individual component licenses in their respective directories.