Skip to content

Commit 847d734

Browse files
Initial Opensource release
0 parents  commit 847d734

22 files changed

Lines changed: 1098 additions & 0 deletions

.env.example

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
MATRIX_HS_URL=https://matrix.example.org
2+
MATRIX_USER_ID=@transcriptbot:example.org
3+
MATRIX_ACCESS_TOKEN=syt_...
4+
# Optional - required only to auto-prune stale bot devices for E2EE hygiene
5+
MATRIX_PASSWORD=
6+
7+
# Optional
8+
LOCALE=en
9+
ASR_MODEL_NAME=nvidia/parakeet-tdt-0.6b-v3
10+
MAX_AUDIO_BYTES=26214400
11+
STORE_PATH=/data/store

.github/workflows/lint.yml

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
# CI workflow: static analysis of the bot sources.
#   ruff   - lint and formatting check of src/
#   bandit - security scan of src/; the job fails only on HIGH severity findings
name: Lint & Security

on:
  push:
    branches: ["main", "master"]
  pull_request:
    branches: ["main", "master"]

# Least privilege: both jobs only need to read the repository contents.
permissions:
  contents: read

jobs:
  ruff:
    name: Ruff (lint + format)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install ruff
        run: pip install ruff

      - name: Lint
        run: ruff check src/

      - name: Format check
        run: ruff format --check src/

  bandit:
    name: Bandit (security)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install bandit
        run: pip install "bandit[toml]"

      # --exit-zero keeps this step green when findings exist; the next step
      # reads the JSON report and decides whether the job fails.
      - name: Run bandit
        run: bandit -r src/ -c pyproject.toml --format json -o bandit-report.json --exit-zero

      # Prints every finding and exits 1 only when a HIGH severity issue exists.
      # The heredoc body and its EOF terminator must stay at the base indent of
      # this block scalar so that, after YAML strips the common indentation,
      # the Python code starts at column 0 and the heredoc terminates.
      - name: Print bandit report
        if: always()
        run: |
          if [ -f bandit-report.json ]; then
            python - <<'EOF'
          import json, sys
          with open("bandit-report.json") as f:
              r = json.load(f)
          issues = r.get("results", [])
          if not issues:
              print("No security issues found.")
              sys.exit(0)
          for i in issues:
              sev = i["issue_severity"]
              conf = i["issue_confidence"]
              text = i["issue_text"]
              loc = f"{i['filename']}:{i['line_number']}"
              print(f"[{sev}/{conf}] {loc}: {text}")
          high = [i for i in issues if i["issue_severity"] == "HIGH"]
          if high:
              print(f"\n{len(high)} HIGH severity issue(s) found - failing.")
              sys.exit(1)
          EOF
          fi

      - name: Upload bandit report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: bandit-report
          path: bandit-report.json
          if-no-files-found: ignore

.gitignore

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
.env
2+
.env.local
3+
data/
4+
*.db
5+
__pycache__/
6+
*.py[cod]
7+
*$py.class
8+
.Python
9+
.venv/
10+
venv/
11+
.idea/
12+
.vscode/
13+
*.log
14+
.DS_Store

Dockerfile

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
FROM python:3.11-slim

# System packages:
#   ffmpeg              - required by pydub for audio decoding/conversion
#   libolm-dev, libolm3 - Olm library (presumably needed by the matrix-nio
#                         E2EE extra listed in requirements.txt - confirm)
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libolm-dev \
        libolm3 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install CPU-only PyTorch first as a separate layer, saves ~2 GB vs CUDA wheels.
# --index-url (rather than --extra-index-url) makes the CPU wheel index the
# only package source for this step, so pip cannot select a CUDA build from PyPI.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install --timeout 300 --retries 5 \
        torch \
        torchaudio \
        --index-url https://download.pytorch.org/whl/cpu

# Copy the requirements file on its own so the dependency layer is rebuilt
# only when requirements.txt changes, not on every source edit.
COPY requirements.txt .
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt

COPY src ./src

# Mount a volume here to cache the 2.4 GB Parakeet checkpoint across container restarts
ENV NEMO_CACHE_DIR=/models
ENV PYTHONUNBUFFERED=1

# Disable NVIDIA/NeMo, Hugging Face Hub and generic (DO_NOT_TRACK) telemetry
ENV NEMO_ONE_LOGGER_ENABLED=false
ENV ONE_LOGGER_ENABLED=false
ENV HF_HUB_DISABLE_TELEMETRY=1
ENV DO_NOT_TRACK=1

# Not telemetry: NVIDIA_TF32_OVERRIDE=0 is a math-mode switch for NVIDIA
# libraries (disables TF32). NOTE(review): presumably has no effect on this
# CPU-only image - confirm whether it is still needed.
ENV NVIDIA_TF32_OVERRIDE=0

CMD ["python", "-m", "src.main"]

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2026 SASHARD
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Matrix Voice Transcript
2+
3+
Matrix bot that transcribes voice messages and audio files using [NVIDIA NeMo Parakeet TDT](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2) running locally on CPU. Supports E2EE rooms. No audio leaves the server.
4+
5+
## Requirements
6+
7+
- Matrix bot account with an access token.
8+
- `MATRIX_PASSWORD` recommended for E2EE rooms (enables stale-device pruning; without it decryption may fail on first run).
9+
- ~2.5 GB disk space for the model checkpoint (cached in `./models`, downloaded on first start).
10+
11+
## Quick start
12+
13+
1. Copy `.env.example` to `.env` and fill in the variables.
14+
2. `docker compose up -d`
15+
3. Invite the bot to a Matrix room.
16+
17+
## Environment variables
18+
19+
| Variable | Description |
20+
|---|---|
21+
| `MATRIX_HS_URL` | Homeserver URL (with `https://`) |
22+
| `MATRIX_USER_ID` | Full bot MXID, e.g. `@transcriptbot:example.org` |
23+
| `MATRIX_ACCESS_TOKEN` | Bot access token |
24+
| `MATRIX_PASSWORD` | Optional, but recommended for encrypted rooms: lets the bot prune its stale E2EE devices on startup; without it, decryption may be unreliable. |
25+
| `LOCALE` | Message language: `en` (default) or `ru` |
26+
| `ASR_MODEL_NAME` | NeMo model (default: `nvidia/parakeet-tdt-0.6b-v2`) |
27+
| `MAX_AUDIO_BYTES` | Max file size in bytes (default: `26214400` = 25 MB) |
28+
| `STORE_PATH` | Olm key store path inside the container (default: `/data/store`) |
29+
30+
**Supported formats:** ogg/opus, webm, mp4/m4a, aac, flac, mp3, wav.
31+
32+
## Message language / Смена языка
33+
34+
```env
35+
LOCALE=en # English (default)
36+
LOCALE=ru # Russian / Русский
37+
```
38+
39+
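Run `docker compose up -d` to apply: the container must be recreated to pick up changes in `.env` (`docker compose restart` does not reload them).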
40+
41+
## Local development
42+
43+
Requires Python 3.11+ and `ffmpeg` on PATH.
44+
45+
```bash
46+
python -m venv .venv && source .venv/bin/activate  # Windows: .venv\Scripts\activate
47+
pip install -r requirements.txt
48+
python -m src.main
49+
```
50+
51+
## Security
52+
53+
- Never commit `.env`.
54+
- Transcribed text is never written to logs.
55+
- Temp audio files are deleted immediately after transcription.
56+
57+
## License
58+
59+
MIT

docker-compose.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
# Single-service stack: builds the bot image from the local Dockerfile and
# reads its configuration from .env.
services:
  bot:
    build: .
    restart: unless-stopped
    env_file:
      - .env
    volumes:
      # Bind mount at /models, the directory NEMO_CACHE_DIR points to in the image.
      - "./models:/models"
      # Bind mount at /data/store, the STORE_PATH value given in .env.example.
      - "./store:/data/store"

pyproject.toml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
[tool.ruff]
2+
target-version = "py311"
3+
line-length = 100
4+
5+
[tool.ruff.lint]
6+
select = [
7+
"E", "W", "F", "I", "UP", "B", "C4", "SIM", "S", "T20", "RUF",
8+
]
9+
ignore = [
10+
"S101",
11+
"S104",
12+
"S311",
13+
]
14+
15+
[tool.ruff.lint.per-file-ignores]
16+
"src/__init__.py" = ["F401"]
17+
"src/strings.py" = ["RUF001"]
18+
19+
[tool.ruff.lint.isort]
20+
known-first-party = ["src"]
21+
22+
[tool.ruff.format]
23+
quote-style = "double"
24+
indent-style = "space"
25+
26+
[tool.bandit]
27+
targets = ["src"]
28+
severity = "medium"
29+
confidence = "medium"
30+
skips = ["B101", "B311", "B404", "B603"]

requirements-dev.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ruff>=0.4.0
2+
bandit[toml]>=1.7.0

requirements.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
matrix-nio[e2e]>=0.24.0,<0.26
2+
pydantic-settings>=2.2.0
3+
aiohttp>=3.9.0
4+
nemo_toolkit[asr]>=2.0.0
5+
torch>=2.0.0
6+
torchaudio>=2.0.0
7+
pydub>=0.25.1
8+
soundfile>=0.12.1

0 commit comments

Comments
 (0)