Skip to content

Commit 12cda9d

Browse files
Enhance bot functionality and configuration for E2EE support
- Added MATRIX_PASSWORD (alongside the existing MATRIX_USER_ID) to .env.example for E2EE hygiene.
- Updated ASR_MODEL_NAME in .env.example to version 0.6b-v3.
- Modified Dockerfile to install libolm dependencies and improve pip caching.
- Updated docker-compose.yml to mount store directory for persistent data.
- Adjusted requirements.txt to include matrix-nio with E2E support.
- Enhanced audio_converter.py to handle encrypted attachments and improve audio downloading.
- Updated bot_service.py to process encrypted audio events and manage device pruning.
- Added matrix_password to config.py for E2EE support.
- Implemented device management in main.py for better E2EE hygiene.
- Enhanced matrix_handlers.py to trust devices and claim missing Olm sessions.
- Applied schema patches in nio_patch.py for compatibility with key upload responses.
- Added local settings for permissions in settings.local.json.
- Expanded .gitignore to include additional files and directories.
- Created new files for blacklisted, ignored, and trusted devices in the store.
1 parent 04040b6 commit 12cda9d

14 files changed

Lines changed: 346 additions & 31 deletions

.env.example

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
MATRIX_HS_URL=https://matrix.example.org
22
MATRIX_USER_ID=@transcriptbot:example.org
33
MATRIX_ACCESS_TOKEN=syt_...
4+
# Optional — only needed if the bot should auto-prune its stale devices (E2EE hygiene)
5+
MATRIX_PASSWORD=
46

57
# Optional
68
LOCALE=en
7-
ASR_MODEL_NAME=nvidia/parakeet-tdt-0.6b-v2
9+
ASR_MODEL_NAME=nvidia/parakeet-tdt-0.6b-v3
810
MAX_AUDIO_BYTES=26214400
11+
STORE_PATH=/data/store

.gitignore

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
.env
2+
.env.local
3+
data/
4+
*.db
5+
__pycache__/
6+
*.py[cod]
7+
*$py.class
8+
.Python
9+
.venv/
10+
venv/
11+
.idea/
12+
.vscode/
13+
*.log
14+
.DS_Store

Dockerfile

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,34 @@ FROM python:3.11-slim
33
# ffmpeg is required by pydub for audio decoding/conversion
44
RUN apt-get update && apt-get install -y --no-install-recommends \
55
ffmpeg \
6+
libolm-dev \
7+
libolm3 \
68
&& rm -rf /var/lib/apt/lists/*
79

810
WORKDIR /app
911

1012
# Install CPU-only PyTorch first as a separate layer — saves ~2 GB vs CUDA wheels
11-
RUN pip install --no-cache-dir \
13+
RUN --mount=type=cache,target=/root/.cache/pip \
14+
pip install --timeout 300 --retries 5 \
1215
torch \
1316
torchaudio \
1417
--extra-index-url https://download.pytorch.org/whl/cpu
1518

1619
COPY requirements.txt .
17-
RUN pip install --no-cache-dir -r requirements.txt
20+
RUN --mount=type=cache,target=/root/.cache/pip \
21+
pip install -r requirements.txt
1822

1923
COPY src ./src
2024

2125
# Mount a volume here to cache the 2.4 GB Parakeet checkpoint across container restarts
2226
ENV NEMO_CACHE_DIR=/models
2327
ENV PYTHONUNBUFFERED=1
2428

29+
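# Disable NVIDIA/NeMo and Hugging Face telemetry (NVIDIA_TF32_OVERRIDE=0 is not telemetry: it turns off TF32 math in NVIDIA libraries)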
30+
ENV NEMO_ONE_LOGGER_ENABLED=false
31+
ENV ONE_LOGGER_ENABLED=false
32+
ENV NVIDIA_TF32_OVERRIDE=0
33+
ENV HF_HUB_DISABLE_TELEMETRY=1
34+
ENV DO_NOT_TRACK=1
35+
2536
CMD ["python", "-m", "src.main"]

docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ services:
55
env_file: .env
66
volumes:
77
- ./models:/models
8+
- ./store:/data/store

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
matrix-nio>=0.24.0,<0.26
1+
matrix-nio[e2e]>=0.24.0,<0.26
22
pydantic-settings>=2.2.0
33
aiohttp>=3.9.0
44
nemo_toolkit[asr]>=2.0.0

src/audio_converter.py

Lines changed: 92 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
from __future__ import annotations
22

3+
import contextlib
34
import io
45
import logging
6+
import os
57
import tempfile
68
from urllib.parse import quote
79

810
import aiohttp
911
from nio import AsyncClient
12+
from nio.crypto.attachments import decrypt_attachment
13+
14+
_ = io # kept for backwards compat; pydub no longer used
1015

1116
logger = logging.getLogger(__name__)
1217

@@ -33,6 +38,13 @@ def _pydub_format(mime: str) -> str:
3338
return _MIME_TO_PYDUB.get(mime.lower().split(";")[0].strip(), "ogg")
3439

3540

41+
def _input_suffix(mime: str) -> str:
42+
fmt = _pydub_format(mime)
43+
return {"mp4": ".m4a", "webm": ".webm", "ogg": ".ogg",
44+
"mp3": ".mp3", "flac": ".flac", "wav": ".wav",
45+
"aac": ".aac"}.get(fmt, ".bin")
46+
47+
3648
def _parse_mxc(mxc_url: str) -> tuple[str, str] | None:
3749
if not mxc_url.startswith("mxc://"):
3850
return None
@@ -49,8 +61,15 @@ def __init__(self, matrix: AsyncClient, max_bytes: int) -> None:
4961
self._matrix = matrix
5062
self._max_bytes = max_bytes
5163

52-
async def download_mxc(self, mxc_url: str) -> bytes | None:
53-
"""Download mxc:// URL via authenticated v1 endpoint with legacy fallback."""
64+
async def download_mxc(
65+
self,
66+
mxc_url: str,
67+
encrypted_file: dict | None = None,
68+
) -> bytes | None:
69+
"""Download mxc:// URL and optionally decrypt an E2EE attachment.
70+
71+
Pass the content["file"] dict from the Matrix event for encrypted rooms.
72+
"""
5473
parsed = _parse_mxc(mxc_url)
5574
if parsed is None:
5675
logger.warning("Invalid mxc URL: %s", mxc_url)
@@ -71,17 +90,54 @@ async def download_mxc(self, mxc_url: str) -> bytes | None:
7190
async with aiohttp.ClientSession() as session:
7291
for url in urls:
7392
try:
74-
async with session.get(url, headers=headers) as resp:
93+
async with session.get(url, headers=headers, allow_redirects=True) as resp:
7594
if resp.status != 200:
95+
logger.info("Download endpoint %s returned %d", url, resp.status)
7696
continue
7797
content_length = resp.content_length
7898
if content_length and content_length > self._max_bytes:
7999
logger.warning("Audio too large: %d bytes", content_length)
80100
return None
81-
data = await resp.content.read(self._max_bytes + 1)
101+
# Read full body, comparing against declared Content-Length to
102+
# catch truncated responses from Synapse's media replication.
103+
data = await resp.read()
82104
if len(data) > self._max_bytes:
83105
logger.warning("Audio exceeds max bytes after download")
84106
return None
107+
if not data:
108+
logger.warning("Empty body from %s", url)
109+
continue
110+
if content_length and len(data) < content_length:
111+
logger.warning(
112+
"Truncated response from %s: got %d / %d bytes",
113+
url, len(data), content_length,
114+
)
115+
continue
116+
content_type = resp.headers.get("Content-Type", "?")
117+
logger.info(
118+
"Downloaded %d bytes from %s (Content-Type=%s, declared=%s)",
119+
len(data), url, content_type, content_length,
120+
)
121+
# Suspiciously small response that isn't audio — likely a JSON error
122+
if len(data) < 1024 and not content_type.startswith("audio/"):
123+
preview = data[:512].decode("utf-8", errors="replace")
124+
logger.warning("Non-audio short response body: %s", preview)
125+
continue
126+
# Validate OGG signature when MIME claims ogg — Synapse sometimes
127+
# serves stub responses on cache miss
128+
if content_type.startswith("audio/ogg") and not data.startswith(b"OggS"):
129+
preview = data[:64].hex()
130+
logger.warning(
131+
"Response claims ogg but missing OggS magic from %s: %s",
132+
url, preview,
133+
)
134+
continue
135+
if encrypted_file:
136+
try:
137+
data = decrypt_attachment(data, encrypted_file)
138+
except Exception:
139+
logger.exception("Failed to decrypt attachment mxc=%s", mxc_url)
140+
return None
85141
return data
86142
except aiohttp.ClientError:
87143
logger.exception("Download failed for %s", url)
@@ -97,15 +153,37 @@ def convert_to_wav(self, audio_bytes: bytes, mime: str) -> str | None:
97153
Caller must delete the file (e.g. via os.unlink in a finally block).
98154
Blocking — must be called via run_in_executor.
99155
"""
100-
from pydub import AudioSegment
156+
import subprocess
157+
158+
# Write to disk first — ffmpeg's pipe input is unreliable with some
159+
# ogg/opus payloads on ffmpeg 7.x, while file input is rock-solid.
160+
with tempfile.NamedTemporaryFile(suffix=_input_suffix(mime), delete=False) as src:
161+
src.write(audio_bytes)
162+
src_path = src.name
163+
164+
dst_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
101165

102-
fmt = _pydub_format(mime)
103166
try:
104-
seg = AudioSegment.from_file(io.BytesIO(audio_bytes), format=fmt)
105-
seg = seg.set_frame_rate(16000).set_channels(1).set_sample_width(2)
106-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
107-
seg.export(tmp.name, format="wav")
108-
return tmp.name
109-
except Exception:
110-
logger.exception("Audio conversion failed (mime=%s)", mime)
111-
return None
167+
result = subprocess.run(
168+
[
169+
"ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
170+
"-i", src_path,
171+
"-ac", "1", "-ar", "16000", "-sample_fmt", "s16",
172+
dst_path,
173+
],
174+
capture_output=True,
175+
check=False,
176+
)
177+
if result.returncode != 0:
178+
logger.error(
179+
"ffmpeg failed (mime=%s, rc=%d): %s",
180+
mime, result.returncode,
181+
result.stderr.decode("utf-8", errors="replace").strip(),
182+
)
183+
with contextlib.suppress(OSError):
184+
os.unlink(dst_path)
185+
return None
186+
return dst_path
187+
finally:
188+
with contextlib.suppress(OSError):
189+
os.unlink(src_path)

src/bot_service.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,28 +54,37 @@ async def handle_audio_event(
5454
if event.sender == self.matrix.user_id:
5555
return
5656

57-
mxc_url = getattr(event, "url", None)
57+
content: dict = event.source.get("content", {})
58+
info: dict = content.get("info") or {}
59+
mime: str = info.get("mimetype", "audio/ogg")
60+
file_size: int = info.get("size", 0)
61+
62+
# Encrypted rooms use content["file"] with embedded key material;
63+
# unencrypted rooms use content["url"].
64+
encrypted_file: dict | None = content.get("file")
65+
if encrypted_file:
66+
mxc_url = encrypted_file.get("url")
67+
else:
68+
mxc_url = getattr(event, "url", None) or content.get("url")
69+
5870
if not mxc_url or not str(mxc_url).startswith("mxc://"):
5971
logger.warning("Audio event has no mxc url room=%s", room.room_id) # type: ignore[attr-defined]
6072
return
6173

62-
info: dict = event.source.get("content", {}).get("info") or {}
63-
mime: str = info.get("mimetype", "audio/ogg")
64-
file_size: int = info.get("size", 0)
65-
6674
if file_size and file_size > self.settings.max_audio_bytes:
6775
max_mb = self.settings.max_audio_bytes // (1024 * 1024)
6876
await self._send_plain(room.room_id, self.strings.audio_too_large.format(max_mb=max_mb)) # type: ignore[attr-defined]
6977
return
7078

7179
logger.info(
72-
"Processing audio event room=%s sender=%s mime=%s",
80+
"Processing audio event room=%s sender=%s mime=%s encrypted=%s",
7381
room.room_id, # type: ignore[attr-defined]
7482
event.sender,
7583
mime,
84+
encrypted_file is not None,
7685
)
7786

78-
audio_bytes = await self._converter.download_mxc(str(mxc_url))
87+
audio_bytes = await self._converter.download_mxc(str(mxc_url), encrypted_file)
7988
if audio_bytes is None:
8089
await self._send_plain(room.room_id, self.strings.no_audio_url) # type: ignore[attr-defined]
8190
return
@@ -119,4 +128,5 @@ async def _send_plain(self, room_id: str, text: str) -> None:
119128
room_id,
120129
"m.room.message",
121130
{"msgtype": "m.text", "body": text},
131+
ignore_unverified_devices=True,
122132
)

src/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,15 @@ class Settings(BaseSettings):
1111
matrix_hs_url: str
1212
matrix_user_id: str
1313
matrix_access_token: str
14+
matrix_password: str | None = None
1415

1516
locale: str = "en"
1617

1718
asr_model_name: str = "nvidia/parakeet-tdt-0.6b-v2"
1819

1920
max_audio_bytes: int = 25 * 1024 * 1024
2021

22+
store_path: str = "/data/store"
23+
2124
def matrix_homeserver_base(self) -> str:
2225
return self.matrix_hs_url.rstrip("/")

0 commit comments

Comments
 (0)