speechmatics-python-eg/agent.py at main · TudorCRL/speechmatics-python-eg · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import asyncio
import io
import os
import ssl
import sys

import pyaudio
from dotenv import load_dotenv
from speechmatics_flow.client import WebsocketClient
from speechmatics_flow.models import (
    AudioSettings,
    ConnectionSettings,
    ConversationConfig,
    Interaction,
    ServerMessageType,
)

load_dotenv()


# TLS configuration for the WebSocket connection.
# Certificates and hostnames are verified by default: the API key is sent over
# this connection, so skipping verification would expose it to
# man-in-the-middle attacks. Verification can be switched off explicitly (for
# example on a machine without a usable CA bundle) by setting
# SPEECHMATICS_INSECURE_SSL=1 in the environment or in the .env file.
ssl_context = ssl.create_default_context()
if os.getenv("SPEECHMATICS_INSECURE_SSL", "").strip().lower() in ("1", "true", "yes"):
    # check_hostname must be disabled before verify_mode can be set to CERT_NONE.
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

# Client for the Flow service; the API key is read from SPEECHMATICS_API_KEY.
client = WebsocketClient(
    ConnectionSettings(
        url="wss://flow.api.speechmatics.com/v1/flow",
        auth_token=os.getenv("SPEECHMATICS_API_KEY"),
        ssl_context=ssl_context,
    )
)

# In-memory buffer that accumulates the binary (audio) messages sent from the
# server until they are read back out for playback.
audio_buffer = io.BytesIO()


# Callback that appends incoming binary messages to the audio buffer
def binary_msg_handler(msg: bytes):
    """Append ``msg`` to ``audio_buffer``.

    Anything that is not a ``bytes`` or ``bytearray`` object is ignored.
    """
    if not isinstance(msg, (bytes, bytearray)):
        return
    audio_buffer.write(msg)

# Register binary_msg_handler as the client's handler for
# ServerMessageType.audio, i.e. the callback to be called when the client
# receives an audio message from the server.
client.add_event_handler(ServerMessageType.audio, binary_msg_handler)


async def audio_playback():
    """Continuously drain ``audio_buffer`` and play its contents to the user.

    Opens a mono, 16-bit, 16 kHz PyAudio output stream and polls the shared
    buffer every 50 ms. The loop never finishes on its own; it ends only when
    the task is cancelled or an error is raised. In either case the output
    stream and the PyAudio instance are released before the exception
    propagates.
    """
    p = pyaudio.PyAudio()
    stream = None
    try:
        # Opened inside the try block so that PyAudio is still terminated if
        # the output device cannot be opened.
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, output=True)
        while True:
            audio_to_play = audio_buffer.getvalue()
            if audio_to_play:
                # Empty the buffer before the blocking write so that nothing
                # appended while the audio is being played gets discarded.
                audio_buffer.seek(0)
                audio_buffer.truncate(0)
                stream.write(audio_to_play)

            # Pause briefly before checking the buffer again
            await asyncio.sleep(0.05)
    finally:
        try:
            if stream is not None:
                # A stream must be stopped before it is closed; calling
                # stop_stream() on an already closed stream raises.
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()


async def main():
    """Run the Flow conversation and the audio playback loop concurrently.

    Audio for the conversation is read from standard input. The playback loop
    never finishes on its own, so as soon as either task completes (normally,
    the client session ending) the other one is cancelled. This lets the
    program exit instead of waiting forever. An exception raised by either
    task is re-raised to the caller.
    """
    tasks = [
        asyncio.create_task(
            client.run(
                interactions=[Interaction(sys.stdin.buffer)],
                audio_settings=AudioSettings(),
                conversation_config=ConversationConfig(
                    template_id="flow-service-assistant-humphrey",
                    template_variables={
                        "persona": "You are a joyful old man full of knowledge.",
                        "style": "Be charming and sassy. Be helpful in your answers without being patronising.",
                        "context": "You are having a conversation about history with another person.",
                    },
                ),
            )
        ),
        asyncio.create_task(audio_playback()),
    ]

    try:
        done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
    finally:
        # Stop whatever is still running (cancel() is a no-op on finished
        # tasks) and wait for it to unwind so its cleanup code gets to run.
        for task in tasks:
            task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)

    # Surface the exception, if any, of the task(s) that finished first.
    for task in done:
        task.result()


# Start the agent only when this file is executed as a script, so that merely
# importing the module does not begin a conversation.
if __name__ == "__main__":
    asyncio.run(main())