From 9f11e6521b1f73f16db04b8fe27fdac162983354 Mon Sep 17 00:00:00 2001 From: RiannaBarrett Date: Mon, 17 Jun 2024 15:26:52 -0400 Subject: [PATCH 01/14] picovoice configuration --- wake-word-detect.py | 153 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 wake-word-detect.py diff --git a/wake-word-detect.py b/wake-word-detect.py new file mode 100644 index 0000000..0fbf11b --- /dev/null +++ b/wake-word-detect.py @@ -0,0 +1,153 @@ +from openai import OpenAI +import sounddevice as sd +import numpy as np +import tempfile +import pvporcupine +import wavio +import os +import time + +# Retrieve the OpenAI API key and Porcupine access key from environment variables +openai_api_key = os.getenv("OPENAI_API_KEY") +porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") + +if not openai_api_key: + raise ValueError("OpenAI API key is not set in environment variables.") +if not porcupine_access_key: + raise ValueError("Porcupine access key is not set in environment variables.") + +# Initialize OpenAI client +client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) + +# Initialize Porcupine +porcupine = pvporcupine.create( + access_key=porcupine_access_key, + keywords=['picovoice', 'bumblebee'] +) + +def get_next_audio_frame(): + """ + Record a chunk of audio from the microphone. + """ + return sd.rec(int(porcupine.frame_length), samplerate=porcupine.sample_rate, channels=1, dtype='int16') + +def query_and_record(prompt, mp3_filename): + """ + Send a prompt to the OpenAI assistant and record the response as an MP3 file. + """ + # Create an assistant instance + assistant = client.beta.assistants.create( + name="Senior Tech Help", + instructions="You are a helpful tech teacher specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology-related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step-by-step instructions for their response.", + model="gpt-4o" + ) + + # Create a thread for communication + thread = client.beta.threads.create() + + # Send user's prompt to the AI + message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content=prompt + ) + + # Start the AI to process the user prompt + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please address the user as Jane Doe. The user has a premium account." + ) + + # Wait until AI is complete with processing + while run.status in ["in_progress", "queued"]: + time.sleep(1) + run = client.beta.threads.runs.retrieve( + thread_id=thread.id, + run_id=run.id + ) + + if run.status == "completed": + message_list = client.beta.threads.messages.list( + thread_id=thread.id + ) + + # Extract the text from the response + text_response = message_list.data[-1].content + + # Generate an audio response from the text + response = client.audio.speech.create( + model="tts-1-hd", + voice="echo", + input=text_response, + ) + + response.stream_to_file(mp3_filename) + + print("Response recorded to " + mp3_filename) + +def is_silent(file, threshold=500): + """ + Returns 'True' if below the 'silent' threshold. + """ + return np.abs(file).mean() < threshold + +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500, min_chunks=5): + """ + Record audio from the microphone until silence is detected. + """ + print("Recording... Press Ctrl+C to stop.") + audio_file = [] + + try: + while True: + recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16') + sd.wait() + audio_file.append(recording) + # Ensure minimum recording length before checking for silence + if len(audio_file) >= min_chunks and is_silent(recording, threshold=silence_threshold): + print("Silence detected, stopping recording.") + break + except KeyboardInterrupt: + print("Recording stopped manually.") + + # Concatenate all recorded chunks + if audio_file: + audio_file = np.concatenate(audio_file, axis=0) + return audio_file + else: + raise ValueError("No audio file recorded.") + +# Main loop for keyword detection and interaction +try: + while True: + audio_frame = get_next_audio_frame() + sd.wait() + keyword_index = porcupine.process(audio_frame.flatten()) + if keyword_index == 0: + print("Detected 'picovoice'") + elif keyword_index == 1: + print("Detected 'bumblebee'") + + # Record audio from the microphone + audio_file = record_audio() + + # Convert audio to text using OpenAI API + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: + tmpfilename = tmpfile.name + wavio.write(tmpfilename, audio_file, 44100, sampwidth=2) + + transcription = client.audio.transcriptions.create( + model="whisper-1", + file=open(tmpfilename, "rb"), + ) + + print("Transcription:", transcription.text) + + # Example usage + prompt = transcription.text + mp3_filename = "response.mp3" + query_and_record(prompt, mp3_filename) + +finally: + porcupine.delete() From 62c25c7e2b1fc517917ab57c8c46ad67fd77b242 Mon Sep 17 00:00:00 2001 From: Rianna Date: Wed, 26 Jun 2024 11:51:55 -0400 Subject: [PATCH 02/14] initial commit of wake-word-detect --- wake-word-detect.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/wake-word-detect.py b/wake-word-detect.py index 0fbf11b..b332552 100644 --- a/wake-word-detect.py +++ b/wake-word-detect.py @@ -1,5 +1,8 @@ from openai import OpenAI +from dotenv import load_dotenv import sounddevice as sd +import struct +import pyaudio import numpy as np import tempfile import pvporcupine @@ -7,6 +10,8 @@ import os import time + +load_dotenv() # Retrieve the OpenAI API key and Porcupine access key from environment variables openai_api_key = os.getenv("OPENAI_API_KEY") porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") @@ -22,14 +27,20 @@ # Initialize Porcupine porcupine = pvporcupine.create( access_key=porcupine_access_key, - keywords=['picovoice', 'bumblebee'] + keywords=["picovoice", "bumblebee"] ) +paud = pyaudio.PyAudio() +audio_frame = paud.open(rate=porcupine.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=porcupine.frame_length) + def get_next_audio_frame(): """ Record a chunk of audio from the microphone. """ - return sd.rec(int(porcupine.frame_length), samplerate=porcupine.sample_rate, channels=1, dtype='int16') + + + + # return sd.rec(int(porcupine.frame_length), samplerate=porcupine.sample_rate, channels=1, dtype='int16') def query_and_record(prompt, mp3_filename): """ @@ -121,9 +132,10 @@ def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500, min_ # Main loop for keyword detection and interaction try: while True: - audio_frame = get_next_audio_frame() - sd.wait() - keyword_index = porcupine.process(audio_frame.flatten()) + keyword = audio_frame.read(porcupine.frame_length) + keyword = struct.unpack_from ("h" * porcupine.frame_length, keyword) + keyword_index= porcupine.process(keyword) + print(keyword_index) if keyword_index == 0: print("Detected 'picovoice'") elif keyword_index == 1: @@ -150,4 +162,10 @@ def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500, min_ query_and_record(prompt, mp3_filename) finally: - porcupine.delete() +# Ensuring proper release of resources + if porcupine is not None: + porcupine.delete() + if audio_frame is not None: + audio_frame.close() + if paud is not None: + paud.terminate() From 9f3fb3409b81c9bf0ab0177fc77ba2ffa1106775 Mon Sep 17 00:00:00 2001 From: Rianna Date: Thu, 27 Jun 2024 10:49:15 -0400 Subject: [PATCH 03/14] initial config --- wake-word-detect.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/wake-word-detect.py b/wake-word-detect.py index b332552..da711f3 100644 --- a/wake-word-detect.py +++ b/wake-word-detect.py @@ -38,7 +38,7 @@ def get_next_audio_frame(): Record a chunk of audio from the microphone. """ - + return audio_frame.read(porcupine.frame_length) # return sd.rec(int(porcupine.frame_length), samplerate=porcupine.sample_rate, channels=1, dtype='int16') @@ -132,7 +132,8 @@ def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500, min_ # Main loop for keyword detection and interaction try: while True: - keyword = audio_frame.read(porcupine.frame_length) + # keyword = audio_frame.read(porcupine.frame_length) + keyword = get_next_audio_frame() keyword = struct.unpack_from ("h" * porcupine.frame_length, keyword) keyword_index= porcupine.process(keyword) print(keyword_index) From d9ae9a88e211027729729f5da511506ccd4d10d5 Mon Sep 17 00:00:00 2001 From: Alex Louderback Date: Tue, 2 Jul 2024 09:54:54 -0400 Subject: [PATCH 04/14] Wake Word thru GPT response --- wake-word-detect.py | 111 +++++++++++++++++++++++--------------------- 1 file changed, 57 insertions(+), 54 deletions(-) diff --git a/wake-word-detect.py b/wake-word-detect.py index da711f3..e546ec9 100644 --- a/wake-word-detect.py +++ b/wake-word-detect.py @@ -1,21 +1,24 @@ -from openai import OpenAI from dotenv import load_dotenv import sounddevice as sd import struct -import pyaudio import numpy as np import tempfile import pvporcupine -import wavio +import wave import os import time - +from pvrecorder import PvRecorder +import wavio +from openai import OpenAI load_dotenv() + # Retrieve the OpenAI API key and Porcupine access key from environment variables openai_api_key = os.getenv("OPENAI_API_KEY") porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") +sd.default.device = None #'seeed-2mic-voicecard' + if not openai_api_key: raise ValueError("OpenAI API key is not set in environment variables.") if not porcupine_access_key: @@ -29,18 +32,42 @@ access_key=porcupine_access_key, keywords=["picovoice", "bumblebee"] ) +# paud = pyaudio.PyAudio() +# audio_frame = paud.open(rate=porcupine.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=porcupine.frame_length) -paud = pyaudio.PyAudio() -audio_frame = paud.open(rate=porcupine.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=porcupine.frame_length) - -def get_next_audio_frame(): +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500): """ - Record a chunk of audio from the microphone. + Record audio from the default microphone until silence is detected. """ + print("Recording... Press Ctrl+C to stop.") + audio_file = [] + + try: + while True: + recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16') + sd.wait() + audio_file.append(recording) + + # Check if the last recorded chunk is silent + if is_silent(recording, silence_threshold): + print("Silence detected, stopping recording.") + break + + except KeyboardInterrupt: + print("Recording stopped manually.") + + if audio_file: + audio_file = np.concatenate(audio_file, axis=0) + return audio_file + else: + raise ValueError("No audio file recorded.") - return audio_frame.read(porcupine.frame_length) - - # return sd.rec(int(porcupine.frame_length), samplerate=porcupine.sample_rate, channels=1, dtype='int16') +def is_silent(file, threshold=500): + + """ + Returns True if the audio file is below the silent threshold. + """ + return np.abs(file).mean() < threshold def query_and_record(prompt, mp3_filename): """ @@ -83,8 +110,11 @@ def query_and_record(prompt, mp3_filename): thread_id=thread.id ) - # Extract the text from the response - text_response = message_list.data[-1].content + # Extract the text content from the response + text_response = "" + for message in message_list.data: + if message.role == "assistant" and message.content: + text_response += message.content + "\n" # Generate an audio response from the text response = client.audio.speech.create( @@ -97,46 +127,17 @@ def query_and_record(prompt, mp3_filename): print("Response recorded to " + mp3_filename) -def is_silent(file, threshold=500): - """ - Returns 'True' if below the 'silent' threshold. - """ - return np.abs(file).mean() < threshold - -def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500, min_chunks=5): - """ - Record audio from the microphone until silence is detected. - """ - print("Recording... Press Ctrl+C to stop.") - audio_file = [] - - try: - while True: - recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16') - sd.wait() - audio_file.append(recording) - # Ensure minimum recording length before checking for silence - if len(audio_file) >= min_chunks and is_silent(recording, threshold=silence_threshold): - print("Silence detected, stopping recording.") - break - except KeyboardInterrupt: - print("Recording stopped manually.") - - # Concatenate all recorded chunks - if audio_file: - audio_file = np.concatenate(audio_file, axis=0) - return audio_file - else: - raise ValueError("No audio file recorded.") # Main loop for keyword detection and interaction +recorder = PvRecorder(frame_length=porcupine.frame_length) +recorder.start() +wav_file = None + try: while True: - # keyword = audio_frame.read(porcupine.frame_length) - keyword = get_next_audio_frame() - keyword = struct.unpack_from ("h" * porcupine.frame_length, keyword) - keyword_index= porcupine.process(keyword) - print(keyword_index) + pcm = recorder.read() + keyword_index = porcupine.process(pcm) + if keyword_index == 0: print("Detected 'picovoice'") elif keyword_index == 1: @@ -162,11 +163,13 @@ def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500, min_ mp3_filename = "response.mp3" query_and_record(prompt, mp3_filename) +except KeyboardInterrupt: + print("Script interrupted.") finally: # Ensuring proper release of resources if porcupine is not None: porcupine.delete() - if audio_frame is not None: - audio_frame.close() - if paud is not None: - paud.terminate() + # if audio_frame is not None: + # audio_frame.close() + # if paud is not None: + # paud.terminate() From e741c14c555dec6ecfb6f3df7e29d44d17e3f14d Mon Sep 17 00:00:00 2001 From: Rianna Barett Date: Tue, 2 Jul 2024 10:12:00 -0400 Subject: [PATCH 05/14] initial config voice ai --- wake-word-detect.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/wake-word-detect.py b/wake-word-detect.py index e546ec9..610d921 100644 --- a/wake-word-detect.py +++ b/wake-word-detect.py @@ -111,10 +111,12 @@ def query_and_record(prompt, mp3_filename): ) # Extract the text content from the response - text_response = "" - for message in message_list.data: - if message.role == "assistant" and message.content: - text_response += message.content + "\n" + # text_response = "" + # for message in message_list.data: + # if message.role == "assistant" and message.content: + # text_response += message.content + "\n" + + text_response = message_list.data[0].content[0].text.value # Generate an audio response from the text response = client.audio.speech.create( From ffcb600d0ff4e82d25a9cbb9974053e31dec9cbc Mon Sep 17 00:00:00 2001 From: Rianna Barett Date: Tue, 9 Jul 2024 09:37:00 -0400 Subject: [PATCH 06/14] wake word assistant with assistant api --- wake-word-assistant.py | 179 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 wake-word-assistant.py diff --git a/wake-word-assistant.py b/wake-word-assistant.py new file mode 100644 index 0000000..bbb00a3 --- /dev/null +++ b/wake-word-assistant.py @@ -0,0 +1,179 @@ +from dotenv import load_dotenv +import sounddevice as sd +import struct +import numpy as np +import tempfile +import pvporcupine +import wave +import os +import time +from pvrecorder import PvRecorder +import wavio +from openai import OpenAI + +load_dotenv() + +# Retrieve the OpenAI API key and Porcupine access key from environment variables +openai_api_key = os.getenv("OPENAI_API_KEY") +porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") +assistant_api_key = os.getenv("ASSSISTANT_API_KEY") + +sd.default.device = None #'seeed-2mic-voicecard' + +if not openai_api_key: + raise ValueError("OpenAI API key is not set in environment variables.") +if not porcupine_access_key: + raise ValueError("Porcupine access key is not set in environment variables.") + +# Initialize OpenAI client +client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) + +# Initialize Porcupine +porcupine = pvporcupine.create( + access_key=porcupine_access_key, + keywords=["picovoice", "bumblebee"] +) +# paud = pyaudio.PyAudio() +# audio_frame = paud.open(rate=porcupine.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=porcupine.frame_length) + +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500): + """ + Record audio from the default microphone until silence is detected. + """ + print("Recording... Press Ctrl+C to stop.") + audio_file = [] + + try: + while True: + recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16') + sd.wait() + audio_file.append(recording) + + # Check if the last recorded chunk is silent + if is_silent(recording, silence_threshold): + print("Silence detected, stopping recording.") + break + + except KeyboardInterrupt: + print("Recording stopped manually.") + + if audio_file: + audio_file = np.concatenate(audio_file, axis=0) + return audio_file + else: + raise ValueError("No audio file recorded.") + +def is_silent(file, threshold=500): + + """ + Returns True if the audio file is below the silent threshold. + """ + return np.abs(file).mean() < threshold + +def query_and_record(prompt, mp3_filename): + """ + Send a prompt to the OpenAI assistant and record the response as an MP3 file. + """ + # # Create an assistant instance + # assistant = client.beta.assistants.create( + # name="Senior Tech Help", + # instructions="You are a helpful tech teacher specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology-related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step-by-step instructions for their response.", + # model="gpt-4o" + # ) + assistant_id = assistant_api_key + + # Create a thread for communication + thread = client.beta.threads.create() + + # Send user's prompt to the AI + message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content=prompt + ) + + # Start the AI to process the user prompt + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant_id, + instructions="Please address the user as Jane Doe. The user has a premium account." + ) + + # Wait until AI is complete with processing + while run.status in ["in_progress", "queued"]: + time.sleep(1) + run = client.beta.threads.runs.retrieve( + thread_id=thread.id, + run_id=run.id + ) + + if run.status == "completed": + message_list = client.beta.threads.messages.list( + thread_id=thread.id + ) + + # Extract the text content from the response + # text_response = "" + # for message in message_list.data: + # if message.role == "assistant" and message.content: + # text_response += message.content + "\n" + + text_response = message_list.data[-1].content + + # Generate an audio response from the text + response = client.audio.speech.create( + model="tts-1-hd", + voice="echo", + input=text_response, + ) + + response.stream_to_file(mp3_filename) + + print("Response recorded to " + mp3_filename) + + +# Main loop for keyword detection and interaction +recorder = PvRecorder(frame_length=porcupine.frame_length) +recorder.start() +wav_file = None + +try: + while True: + pcm = recorder.read() + keyword_index = porcupine.process(pcm) + + if keyword_index == 0: + print("Detected 'picovoice'") + elif keyword_index == 1: + print("Detected 'bumblebee'") + + # Record audio from the microphone + audio_file = record_audio() + + # Convert audio to text using OpenAI API + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: + tmpfilename = tmpfile.name + wavio.write(tmpfilename, audio_file, 44100, sampwidth=2) + + transcription = client.audio.transcriptions.create( + model="whisper-1", + file=open(tmpfilename, "rb"), + ) + + print("Transcription:", transcription.text) + + # Example usage + prompt = transcription.text + mp3_filename = "response.mp3" + query_and_record(prompt, mp3_filename) + +except KeyboardInterrupt: + print("Script interrupted.") +finally: +# Ensuring proper release of resources + if porcupine is not None: + porcupine.delete() + # if audio_frame is not None: + # audio_frame.close() + # if paud is not None: + # paud.terminate() From 817bcd47ab76315dc5a7e4fd7521904022c84c73 Mon Sep 17 00:00:00 2001 From: RiannaBarrett Date: Wed, 10 Jul 2024 09:39:30 -0400 Subject: [PATCH 07/14] working with assistant link --- wake-word-assistant.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wake-word-assistant.py b/wake-word-assistant.py index bbb00a3..da94862 100644 --- a/wake-word-assistant.py +++ b/wake-word-assistant.py @@ -16,7 +16,7 @@ # Retrieve the OpenAI API key and Porcupine access key from environment variables openai_api_key = os.getenv("OPENAI_API_KEY") porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") -assistant_api_key = os.getenv("ASSSISTANT_API_KEY") +assistant_api_key = os.getenv("ASSISTANT_API_KEY") sd.default.device = None #'seeed-2mic-voicecard' @@ -118,7 +118,7 @@ def query_and_record(prompt, mp3_filename): # if message.role == "assistant" and message.content: # text_response += message.content + "\n" - text_response = message_list.data[-1].content + text_response = message_list.data[0].content[0].text.value # Generate an audio response from the text response = client.audio.speech.create( From 327a0d401b7dcf7d5297dfc6a69b8327f44eab49 Mon Sep 17 00:00:00 2001 From: RiannaBarrett Date: Wed, 10 Jul 2024 10:46:10 -0400 Subject: [PATCH 08/14] MP3 response plays once generated --- wake-word-assistant.py | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/wake-word-assistant.py b/wake-word-assistant.py index da94862..e1040d8 100644 --- a/wake-word-assistant.py +++ b/wake-word-assistant.py @@ -10,6 +10,7 @@ from pvrecorder import PvRecorder import wavio from openai import OpenAI +import pygame load_dotenv() @@ -19,7 +20,7 @@ assistant_api_key = os.getenv("ASSISTANT_API_KEY") sd.default.device = None #'seeed-2mic-voicecard' - + if not openai_api_key: raise ValueError("OpenAI API key is not set in environment variables.") if not porcupine_access_key: @@ -33,10 +34,8 @@ access_key=porcupine_access_key, keywords=["picovoice", "bumblebee"] ) -# paud = pyaudio.PyAudio() -# audio_frame = paud.open(rate=porcupine.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=porcupine.frame_length) -def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500): +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=2000): """ Record audio from the default microphone until silence is detected. """ @@ -64,17 +63,16 @@ def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500): raise ValueError("No audio file recorded.") def is_silent(file, threshold=500): - """ Returns True if the audio file is below the silent threshold. """ return np.abs(file).mean() < threshold -def query_and_record(prompt, mp3_filename): +def query_and_record(prompt): """ Send a prompt to the OpenAI assistant and record the response as an MP3 file. """ - # # Create an assistant instance + # # Create an assistant instance # assistant = client.beta.assistants.create( # name="Senior Tech Help", # instructions="You are a helpful tech teacher specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology-related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step-by-step instructions for their response.", @@ -111,13 +109,13 @@ def query_and_record(prompt, mp3_filename): message_list = client.beta.threads.messages.list( thread_id=thread.id ) - - # Extract the text content from the response + # Extract the text content from the response # text_response = "" # for message in message_list.data: # if message.role == "assistant" and message.content: # text_response += message.content + "\n" + text_response = message_list.data[0].content[0].text.value # Generate an audio response from the text @@ -127,15 +125,28 @@ def query_and_record(prompt, mp3_filename): input=text_response, ) - response.stream_to_file(mp3_filename) + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile: + mp3_filename = tmpfile.name + response.stream_to_file(mp3_filename) print("Response recorded to " + mp3_filename) + # Initialize pygame mixer + pygame.mixer.init() + + # Load the mp3 file + pygame.mixer.music.load(mp3_filename) + + # Play the mp3 file + pygame.mixer.music.play() + + # Wait until the music finishes playing + while pygame.mixer.music.get_busy(): + pygame.time.Clock().tick(10) # Main loop for keyword detection and interaction recorder = PvRecorder(frame_length=porcupine.frame_length) recorder.start() -wav_file = None try: while True: @@ -164,8 +175,7 @@ def query_and_record(prompt, mp3_filename): # Example usage prompt = transcription.text - mp3_filename = "response.mp3" - query_and_record(prompt, mp3_filename) + query_and_record(prompt) except KeyboardInterrupt: print("Script interrupted.") @@ -173,7 +183,5 @@ def query_and_record(prompt, mp3_filename): # Ensuring proper release of resources if porcupine is not None: porcupine.delete() - # if audio_frame is not None: - # audio_frame.close() - # if paud is not None: - # paud.terminate() + recorder.stop() + recorder.delete() From 0e4ce4235ca6a472fdb885d5ed596ed51566d4e2 Mon Sep 17 00:00:00 2001 From: RiannaBarrett Date: Thu, 11 Jul 2024 12:44:23 -0400 Subject: [PATCH 09/14] Create New Thread when voice AI is started --- wake-word-thread.py | 202 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 wake-word-thread.py diff --git a/wake-word-thread.py b/wake-word-thread.py new file mode 100644 index 0000000..7e91f51 --- /dev/null +++ b/wake-word-thread.py @@ -0,0 +1,202 @@ +from dotenv import load_dotenv +import sounddevice as sd +import struct +import numpy as np +import tempfile +import pvporcupine +import wave +import os +import time +from pvrecorder import PvRecorder +import wavio +from openai import OpenAI +import pygame +import threading + +load_dotenv() + +# Retrieve the OpenAI API key and Porcupine access key from environment variables +openai_api_key = os.getenv("OPENAI_API_KEY") +porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") +assistant_api_key = os.getenv("ASSISTANT_API_KEY") + +sd.default.device = None # 'seeed-2mic-voicecard' + +if not openai_api_key: + raise ValueError("OpenAI API key is not set in environment variables.") +if not porcupine_access_key: + raise ValueError("Porcupine access key is not set in environment variables.") + +# Initialize OpenAI client +client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) + +# Initialize Porcupine +porcupine = pvporcupine.create( + access_key=porcupine_access_key, + keywords=["picovoice", "bumblebee"] +) + +thread_id = None #store the thread ID + +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=2000): + """ + Record audio from the default microphone until silence is detected. + """ + print("Recording... Press Ctrl+C to stop.") + audio_file = [] + + try: + while True: + recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16') + sd.wait() + audio_file.append(recording) + + # Check if the last recorded chunk is silent + if is_silent(recording, silence_threshold): + print("Silence detected, stopping recording.") + break + + except KeyboardInterrupt: + print("Recording stopped manually.") + + if audio_file: + audio_file = np.concatenate(audio_file, axis=0) + return audio_file + else: + raise ValueError("No audio file recorded.") + +def is_silent(file, threshold=500): + """ + Returns True if the audio file is below the silent threshold. + """ + return np.abs(file).mean() < threshold + +def query_and_record(prompt): + """ + Send a prompt to the OpenAI assistant and record the response as an MP3 file. + """ + # # Create an assistant instance + # assistant = client.beta.assistants.create( + # name="Senior Tech Help", + # instructions="You are a helpful tech teacher specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology-related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step-by-step instructions for their response.", + # model="gpt-4o" + # ) + assistant_id = assistant_api_key + + global thread_id # Access the global thread ID + + if thread_id is None: + # Create a thread for communication + thread = client.beta.threads.create() + thread_id = thread.id + print(f"New thread created with ID: {thread_id}") + else: + # Retrieve the existing thread + thread = client.beta.threads.retrieve(thread_id) + print(f"Using existing thread with ID: {thread_id}") + + # Send user's prompt to the AI + message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content=prompt + ) + + # Start the AI to process the user prompt + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant_id, + instructions="Please address the user as Jane Doe. The user has a premium account." + ) + + # Wait until AI is complete with processing + while run.status in ["in_progress", "queued"]: + time.sleep(1) + run = client.beta.threads.runs.retrieve( + thread_id=thread.id, + run_id=run.id + ) + + if run.status == "completed": + message_list = client.beta.threads.messages.list( + thread_id=thread.id + ) + + text_response = message_list.data[0].content[0].text.value + + # Generate an audio response from the text + response = client.audio.speech.create( + model="tts-1-hd", + voice="echo", + input=text_response, + ) + + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile: + mp3_filename = tmpfile.name + response.stream_to_file(mp3_filename) + + print("Response recorded to " + mp3_filename) + + # Initialize pygame mixer + pygame.mixer.init() + + # Load the mp3 file + pygame.mixer.music.load(mp3_filename) + + # Play the mp3 file + pygame.mixer.music.play() + + # Wait until the response finishes playing + while pygame.mixer.music.get_busy(): + pygame.time.Clock().tick(10) + +def handle_interaction(prompt): + """ + Handle the interaction with the AI in a separate thread. + """ + query_and_record(prompt) + +# Main loop for keyword detection and interaction +recorder = PvRecorder(frame_length=porcupine.frame_length) +recorder.start() + +try: + while True: + pcm = recorder.read() + keyword_index = porcupine.process(pcm) + + if keyword_index == 0: + print("Detected 'picovoice'") + elif keyword_index == 1: + print("Detected 'bumblebee'") + + # Record audio from the microphone + audio_file = record_audio() + + # Convert audio to text using OpenAI API + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: + tmpfilename = tmpfile.name + wavio.write(tmpfilename, audio_file, 44100, sampwidth=2) + + transcription = client.audio.transcriptions.create( + model="whisper-1", + file=open(tmpfilename, "rb"), + ) + + print("Transcription:", transcription.text) + + # Example usage + prompt = transcription.text + + # Start a new thread for handling the interaction + interaction_thread = threading.Thread(target=handle_interaction, args=(prompt,)) + interaction_thread.start() + +except KeyboardInterrupt: + print("Script interrupted.") +finally: +# Ensuring proper release of resources + if porcupine is not None: + porcupine.delete() + recorder.stop() + recorder.delete() From 6a06aae4e26b7e1cd05542e8863a10b438c4886b Mon Sep 17 00:00:00 2001 From: RiannaBarrett Date: Thu, 18 Jul 2024 12:04:50 -0400 Subject: [PATCH 10/14] wake word to response working. Silence detection improved --- response.py | 99 +++++++++++++++++++++++++++++++++++++++ wake-word-audio.py | 113 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 response.py create mode 100644 wake-word-audio.py diff --git a/response.py b/response.py new file mode 100644 index 0000000..32affe1 --- /dev/null +++ b/response.py @@ -0,0 +1,99 @@ +import tempfile +import os +from openai import OpenAI +import pygame +import time +import sounddevice as sd +from dotenv import load_dotenv + +load_dotenv() + +# Retrieve the OpenAI API key and Porcupine access key from environment variables +openai_api_key = os.getenv("OPENAI_API_KEY") +assistant_api_key = os.getenv("ASSISTANT_API_KEY") + +sd.default.device = None # 'seeed-2mic-voicecard' + +# Initialize OpenAI client +client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) + +thread_id = None # Store the thread ID + +def query_and_record(prompt): + """ + Send a prompt to the OpenAI assistant and record the response as an MP3 file. + """ + assistant_id = assistant_api_key + + global thread_id # Access the global thread ID + + if thread_id is None: + # Create a thread for communication + thread = client.beta.threads.create() + thread_id = thread.id + print(f"New thread created with ID: {thread_id}") + else: + # Retrieve the existing thread + thread = client.beta.threads.retrieve(thread_id) + print(f"Using existing thread with ID: {thread_id}") + + # Send user's prompt to the AI + message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content=prompt + ) + + # Start the AI to process the user prompt + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant_id, + instructions="Please address the user as Jane Doe. The user has a premium account." + ) + + # Wait until AI is complete with processing + while run.status in ["in_progress", "queued"]: + time.sleep(1) + run = client.beta.threads.runs.retrieve( + thread_id=thread.id, + run_id=run.id + ) + + if run.status == "completed": + message_list = client.beta.threads.messages.list( + thread_id=thread.id + ) + + text_response = message_list.data[0].content[0].text.value + + # Generate an audio response from the text + response = client.audio.speech.create( + model="tts-1-hd", + voice="echo", + input=text_response, + ) + + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile: + mp3_filename = tmpfile.name + response.stream_to_file(mp3_filename) + + print("Response recorded to " + mp3_filename) + + # Initialize pygame mixer + pygame.mixer.init() + + # Load the mp3 file + pygame.mixer.music.load(mp3_filename) + + # Play the mp3 file + pygame.mixer.music.play() + + # Wait until the response finishes playing + while pygame.mixer.music.get_busy(): + pygame.time.Clock().tick(10) + +def handle_interaction(prompt): + """ + Handle the interaction with the AI. + """ + query_and_record(prompt) diff --git a/wake-word-audio.py b/wake-word-audio.py new file mode 100644 index 0000000..61c012f --- /dev/null +++ b/wake-word-audio.py @@ -0,0 +1,113 @@ +import sounddevice as sd +import numpy as np +import tempfile +import wavio +from openai import OpenAI +import threading +import os +import pvporcupine +from pvrecorder import PvRecorder +import time +from response import handle_interaction # Import the interaction handling function +from dotenv import load_dotenv + +load_dotenv() + +# Retrieve the OpenAI API key and Porcupine access key from environment variables +openai_api_key = os.getenv("OPENAI_API_KEY") +porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") + +sd.default.device = None # 'seeed-2mic-voicecard' + +if not openai_api_key: + raise ValueError("OpenAI API key is not set in environment variables.") +if not porcupine_access_key: + raise ValueError("Porcupine access key is not set in environment variables.") + +# Initialize Porcupine +porcupine = pvporcupine.create( + access_key=porcupine_access_key, + keywords=["picovoice", "bumblebee"] +) + +# Initialize the OpenAI client +client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) + +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=2000, timeout=5): + """ + Record audio from the default microphone until silence is detected, + with a timeout period to allow capturing additional audio. + """ + print("Recording... Press Ctrl+C to stop.") + audio_file = [] + + start_time = time.time() # Record the start time + try: + while True: + recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16') + sd.wait() + audio_file.append(recording) + + # Check if the last recorded chunk is silent + if is_silent(recording, silence_threshold): + # Check if timeout period has elapsed + if time.time() - start_time > timeout: + print("Timeout reached, stopping recording.") + break + else: + print("Silence detected, waiting for more audio...") + + except KeyboardInterrupt: + print("Recording stopped manually.") + + if audio_file: + audio_file = np.concatenate(audio_file, axis=0) + return audio_file + else: + raise ValueError("No audio file recorded.") + +def is_silent(file, threshold=500): + """ + Returns True if the audio file is below the silent threshold. + """ + return np.abs(file).mean() < threshold + +recorder = PvRecorder(frame_length=porcupine.frame_length) +recorder.start() + +try: + while True: + pcm = recorder.read() + keyword_index = porcupine.process(pcm) + + if keyword_index == 0: + print("Detected 'picovoice'") + elif keyword_index == 1: + print("Detected 'bumblebee'") + + # Record audio from the microphone with extended silence detection + audio_file = record_audio(timeout=5) # Adjust timeout + + # Convert audio to text using OpenAI API + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: + tmpfilename = tmpfile.name + wavio.write(tmpfilename, audio_file, 44100, sampwidth=2) + + transcription = client.audio.transcriptions.create( + model="whisper-1", + file=open(tmpfilename, "rb"), + ) + + print("Transcription:", transcription.text) + + # Start a new thread for handling the interaction + interaction_thread = threading.Thread(target=handle_interaction, args=(transcription.text,)) + interaction_thread.start() + +except KeyboardInterrupt: + print("Script interrupted.") +finally: + if porcupine is not None: + porcupine.delete() + recorder.stop() + recorder.delete() \ No newline at end of file From 387d2edca7ed98f36999d7e6b3714f61067b4dca Mon Sep 17 00:00:00 2001 From: RiannaBarrett Date: Tue, 23 Jul 2024 11:57:38 -0400 Subject: [PATCH 11/14] silence is detection when the user stops speaking --- response.py | 100 ++++++++++++++++++++++++++------------------- wake-word-audio.py | 63 ++++++++++++++-------------- 2 files changed, 88 insertions(+), 75 deletions(-) diff --git a/response.py b/response.py index 32affe1..0062ab3 100644 --- a/response.py +++ b/response.py @@ -27,6 +27,10 @@ def query_and_record(prompt): global thread_id # Access the global thread ID + if not prompt.strip(): + print("Error: The prompt is empty.") + return + if thread_id is None: # Create a thread for communication thread = client.beta.threads.create() @@ -37,60 +41,70 @@ def query_and_record(prompt): thread = client.beta.threads.retrieve(thread_id) print(f"Using existing thread with ID: {thread_id}") - # Send user's prompt to the AI - message = client.beta.threads.messages.create( - thread_id=thread.id, - role="user", - content=prompt - ) - - # Start the AI to process the user prompt - run = client.beta.threads.runs.create( - thread_id=thread.id, - assistant_id=assistant_id, - instructions="Please address the user as Jane Doe. The user has a premium account." - ) - - # Wait until AI is complete with processing - while run.status in ["in_progress", "queued"]: - time.sleep(1) - run = client.beta.threads.runs.retrieve( + try: + # Send user's prompt to the AI + message = client.beta.threads.messages.create( thread_id=thread.id, - run_id=run.id + role="user", + content=prompt ) - - if run.status == "completed": - message_list = client.beta.threads.messages.list( - thread_id=thread.id + + # Start the AI to process the user prompt + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant_id, + instructions="Please address the user as Jane Doe. The user has a premium account." ) + + # Wait until AI is complete with processing + while run.status in ["in_progress", "queued"]: + time.sleep(1) + run = client.beta.threads.runs.retrieve( + thread_id=thread.id, + run_id=run.id + ) - text_response = message_list.data[0].content[0].text.value + if run.status == "completed": + message_list = client.beta.threads.messages.list( + thread_id=thread.id + ) - # Generate an audio response from the text - response = client.audio.speech.create( - model="tts-1-hd", - voice="echo", - input=text_response, - ) + # Ensure there's a valid response + if not message_list.data or not message_list.data[0].content: + print("Error: No response content available.") + return + + text_response = message_list.data[0].content[0].text.value + + # Generate an audio response from the text + response = client.audio.speech.create( + model="tts-1-hd", + voice="echo", + input=text_response, + ) + + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile: + mp3_filename = tmpfile.name + response.stream_to_file(mp3_filename) + + print("Response recorded to " + mp3_filename) - with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile: - mp3_filename = tmpfile.name - response.stream_to_file(mp3_filename) + # Initialize pygame mixer + pygame.mixer.init() - print("Response recorded to " + mp3_filename) + # Load the mp3 file + pygame.mixer.music.load(mp3_filename) - # Initialize pygame mixer - pygame.mixer.init() + # Play the mp3 file + pygame.mixer.music.play() - # Load the mp3 file - pygame.mixer.music.load(mp3_filename) + # Wait until the response finishes playing + while pygame.mixer.music.get_busy(): + pygame.time.Clock().tick(10) - # Play the mp3 file - pygame.mixer.music.play() + except Exception as e: + print("Error during query and recording:", str(e)) - # Wait until the response finishes playing - while pygame.mixer.music.get_busy(): - pygame.time.Clock().tick(10) def handle_interaction(prompt): """ diff --git a/wake-word-audio.py b/wake-word-audio.py index 61c012f..46102b5 100644 --- a/wake-word-audio.py +++ b/wake-word-audio.py @@ -8,7 +8,7 @@ import pvporcupine from pvrecorder import PvRecorder import time -from response import handle_interaction # Import the interaction handling function +from response import handle_interaction from dotenv import load_dotenv load_dotenv() @@ -17,8 +17,6 @@ openai_api_key = os.getenv("OPENAI_API_KEY") porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") -sd.default.device = None # 'seeed-2mic-voicecard' - if not openai_api_key: raise ValueError("OpenAI API key is not set in environment variables.") if not porcupine_access_key: @@ -33,44 +31,45 @@ # Initialize the OpenAI client client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) -def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=2000, timeout=5): +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=1000, silence_duration=10): """ - Record audio from the default microphone until silence is detected, - with a timeout period to allow capturing additional audio. + Record audio in real-time and stop when silence is detected for the specified duration. """ print("Recording... Press Ctrl+C to stop.") audio_file = [] - - start_time = time.time() # Record the start time + silence_start_time = None + chunk_size = int(chunk_duration * samplerate) + try: - while True: - recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16') - sd.wait() - audio_file.append(recording) - - # Check if the last recorded chunk is silent - if is_silent(recording, silence_threshold): - # Check if timeout period has elapsed - if time.time() - start_time > timeout: - print("Timeout reached, stopping recording.") - break - else: - print("Silence detected, waiting for more audio...") + with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16', callback=lambda indata, frames, time, status: audio_file.append(indata.copy())): + while True: + if len(audio_file) > 0: + last_chunk = audio_file[-1] + + # Check if the last chunk is silent + if is_silent(last_chunk, silence_threshold): + if silence_start_time is None: + silence_start_time = time.time() + elif time.time() - silence_start_time > silence_duration: + print("Silence detected, stopping recording.") + break + else: + silence_start_time = None except KeyboardInterrupt: print("Recording stopped manually.") + + return np.concatenate(audio_file, axis=0) if audio_file else np.array([]) - if audio_file: - audio_file = np.concatenate(audio_file, axis=0) - return audio_file - else: - raise ValueError("No audio file recorded.") -def is_silent(file, threshold=500): +def is_silent(chunk, threshold=1000): """ - Returns True if the audio file is below the silent threshold. + Returns True if the audio chunk is below the silent threshold. """ - return np.abs(file).mean() < threshold + # Calculate the RMS value of the audio chunk + rms = np.sqrt(np.mean(np.square(chunk))) + return rms < threshold + recorder = PvRecorder(frame_length=porcupine.frame_length) recorder.start() @@ -85,8 +84,8 @@ def is_silent(file, threshold=500): elif keyword_index == 1: print("Detected 'bumblebee'") - # Record audio from the microphone with extended silence detection - audio_file = record_audio(timeout=5) # Adjust timeout + # Record audio from the microphone with real-time silence detection + audio_file = record_audio(silence_duration=5) # Convert audio to text using OpenAI API with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: @@ -110,4 +109,4 @@ def is_silent(file, threshold=500): if porcupine is not None: porcupine.delete() recorder.stop() - recorder.delete() \ No newline at end of file + recorder.delete() From 4bf1250e72f42de0c32d64aaae66b70e321e7435 Mon Sep 17 00:00:00 2001 From: RiannaBarrett Date: Tue, 30 Jul 2024 11:32:52 -0400 Subject: [PATCH 12/14] removed pv recorder --- voiceassist.py | 127 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 voiceassist.py diff --git a/voiceassist.py b/voiceassist.py new file mode 100644 index 0000000..96890ca --- /dev/null +++ b/voiceassist.py @@ -0,0 +1,127 @@ +import sounddevice as sd +import numpy as np +import tempfile +import wavio +from openai import OpenAI +import threading +import os +import pvporcupine +import time +from response import handle_interaction +from dotenv import load_dotenv + +load_dotenv() + +# Retrieve the OpenAI API key and Porcupine access key from environment variables +openai_api_key = os.getenv("OPENAI_API_KEY") +porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") + +if not openai_api_key: + raise ValueError("OpenAI API key is not set in environment variables.") +if not porcupine_access_key: + raise ValueError("Porcupine access key is not set in environment variables.") + +# Initialize Porcupine +porcupine = pvporcupine.create( + access_key=porcupine_access_key, + keywords=["picovoice", "bumblebee"] +) + + +# Initialize the OpenAI client +client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) + + +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=1000, silence_duration=10): + """ + Record audio in real-time and stop when silence is detected for the specified duration. + """ + print("Recording... Press Ctrl+C to stop.") + audio_file = [] + silence_start_time = None + chunk_size = int(chunk_duration * samplerate) + + try: + with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16', callback=lambda indata, frames, time, status: audio_file.append(indata.copy())): + while True: + if len(audio_file) > 0: + last_chunk = audio_file[-1] + + # Check if the last chunk is silent + if is_silent(last_chunk, silence_threshold): + if silence_start_time is None: + silence_start_time = time.time() + elif time.time() - silence_start_time > silence_duration: + print("Silence detected, stopping recording.") + break + else: + silence_start_time = None + + except KeyboardInterrupt: + print("Recording stopped manually.") + + return np.concatenate(audio_file, axis=0) if audio_file else np.array([]) + + +# Define frame length and sample rate +frame_length = porcupine.frame_length +sample_rate = porcupine.sample_rate + +def is_silent(chunk, threshold=1000): + """ + Returns True if the audio chunk is below the silent threshold. + """ + # Calculate the RMS value of the audio chunk + rms = np.sqrt(np.mean(np.square(chunk))) + return rms < threshold + +def detect_wake_word(): + """ + Detect wake words using Porcupine. + """ + try: + with sd.InputStream(samplerate=sample_rate, channels=1, dtype='int16') as stream: + while True: + pcm = stream.read(frame_length)[0] + pcm = np.frombuffer(pcm, dtype=np.int16) + keyword_index = porcupine.process(pcm) + + if keyword_index == 0: + print("Detected 'picovoice'") + return 'picovoice' + elif keyword_index == 1: + print("Detected 'bumblebee'") + return 'bumblebee' + + except KeyboardInterrupt: + print("Script interrupted.") + +try: + while True: + wake_word = detect_wake_word() + + if wake_word == 'bumblebee': + # Record audio from the microphone with real-time silence detection + audio_file = record_audio(silence_duration=5) + + # Convert audio to text using OpenAI API + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: + tmpfilename = tmpfile.name + wavio.write(tmpfilename, audio_file, 44100, sampwidth=2) + + transcription = client.audio.transcriptions.create( + model="whisper-1", + file=open(tmpfilename, "rb"), + ) + + print("Transcription:", transcription.text) + + # Start a new thread for handling the interaction + interaction_thread = threading.Thread(target=handle_interaction, args=(transcription.text,)) + interaction_thread.start() + +except KeyboardInterrupt: + print("Script interrupted.") +finally: + if porcupine is not None: + porcupine.delete() From 42811f542371e682fa4a3ef81b2adf9c351cb4e5 Mon Sep 17 00:00:00 2001 From: RiannaBarrett Date: Tue, 13 Aug 2024 13:23:16 -0400 Subject: [PATCH 13/14] conversational without repeating wake word --- voiceassist.py | 115 +++++++++++++++++++++++++------------------------ 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/voiceassist.py b/voiceassist.py index 96890ca..c78bdcb 100644 --- a/voiceassist.py +++ b/voiceassist.py @@ -31,11 +31,31 @@ # Initialize the OpenAI client client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) +frame_length = porcupine.frame_length +sample_rate = porcupine.sample_rate + +def is_silent(chunk, threshold=1000): + rms = np.sqrt(np.mean(np.square(chunk))) + return rms < threshold + +def detect_wake_word(): + try: + with sd.InputStream(samplerate=sample_rate, channels=1, dtype='int16') as stream: + while True: + pcm = stream.read(frame_length)[0] + pcm = np.frombuffer(pcm, dtype=np.int16) + keyword_index = porcupine.process(pcm) + + if keyword_index == 0: + print("Detected 'picovoice'") + return 'picovoice' + elif keyword_index == 1: + print("Detected 'bumblebee'") + return 'bumblebee' + except KeyboardInterrupt: + print("Script interrupted.") def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=1000, silence_duration=10): - """ - Record audio in real-time and stop when silence is detected for the specified duration. - """ print("Recording... Press Ctrl+C to stop.") audio_file = [] silence_start_time = None @@ -62,63 +82,46 @@ def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=1000, sil return np.concatenate(audio_file, axis=0) if audio_file else np.array([]) - -# Define frame length and sample rate -frame_length = porcupine.frame_length -sample_rate = porcupine.sample_rate - -def is_silent(chunk, threshold=1000): - """ - Returns True if the audio chunk is below the silent threshold. - """ - # Calculate the RMS value of the audio chunk - rms = np.sqrt(np.mean(np.square(chunk))) - return rms < threshold - -def detect_wake_word(): - """ - Detect wake words using Porcupine. - """ - try: - with sd.InputStream(samplerate=sample_rate, channels=1, dtype='int16') as stream: - while True: - pcm = stream.read(frame_length)[0] - pcm = np.frombuffer(pcm, dtype=np.int16) - keyword_index = porcupine.process(pcm) - - if keyword_index == 0: - print("Detected 'picovoice'") - return 'picovoice' - elif keyword_index == 1: - print("Detected 'bumblebee'") - return 'bumblebee' - - except KeyboardInterrupt: - print("Script interrupted.") - -try: +def handle_follow_up(): + print("Listening for follow-up command...") + audio_file = record_audio(silence_duration=5) + + if len(audio_file) > 0: + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: + tmpfilename = tmpfile.name + wavio.write(tmpfilename, audio_file, 44100, sampwidth=2) + + transcription = client.audio.transcriptions.create( + model="whisper-1", + file=open(tmpfilename, "rb"), + ) + + print("Transcription:", transcription.text) + + interaction_thread = threading.Thread(target=handle_interaction, args=(transcription.text,)) + interaction_thread.start() + interaction_thread.join() + + # After handling the follow-up, check if there is more to listen for + handle_follow_up() + else: + print("No follow-up detected. Restarting wake word detection.") + wake_word_thread = threading.Thread(target=detect_wake_word_instance) + wake_word_thread.start() + +def detect_wake_word_instance(): while True: wake_word = detect_wake_word() - if wake_word == 'bumblebee': - # Record audio from the microphone with real-time silence detection - audio_file = record_audio(silence_duration=5) - - # Convert audio to text using OpenAI API - with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: - tmpfilename = tmpfile.name - wavio.write(tmpfilename, audio_file, 44100, sampwidth=2) - - transcription = client.audio.transcriptions.create( - model="whisper-1", - file=open(tmpfilename, "rb"), - ) + handle_follow_up() + else: + break - print("Transcription:", transcription.text) - - # Start a new thread for handling the interaction - interaction_thread = threading.Thread(target=handle_interaction, args=(transcription.text,)) - interaction_thread.start() +try: + while True: + wake_word_thread = threading.Thread(target=detect_wake_word_instance) + wake_word_thread.start() + wake_word_thread.join() except KeyboardInterrupt: print("Script interrupted.") From 094716aeebd17f1e807dc485a7a92223e84280ee Mon Sep 17 00:00:00 2001 From: RiannaBarrett Date: Thu, 15 Aug 2024 12:52:09 -0400 Subject: [PATCH 14/14] update with comments --- Archives/TTStest.py | 11 ++ Archives/text-to-response-test.py | 69 ++++++++++ Archives/wake-word-assistant.py | 187 +++++++++++++++++++++++++++ Archives/wake-word-audio.py | 112 +++++++++++++++++ Archives/wake-word-detect.py | 177 ++++++++++++++++++++++++++ Archives/wake-word-thread.py | 202 ++++++++++++++++++++++++++++++ response.py | 12 ++ voiceassist.py | 16 +++ 8 files changed, 786 insertions(+) create mode 100644 Archives/TTStest.py create mode 100644 Archives/text-to-response-test.py create mode 100644 Archives/wake-word-assistant.py create mode 100644 Archives/wake-word-audio.py create mode 100644 Archives/wake-word-detect.py create mode 100644 Archives/wake-word-thread.py diff --git a/Archives/TTStest.py b/Archives/TTStest.py new file mode 100644 index 0000000..fd72132 --- /dev/null +++ b/Archives/TTStest.py @@ -0,0 +1,11 @@ +from openai import OpenAI + +client = OpenAI() + +response = client.audio.speech.create( + model="tts-1-hd", + voice="echo", + input="Test Audio", +) + +response.stream_to_file("output.mp3") \ No newline at end of file diff --git a/Archives/text-to-response-test.py b/Archives/text-to-response-test.py new file mode 100644 index 0000000..06594d5 --- /dev/null +++ b/Archives/text-to-response-test.py @@ -0,0 +1,69 @@ +from openai import OpenAI +import io +import os +import time + +client = OpenAI(default_headers={"OpenAI-Beta": "assistants=v2"}) + + +def query_and_record(prompt, mp3_filename): + # Retrieve the OpenAI API key from environment variables + api_key = os.getenv("OPENAI_API_KEY") + if not api_key: + raise ValueError("OpenAI API key is not set in environment variables.") + + + assistant = client.beta.assistants.create( + name="Senior Tech Help", + instructions="You are a helpful tech teach specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step by step instructions for their response.", + model="gpt-4-turbo", + ) + + + thread = client.beta.threads.create() + + message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content=prompt + ) + + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please address the user as Jane Doe. The user has a premium account." + ) + + while run.status == "in_progress" or run.status == "queued": + time.sleep(1) + run = client.beta.threads.runs.retrieve( + thread_id=thread.id, + run_id=run.id + ) + + if run.status == "completed": + message_list = client.beta.threads.messages.list( + thread_id=thread.id + ) + + + + # Extract the text from the response + text_response = message_list.data[0].content[0].text.value + + #print(text_response) + + response = client.audio.speech.create( + model="tts-1-hd", + voice="echo", + input=text_response, + ) + + response.stream_to_file(mp3_filename) + + return print("Response recorded to " + mp3_filename) + +# Example usage +prompt = input("Enter your tech question for Helper Bee:") +mp3_filename = "response.mp3" +query_and_record(prompt, mp3_filename) diff --git a/Archives/wake-word-assistant.py b/Archives/wake-word-assistant.py new file mode 100644 index 0000000..e1040d8 --- /dev/null +++ b/Archives/wake-word-assistant.py @@ -0,0 +1,187 @@ +from dotenv import load_dotenv +import sounddevice as sd +import struct +import numpy as np +import tempfile +import pvporcupine +import wave +import os +import time +from pvrecorder import PvRecorder +import wavio +from openai import OpenAI +import pygame + +load_dotenv() + +# Retrieve the OpenAI API key and Porcupine access key from environment variables +openai_api_key = os.getenv("OPENAI_API_KEY") +porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") +assistant_api_key = os.getenv("ASSISTANT_API_KEY") + +sd.default.device = None #'seeed-2mic-voicecard' + +if not openai_api_key: + raise ValueError("OpenAI API key is not set in environment variables.") +if not porcupine_access_key: + raise ValueError("Porcupine access key is not set in environment variables.") + +# Initialize OpenAI client +client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) + +# Initialize Porcupine +porcupine = pvporcupine.create( + access_key=porcupine_access_key, + keywords=["picovoice", "bumblebee"] +) + +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=2000): + """ + Record audio from the default microphone until silence is detected. + """ + print("Recording... Press Ctrl+C to stop.") + audio_file = [] + + try: + while True: + recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16') + sd.wait() + audio_file.append(recording) + + # Check if the last recorded chunk is silent + if is_silent(recording, silence_threshold): + print("Silence detected, stopping recording.") + break + + except KeyboardInterrupt: + print("Recording stopped manually.") + + if audio_file: + audio_file = np.concatenate(audio_file, axis=0) + return audio_file + else: + raise ValueError("No audio file recorded.") + +def is_silent(file, threshold=500): + """ + Returns True if the audio file is below the silent threshold. + """ + return np.abs(file).mean() < threshold + +def query_and_record(prompt): + """ + Send a prompt to the OpenAI assistant and record the response as an MP3 file. + """ + # # Create an assistant instance + # assistant = client.beta.assistants.create( + # name="Senior Tech Help", + # instructions="You are a helpful tech teacher specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology-related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step-by-step instructions for their response.", + # model="gpt-4o" + # ) + assistant_id = assistant_api_key + + # Create a thread for communication + thread = client.beta.threads.create() + + # Send user's prompt to the AI + message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content=prompt + ) + + # Start the AI to process the user prompt + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant_id, + instructions="Please address the user as Jane Doe. The user has a premium account." + ) + + # Wait until AI is complete with processing + while run.status in ["in_progress", "queued"]: + time.sleep(1) + run = client.beta.threads.runs.retrieve( + thread_id=thread.id, + run_id=run.id + ) + + if run.status == "completed": + message_list = client.beta.threads.messages.list( + thread_id=thread.id + ) + # Extract the text content from the response + # text_response = "" + # for message in message_list.data: + # if message.role == "assistant" and message.content: + # text_response += message.content + "\n" + + + text_response = message_list.data[0].content[0].text.value + + # Generate an audio response from the text + response = client.audio.speech.create( + model="tts-1-hd", + voice="echo", + input=text_response, + ) + + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile: + mp3_filename = tmpfile.name + response.stream_to_file(mp3_filename) + + print("Response recorded to " + mp3_filename) + + # Initialize pygame mixer + pygame.mixer.init() + + # Load the mp3 file + pygame.mixer.music.load(mp3_filename) + + # Play the mp3 file + pygame.mixer.music.play() + + # Wait until the music finishes playing + while pygame.mixer.music.get_busy(): + pygame.time.Clock().tick(10) + +# Main loop for keyword detection and interaction +recorder = PvRecorder(frame_length=porcupine.frame_length) +recorder.start() + +try: + while True: + pcm = recorder.read() + keyword_index = porcupine.process(pcm) + + if keyword_index == 0: + print("Detected 'picovoice'") + elif keyword_index == 1: + print("Detected 'bumblebee'") + + # Record audio from the microphone + audio_file = record_audio() + + # Convert audio to text using OpenAI API + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: + tmpfilename = tmpfile.name + wavio.write(tmpfilename, audio_file, 44100, sampwidth=2) + + transcription = client.audio.transcriptions.create( + model="whisper-1", + file=open(tmpfilename, "rb"), + ) + + print("Transcription:", transcription.text) + + # Example usage + prompt = transcription.text + query_and_record(prompt) + +except KeyboardInterrupt: + print("Script interrupted.") +finally: +# Ensuring proper release of resources + if porcupine is not None: + porcupine.delete() + recorder.stop() + recorder.delete() diff --git a/Archives/wake-word-audio.py b/Archives/wake-word-audio.py new file mode 100644 index 0000000..bf371c5 --- /dev/null +++ b/Archives/wake-word-audio.py @@ -0,0 +1,112 @@ +import sounddevice as sd +import numpy as np +import tempfile +import wavio +from openai import OpenAI +import threading +import os +import pvporcupine +#from pvrecorder import PvRecorder +import time +from response import handle_interaction +from dotenv import load_dotenv + +load_dotenv() + +# Retrieve the OpenAI API key and Porcupine access key from environment variables +openai_api_key = os.getenv("OPENAI_API_KEY") +porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") + +if not openai_api_key: + raise ValueError("OpenAI API key is not set in environment variables.") +if not porcupine_access_key: + raise ValueError("Porcupine access key is not set in environment variables.") + +# Initialize Porcupine +porcupine = pvporcupine.create( + access_key=porcupine_access_key, + keywords=["picovoice", "bumblebee"] +) + +# Initialize the OpenAI client +client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) + +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=1000, silence_duration=10): + """ + Record audio in real-time and stop when silence is detected for the specified duration. + """ + print("Recording... Press Ctrl+C to stop.") + audio_file = [] + silence_start_time = None + chunk_size = int(chunk_duration * samplerate) + + try: + with sd.InputStream(samplerate=samplerate, channels=1, dtype='int16', callback=lambda indata, frames, time, status: audio_file.append(indata.copy())): + while True: + if len(audio_file) > 0: + last_chunk = audio_file[-1] + + # Check if the last chunk is silent + if is_silent(last_chunk, silence_threshold): + if silence_start_time is None: + silence_start_time = time.time() + elif time.time() - silence_start_time > silence_duration: + print("Silence detected, stopping recording.") + break + else: + silence_start_time = None + + except KeyboardInterrupt: + print("Recording stopped manually.") + + return np.concatenate(audio_file, axis=0) if audio_file else np.array([]) + + +def is_silent(chunk, threshold=1000): + """ + Returns True if the audio chunk is below the silent threshold. + """ + # Calculate the RMS value of the audio chunk + rms = np.sqrt(np.mean(np.square(chunk))) + return rms < threshold + + +recorder = sounddevice.InputStream(frame_length=porcupine.frame_length) +recorder.start() + +try: + while True: + pcm = recorder.read() + keyword_index = porcupine.process(pcm) + + if keyword_index == 0: + print("Detected 'picovoice'") + elif keyword_index == 1: + print("Detected 'bumblebee'") + + # Record audio from the microphone with real-time silence detection + audio_file = record_audio(silence_duration=2) + + # Convert audio to text using OpenAI API + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: + tmpfilename = tmpfile.name + wavio.write(tmpfilename, audio_file, 44100, sampwidth=2) + + transcription = client.audio.transcriptions.create( + model="whisper-1", + file=open(tmpfilename, "rb"), + ) + + print("Transcription:", transcription.text) + + # Start a new thread for handling the interaction + interaction_thread = threading.Thread(target=handle_interaction, args=(transcription.text,)) + interaction_thread.start() + +except KeyboardInterrupt: + print("Script interrupted.") +finally: + if porcupine is not None: + porcupine.delete() + recorder.stop() + recorder.delete() diff --git a/Archives/wake-word-detect.py b/Archives/wake-word-detect.py new file mode 100644 index 0000000..610d921 --- /dev/null +++ b/Archives/wake-word-detect.py @@ -0,0 +1,177 @@ +from dotenv import load_dotenv +import sounddevice as sd +import struct +import numpy as np +import tempfile +import pvporcupine +import wave +import os +import time +from pvrecorder import PvRecorder +import wavio +from openai import OpenAI + +load_dotenv() + +# Retrieve the OpenAI API key and Porcupine access key from environment variables +openai_api_key = os.getenv("OPENAI_API_KEY") +porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") + +sd.default.device = None #'seeed-2mic-voicecard' + +if not openai_api_key: + raise ValueError("OpenAI API key is not set in environment variables.") +if not porcupine_access_key: + raise ValueError("Porcupine access key is not set in environment variables.") + +# Initialize OpenAI client +client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) + +# Initialize Porcupine +porcupine = pvporcupine.create( + access_key=porcupine_access_key, + keywords=["picovoice", "bumblebee"] +) +# paud = pyaudio.PyAudio() +# audio_frame = paud.open(rate=porcupine.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=porcupine.frame_length) + +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500): + """ + Record audio from the default microphone until silence is detected. + """ + print("Recording... Press Ctrl+C to stop.") + audio_file = [] + + try: + while True: + recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16') + sd.wait() + audio_file.append(recording) + + # Check if the last recorded chunk is silent + if is_silent(recording, silence_threshold): + print("Silence detected, stopping recording.") + break + + except KeyboardInterrupt: + print("Recording stopped manually.") + + if audio_file: + audio_file = np.concatenate(audio_file, axis=0) + return audio_file + else: + raise ValueError("No audio file recorded.") + +def is_silent(file, threshold=500): + + """ + Returns True if the audio file is below the silent threshold. + """ + return np.abs(file).mean() < threshold + +def query_and_record(prompt, mp3_filename): + """ + Send a prompt to the OpenAI assistant and record the response as an MP3 file. + """ + # Create an assistant instance + assistant = client.beta.assistants.create( + name="Senior Tech Help", + instructions="You are a helpful tech teacher specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology-related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step-by-step instructions for their response.", + model="gpt-4o" + ) + + # Create a thread for communication + thread = client.beta.threads.create() + + # Send user's prompt to the AI + message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content=prompt + ) + + # Start the AI to process the user prompt + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant.id, + instructions="Please address the user as Jane Doe. The user has a premium account." + ) + + # Wait until AI is complete with processing + while run.status in ["in_progress", "queued"]: + time.sleep(1) + run = client.beta.threads.runs.retrieve( + thread_id=thread.id, + run_id=run.id + ) + + if run.status == "completed": + message_list = client.beta.threads.messages.list( + thread_id=thread.id + ) + + # Extract the text content from the response + # text_response = "" + # for message in message_list.data: + # if message.role == "assistant" and message.content: + # text_response += message.content + "\n" + + text_response = message_list.data[0].content[0].text.value + + # Generate an audio response from the text + response = client.audio.speech.create( + model="tts-1-hd", + voice="echo", + input=text_response, + ) + + response.stream_to_file(mp3_filename) + + print("Response recorded to " + mp3_filename) + + +# Main loop for keyword detection and interaction +recorder = PvRecorder(frame_length=porcupine.frame_length) +recorder.start() +wav_file = None + +try: + while True: + pcm = recorder.read() + keyword_index = porcupine.process(pcm) + + if keyword_index == 0: + print("Detected 'picovoice'") + elif keyword_index == 1: + print("Detected 'bumblebee'") + + # Record audio from the microphone + audio_file = record_audio() + + # Convert audio to text using OpenAI API + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: + tmpfilename = tmpfile.name + wavio.write(tmpfilename, audio_file, 44100, sampwidth=2) + + transcription = client.audio.transcriptions.create( + model="whisper-1", + file=open(tmpfilename, "rb"), + ) + + print("Transcription:", transcription.text) + + # Example usage + prompt = transcription.text + mp3_filename = "response.mp3" + query_and_record(prompt, mp3_filename) + +except KeyboardInterrupt: + print("Script interrupted.") +finally: +# Ensuring proper release of resources + if porcupine is not None: + porcupine.delete() + # if audio_frame is not None: + # audio_frame.close() + # if paud is not None: + # paud.terminate() diff --git a/Archives/wake-word-thread.py b/Archives/wake-word-thread.py new file mode 100644 index 0000000..7e91f51 --- /dev/null +++ b/Archives/wake-word-thread.py @@ -0,0 +1,202 @@ +from dotenv import load_dotenv +import sounddevice as sd +import struct +import numpy as np +import tempfile +import pvporcupine +import wave +import os +import time +from pvrecorder import PvRecorder +import wavio +from openai import OpenAI +import pygame +import threading + +load_dotenv() + +# Retrieve the OpenAI API key and Porcupine access key from environment variables +openai_api_key = os.getenv("OPENAI_API_KEY") +porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY") +assistant_api_key = os.getenv("ASSISTANT_API_KEY") + +sd.default.device = None # 'seeed-2mic-voicecard' + +if not openai_api_key: + raise ValueError("OpenAI API key is not set in environment variables.") +if not porcupine_access_key: + raise ValueError("Porcupine access key is not set in environment variables.") + +# Initialize OpenAI client +client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"}) + +# Initialize Porcupine +porcupine = pvporcupine.create( + access_key=porcupine_access_key, + keywords=["picovoice", "bumblebee"] +) + +thread_id = None #store the thread ID + +def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=2000): + """ + Record audio from the default microphone until silence is detected. + """ + print("Recording... Press Ctrl+C to stop.") + audio_file = [] + + try: + while True: + recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16') + sd.wait() + audio_file.append(recording) + + # Check if the last recorded chunk is silent + if is_silent(recording, silence_threshold): + print("Silence detected, stopping recording.") + break + + except KeyboardInterrupt: + print("Recording stopped manually.") + + if audio_file: + audio_file = np.concatenate(audio_file, axis=0) + return audio_file + else: + raise ValueError("No audio file recorded.") + +def is_silent(file, threshold=500): + """ + Returns True if the audio file is below the silent threshold. + """ + return np.abs(file).mean() < threshold + +def query_and_record(prompt): + """ + Send a prompt to the OpenAI assistant and record the response as an MP3 file. + """ + # # Create an assistant instance + # assistant = client.beta.assistants.create( + # name="Senior Tech Help", + # instructions="You are a helpful tech teacher specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology-related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step-by-step instructions for their response.", + # model="gpt-4o" + # ) + assistant_id = assistant_api_key + + global thread_id # Access the global thread ID + + if thread_id is None: + # Create a thread for communication + thread = client.beta.threads.create() + thread_id = thread.id + print(f"New thread created with ID: {thread_id}") + else: + # Retrieve the existing thread + thread = client.beta.threads.retrieve(thread_id) + print(f"Using existing thread with ID: {thread_id}") + + # Send user's prompt to the AI + message = client.beta.threads.messages.create( + thread_id=thread.id, + role="user", + content=prompt + ) + + # Start the AI to process the user prompt + run = client.beta.threads.runs.create( + thread_id=thread.id, + assistant_id=assistant_id, + instructions="Please address the user as Jane Doe. The user has a premium account." + ) + + # Wait until AI is complete with processing + while run.status in ["in_progress", "queued"]: + time.sleep(1) + run = client.beta.threads.runs.retrieve( + thread_id=thread.id, + run_id=run.id + ) + + if run.status == "completed": + message_list = client.beta.threads.messages.list( + thread_id=thread.id + ) + + text_response = message_list.data[0].content[0].text.value + + # Generate an audio response from the text + response = client.audio.speech.create( + model="tts-1-hd", + voice="echo", + input=text_response, + ) + + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile: + mp3_filename = tmpfile.name + response.stream_to_file(mp3_filename) + + print("Response recorded to " + mp3_filename) + + # Initialize pygame mixer + pygame.mixer.init() + + # Load the mp3 file + pygame.mixer.music.load(mp3_filename) + + # Play the mp3 file + pygame.mixer.music.play() + + # Wait until the response finishes playing + while pygame.mixer.music.get_busy(): + pygame.time.Clock().tick(10) + +def handle_interaction(prompt): + """ + Handle the interaction with the AI in a separate thread. + """ + query_and_record(prompt) + +# Main loop for keyword detection and interaction +recorder = PvRecorder(frame_length=porcupine.frame_length) +recorder.start() + +try: + while True: + pcm = recorder.read() + keyword_index = porcupine.process(pcm) + + if keyword_index == 0: + print("Detected 'picovoice'") + elif keyword_index == 1: + print("Detected 'bumblebee'") + + # Record audio from the microphone + audio_file = record_audio() + + # Convert audio to text using OpenAI API + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile: + tmpfilename = tmpfile.name + wavio.write(tmpfilename, audio_file, 44100, sampwidth=2) + + transcription = client.audio.transcriptions.create( + model="whisper-1", + file=open(tmpfilename, "rb"), + ) + + print("Transcription:", transcription.text) + + # Example usage + prompt = transcription.text + + # Start a new thread for handling the interaction + interaction_thread = threading.Thread(target=handle_interaction, args=(prompt,)) + interaction_thread.start() + +except KeyboardInterrupt: + print("Script interrupted.") +finally: +# Ensuring proper release of resources + if porcupine is not None: + porcupine.delete() + recorder.stop() + recorder.delete() diff --git a/response.py b/response.py index 0062ab3..d532f97 100644 --- a/response.py +++ b/response.py @@ -1,3 +1,15 @@ +""" +This script interacts with an OpenAI assistant by sending text prompts and receiving voice responses. +It performs the following tasks: +1. Loads environment variables for the OpenAI API key and assistant access key. +2. Initializes the audio device using the sounddevice library. +3. Sets up an OpenAI client with the provided API key. +4. Manages conversation threads with the assistant, either creating a new one or continuing an existing one. +5. Sends user prompts to the assistant and retrieves responses. +6. Converts the assistant's text response into an audio file. +7. Plays the audio response using the pygame library. +""" + import tempfile import os from openai import OpenAI diff --git a/voiceassist.py b/voiceassist.py index c78bdcb..f7bdbc1 100644 --- a/voiceassist.py +++ b/voiceassist.py @@ -1,3 +1,19 @@ +""" +This script is designed for detecting a wake word and handling voice interactions with an OpenAI assistant. +The script performs the following tasks: + +1. Imports necessary libraries such as sounddevice, numpy, tempfile, wavio, and OpenAI. +2. Loads environment variables to retrieve the OpenAI API key and Porcupine access key, which are essential for authentication and wake word detection. +3. Initializes the Porcupine wake word engine to listen for specific keywords ("picovoice" and "bumblebee"). +4. Sets up an OpenAI client for sending user prompts and receiving responses. +5. Defines functions to: + - Detect the wake word using an audio stream. + - Record audio until silence is detected, indicating the end of the user's command. + - Handle follow-up commands by continuously listening after the initial response. +6. The main loop continuously listens for the wake word and handles interactions as long as the script is running. +7. Ensures proper cleanup by deleting the Porcupine instance when the script is interrupted. +""" + import sounddevice as sd import numpy as np import tempfile