-
Notifications
You must be signed in to change notification settings - Fork 3.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
removed built-in audio players, split for websocket and rtc
- Loading branch information
1 parent
6d117be
commit 9268d10
Showing
15 changed files
with
689 additions
and
743 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
95 changes: 95 additions & 0 deletions
95
python/samples/concepts/realtime/01-chat_with_realtime_websocket.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# Copyright (c) Microsoft. All rights reserved. | ||
|
||
import asyncio | ||
import logging | ||
|
||
from samples.concepts.realtime.utils import AudioPlayerWebsocket, AudioRecorderWebsocket, check_audio_devices | ||
from semantic_kernel.connectors.ai.open_ai import ( | ||
ListenEvents, | ||
OpenAIRealtime, | ||
OpenAIRealtimeExecutionSettings, | ||
TurnDetection, | ||
) | ||
|
||
# Logging configuration for the sample.
# The root logger is set to WARNING; the sample's audio utilities and this
# module are raised to INFO, while the aiortc/aioice loggers stay at WARNING.
logging.basicConfig(level=logging.WARNING)

utils_log = logging.getLogger("samples.concepts.realtime.utils")
aiortc_log = logging.getLogger("aiortc")
aioice_log = logging.getLogger("aioice")
logger = logging.getLogger(__name__)

utils_log.setLevel(logging.INFO)
aiortc_log.setLevel(logging.WARNING)
aioice_log.setLevel(logging.WARNING)
logger.setLevel(logging.INFO)
|
||
# This simple sample demonstrates how to use the OpenAI Realtime API to create | ||
# a chat bot that can listen and respond directly through audio. | ||
# It requires installing: | ||
# - semantic-kernel[openai_realtime] | ||
# - pyaudio | ||
# - sounddevice | ||
# - pydub | ||
# - aiortc | ||
# e.g. pip install pyaudio sounddevice pydub | ||
|
||
# The characteristics of your speaker and microphone are a big factor in a smooth conversation, | ||
# so you may need to try out different devices for each. | ||
# You can also play around with the turn_detection settings to get the best results. | ||
# Device ids are set in the audio recorder and audio player classes imported from the utils module above, | ||
# so you may need to adjust these for your system. | ||
# You can check the available devices with the check_audio_devices() call below. | ||
check_audio_devices() | ||
|
||
|
||
async def main() -> None: | ||
# create the realtime client and optionally add the audio output function, this is optional | ||
# you can define the protocol to use, either "websocket" or "webrtc" | ||
# they will behave the same way, even though the underlying protocol is quite different | ||
audio_player = AudioPlayerWebsocket() | ||
realtime_client = OpenAIRealtime( | ||
"websocket", | ||
audio_output_callback=audio_player.client_callback, | ||
) | ||
audio_recorder = AudioRecorderWebsocket(realtime_client=realtime_client) | ||
# Create the settings for the session | ||
settings = OpenAIRealtimeExecutionSettings( | ||
instructions=""" | ||
You are a chat bot. Your name is Mosscap and | ||
you have one goal: figure out what people need. | ||
Your full name, should you need to know it, is | ||
Splendid Speckled Mosscap. You communicate | ||
effectively, but you tend to answer with long | ||
flowery prose. | ||
""", | ||
voice="shimmer", | ||
turn_detection=TurnDetection(type="server_vad", create_response=True, silence_duration_ms=800, threshold=0.8), | ||
) | ||
# the context manager calls the create_session method on the client and start listening to the audio stream | ||
print("Mosscap (transcript): ", end="") | ||
|
||
async with realtime_client, audio_player, audio_recorder: | ||
await realtime_client.update_session(settings=settings, create_response=True) | ||
|
||
async for event in realtime_client.receive(): | ||
match event.event_type: | ||
# this can be used as an alternative to the callback function used above, | ||
# the callback is faster and smoother | ||
# case "audio": | ||
# await audio_player.add_audio(event.audio) | ||
case "text": | ||
print(event.text.text, end="") | ||
case "service": | ||
# OpenAI Specific events | ||
if event.service_type == ListenEvents.SESSION_UPDATED: | ||
print("Session updated") | ||
if event.service_type == ListenEvents.RESPONSE_CREATED: | ||
print("") | ||
if event.service_type == ListenEvents.ERROR: | ||
logger.error(event.event) | ||
|
||
|
||
if __name__ == "__main__":
    # Tell the user how to interact with the sample before the session starts.
    print(
        "Instruction: start speaking, when you stop the API should detect you finished and start responding. "
        "Press ctrl + c to stop the program."
    )
    asyncio.run(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.