Skip to content

Commit

Permalink
Merge pull request #280 from pipecat-ai/aleix/library-updates-070224
Browse files Browse the repository at this point in the history
library updates 070224 and pipecat 0.0.36
  • Loading branch information
aconchillo authored Jul 2, 2024
2 parents 8f6db5e + 3147534 commit 065cfb2
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 35 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ All notable changes to **pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
## [0.0.36] - 2024-07-02

### Added

Expand Down Expand Up @@ -61,6 +61,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Other

- Added a Fly.io deployment example in `examples/deployment/flyio-example`.

- Added a new `17-detect-user-idle.py` example that shows how to use the new
`UserIdleProcessor`.

Expand Down
5 changes: 3 additions & 2 deletions examples/foundational/06a-image-sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,12 @@ async def main(room_url: str, token):
"Respond bot",
DailyParams(
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_width=1024,
camera_out_height=1024,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer()
vad_analyzer=SileroVADAnalyzer(),
)
)

Expand Down Expand Up @@ -116,7 +117,7 @@ async def main(room_url: str, token):
async def on_first_participant_joined(transport, participant):
participant_name = participant["info"]["userName"] or ''
transport.capture_participant_transcription(participant["id"])
await task.queue_frames([TextFrame(f"Hi, this is {participant_name}.")])
await task.queue_frames([TextFrame(f"Hi there {participant_name}!")])

runner = PipelineRunner()

Expand Down
16 changes: 9 additions & 7 deletions linux-py3.10-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ aiosignal==1.3.1
# via aiohttp
annotated-types==0.7.0
# via pydantic
anthropic==0.25.9
anthropic==0.28.1
# via
# openpipe
# pipecat-ai (pyproject.toml)
Expand All @@ -38,7 +38,7 @@ attrs==23.2.0
# openpipe
av==12.2.0
# via faster-whisper
azure-cognitiveservices-speech==1.37.0
azure-cognitiveservices-speech==1.38.0
# via pipecat-ai (pyproject.toml)
blinker==1.8.2
# via flask
Expand Down Expand Up @@ -117,7 +117,7 @@ fsspec==2024.6.1
# torch
future==1.0.0
# via pyloudnorm
google-ai-generativelanguage==0.6.4
google-ai-generativelanguage==0.6.6
# via google-generativeai
google-api-core[grpc]==2.19.1
# via
Expand All @@ -135,7 +135,7 @@ google-auth==2.31.0
# google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-generativeai==0.5.4
google-generativeai==0.7.1
# via pipecat-ai (pyproject.toml)
googleapis-common-protos==1.63.2
# via
Expand Down Expand Up @@ -197,6 +197,8 @@ jinja2==3.1.4
# fastapi
# flask
# torch
jiter==0.5.0
# via anthropic
jsonpatch==1.33
# via langchain-core
jsonpointer==3.0.0
Expand All @@ -217,7 +219,7 @@ langchain-openai==0.1.10
# via pipecat-ai (pyproject.toml)
langchain-text-splitters==0.2.2
# via langchain
langsmith==0.1.82
langsmith==0.1.83
# via
# langchain
# langchain-community
Expand Down Expand Up @@ -294,12 +296,12 @@ nvidia-nvtx-cu12==12.1.105
# via torch
onnxruntime==1.18.1
# via faster-whisper
openai==1.26.0
openai==1.27.0
# via
# langchain-openai
# openpipe
# pipecat-ai (pyproject.toml)
openpipe==4.14.0
openpipe==4.16.0
# via pipecat-ai (pyproject.toml)
orjson==3.10.5
# via
Expand Down
16 changes: 9 additions & 7 deletions macos-py3.10-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ aiosignal==1.3.1
# via aiohttp
annotated-types==0.7.0
# via pydantic
anthropic==0.25.9
anthropic==0.28.1
# via
# openpipe
# pipecat-ai (pyproject.toml)
Expand All @@ -38,7 +38,7 @@ attrs==23.2.0
# openpipe
av==12.2.0
# via faster-whisper
azure-cognitiveservices-speech==1.37.0
azure-cognitiveservices-speech==1.38.0
# via pipecat-ai (pyproject.toml)
blinker==1.8.2
# via flask
Expand Down Expand Up @@ -116,7 +116,7 @@ fsspec==2024.6.1
# torch
future==1.0.0
# via pyloudnorm
google-ai-generativelanguage==0.6.4
google-ai-generativelanguage==0.6.6
# via google-generativeai
google-api-core[grpc]==2.19.1
# via
Expand All @@ -134,7 +134,7 @@ google-auth==2.31.0
# google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-generativeai==0.5.4
google-generativeai==0.7.1
# via pipecat-ai (pyproject.toml)
googleapis-common-protos==1.63.2
# via
Expand Down Expand Up @@ -194,6 +194,8 @@ jinja2==3.1.4
# fastapi
# flask
# torch
jiter==0.5.0
# via anthropic
jsonpatch==1.33
# via langchain-core
jsonpointer==3.0.0
Expand All @@ -214,7 +216,7 @@ langchain-openai==0.1.10
# via pipecat-ai (pyproject.toml)
langchain-text-splitters==0.2.2
# via langchain
langsmith==0.1.82
langsmith==0.1.83
# via
# langchain
# langchain-community
Expand Down Expand Up @@ -260,12 +262,12 @@ numpy==1.26.4
# transformers
onnxruntime==1.18.1
# via faster-whisper
openai==1.26.0
openai==1.27.0
# via
# langchain-openai
# openpipe
# pipecat-ai (pyproject.toml)
openpipe==4.14.0
openpipe==4.16.0
# via pipecat-ai (pyproject.toml)
orjson==3.10.5
# via
Expand Down
22 changes: 11 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,25 +34,25 @@ Source = "https://github.com/pipecat-ai/pipecat"
Website = "https://pipecat.ai"

[project.optional-dependencies]
anthropic = [ "anthropic~=0.25.7" ]
azure = [ "azure-cognitiveservices-speech~=1.37.0" ]
cartesia = [ "cartesia~=1.0.0" ]
anthropic = [ "anthropic~=0.28.1" ]
azure = [ "azure-cognitiveservices-speech~=1.38.0" ]
cartesia = [ "cartesia~=1.0.3" ]
daily = [ "daily-python~=0.10.1" ]
deepgram = [ "deepgram-sdk~=3.2.7" ]
examples = [ "python-dotenv~=1.0.0", "flask~=3.0.3", "flask_cors~=4.0.1" ]
fal = [ "fal-client~=0.4.0" ]
fal = [ "fal-client~=0.4.1" ]
gladia = [ "websockets~=12.0" ]
google = [ "google-generativeai~=0.5.3" ]
fireworks = [ "openai~=1.26.0" ]
langchain = [ "langchain~=0.2.1", "langchain-community~=0.2.1", "langchain-openai~=0.1.8" ]
google = [ "google-generativeai~=0.7.1" ]
fireworks = [ "openai~=1.27.0" ]
langchain = [ "langchain~=0.2.6", "langchain-community~=0.2.6", "langchain-openai~=0.1.10" ]
local = [ "pyaudio~=0.2.0" ]
moondream = [ "einops~=0.8.0", "timm~=0.9.16", "transformers~=4.40.2" ]
openai = [ "openai~=1.26.0" ]
openpipe = [ "openpipe~=4.14.0" ]
openai = [ "openai~=1.27.0" ]
openpipe = [ "openpipe~=4.16.0" ]
playht = [ "pyht~=0.0.28" ]
silero = [ "torch~=2.3.0", "torchaudio~=2.3.0" ]
silero = [ "torch~=2.3.1", "torchaudio~=2.3.1" ]
websocket = [ "websockets~=12.0", "fastapi~=0.111.0" ]
whisper = [ "faster-whisper~=1.0.2" ]
whisper = [ "faster-whisper~=1.0.3" ]
xtts = [ "resampy~=0.4.3" ]

[tool.setuptools.packages.find]
Expand Down
7 changes: 4 additions & 3 deletions src/pipecat/services/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@
ErrorFrame,
Frame,
StartFrame,
StartInterruptionFrame,
SystemFrame,
TranscriptionFrame,
URLImageRawFrame)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.ai_services import AIService, AsyncAIService, TTSService, ImageGenService
from pipecat.services.ai_services import AsyncAIService, TTSService, ImageGenService
from pipecat.services.openai import BaseOpenAILLMService

from loguru import logger
Expand Down Expand Up @@ -83,7 +82,7 @@ def can_generate_metrics(self) -> bool:
return True

async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
logger.debug(f"Generating TTS: {text}")
logger.debug(f"Generating TTS: [{text}]")

await self.start_ttfb_metrics()

Expand Down Expand Up @@ -148,9 +147,11 @@ async def start(self, frame: StartFrame):

async def stop(self, frame: EndFrame):
self._speech_recognizer.stop_continuous_recognition_async()
self._audio_stream.close()

async def cancel(self, frame: CancelFrame):
self._speech_recognizer.stop_continuous_recognition_async()
self._audio_stream.close()

def _on_handle_recognized(self, event):
if event.result.reason == ResultReason.RecognizedSpeech and len(event.result.text) > 0:
Expand Down
19 changes: 15 additions & 4 deletions src/pipecat/services/cartesia.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from typing import AsyncGenerator

from pipecat.frames.frames import AudioRawFrame, Frame
from pipecat.frames.frames import AudioRawFrame, CancelFrame, EndFrame, Frame, StartFrame
from pipecat.services.ai_services import TTSService

from loguru import logger
Expand All @@ -28,21 +28,32 @@ def __init__(
super().__init__(**kwargs)

self._api_key = api_key
self._voice_id = voice_id
self._model_id = model_id
self._output_format = {
"container": "raw",
"encoding": encoding,
"sample_rate": sample_rate,
}
self._client = None

def can_generate_metrics(self) -> bool:
return True

async def start(self, frame: StartFrame):
try:
self._client = AsyncCartesia(api_key=self._api_key)
self._voice = self._client.voices.get(id=voice_id)
self._voice = self._client.voices.get(id=self._voice_id)
except Exception as e:
logger.exception(f"{self} initialization error: {e}")

def can_generate_metrics(self) -> bool:
return True
async def stop(self, frame: EndFrame):
if self._client:
await self._client.close()

async def cancel(self, frame: CancelFrame):
if self._client:
await self._client.close()

async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
logger.debug(f"Generating TTS: [{text}]")
Expand Down

0 comments on commit 065cfb2

Please sign in to comment.