From 2018772c2dc2451dde11a10c29d163e8a0653329 Mon Sep 17 00:00:00 2001 From: Michael Louis Date: Tue, 22 Oct 2024 13:23:13 -0400 Subject: [PATCH 1/5] Added serverless API --- README.md | 4 ++++ examples/serverless-api/README.md | 33 ++++++++++++++++++++++++++ examples/serverless-api/cerebrium.toml | 19 +++++++++++++++ examples/serverless-api/main.py | 21 ++++++++++++++++ 4 files changed, 77 insertions(+) create mode 100644 examples/serverless-api/README.md create mode 100644 examples/serverless-api/cerebrium.toml create mode 100644 examples/serverless-api/main.py diff --git a/README.md b/README.md index 80c6bb1..ec7f82a 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,10 @@ It is possible to make minor changes to the generated speech. In addition, it is required to use `--rate=-50%` instead of `--rate -50%` (note the lack of an equal sign) otherwise the `-50%` would be interpreted as just another argument. +### Deploying to Serverless API + +See the [examples/serverless-api](examples/serverless-api) folder for more information on how to deploy to [Cerebrium](https://www.cerebrium.ai). + ### Note on the `edge-playback` command `edge-playback` is just a wrapper around `edge-tts` that plays back the generated speech. It takes the same arguments as the `edge-tts` option. diff --git a/examples/serverless-api/README.md b/examples/serverless-api/README.md new file mode 100644 index 0000000..3c0fba1 --- /dev/null +++ b/examples/serverless-api/README.md @@ -0,0 +1,33 @@ +# Serverless Edge TTS API + +This project demonstrates how to run Edge TTS as a serverless API using [Cerebrium](https://www.cerebrium.ai) + +## Overview + +The `main.py` file contains a function `run` that takes a text input and an optional voice parameter to generate audio and subtitles using Edge TTS. + +## Installation + +1. pip install cerebrium +2. cerebrium login +3. 
Make sure you are in the serverless-api folder and run ```cerebrium deploy``` + +## Usage + +Once deployed, you should be able to make a curl request similar to: +``` +curl --location 'https://api.cortex.cerebrium.ai/v4/p-xxxxxx/serverless-api/run' \ +--header 'Authorization: Bearer ' \ +--header 'Content-Type: application/json' \ +--data '{"text": "Tell me something"}' +``` + +The `run` function takes two parameters: + +- `text` (str): The text to be converted to speech +- `voice` (str, optional): The voice to use for TTS (default: "en-GB-SoniaNeural") + +It returns a dictionary containing: + +- `audio_data`: The generated audio bytes as a Latin-1-decoded string (call `.encode("latin-1")` on it to recover the raw bytes) +- `subtitles`: The generated subtitles in WebVTT format diff --git a/examples/serverless-api/cerebrium.toml b/examples/serverless-api/cerebrium.toml new file mode 100644 index 0000000..6983bdb --- /dev/null +++ b/examples/serverless-api/cerebrium.toml @@ -0,0 +1,19 @@ +[cerebrium.deployment] +name = "serverless-api" +python_version = "3.11" +docker_base_image_url = "debian:bookworm-slim" +include = "[./*, main.py, cerebrium.toml]" +exclude = "[.*]" + +[cerebrium.hardware] +cpu = 2 +memory = 12.0 +compute = "CPU" + +[cerebrium.scaling] +min_replicas = 0 +max_replicas = 5 +cooldown = 30 + +[cerebrium.dependencies.pip] +"edge-tts" = "latest" \ No newline at end of file diff --git a/examples/serverless-api/main.py b/examples/serverless-api/main.py new file mode 100644 index 0000000..87aa140 --- /dev/null +++ b/examples/serverless-api/main.py @@ -0,0 +1,21 @@ + +import edge_tts + +async def run(text: str, voice: str = "en-GB-SoniaNeural"): + + communicate = edge_tts.Communicate(text, voice) + submaker = edge_tts.SubMaker() + audio_data = bytearray() + subtitles = "" + + async for chunk in communicate.stream(): + if chunk["type"] == "audio": + audio_data.extend(chunk["data"]) + elif chunk["type"] == "WordBoundary": + submaker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"]) + + subtitles = 
submaker.generate_subs() + return { + "audio_data": audio_data.decode("latin-1"), + "subtitles": subtitles + } \ No newline at end of file From 9d8896b14b355cfd75288c9a3e2705a87cd0b778 Mon Sep 17 00:00:00 2001 From: Michael Louis Date: Tue, 22 Oct 2024 13:25:00 -0400 Subject: [PATCH 2/5] Added serverless API --- examples/serverless-api/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/serverless-api/README.md b/examples/serverless-api/README.md index 3c0fba1..ecad320 100644 --- a/examples/serverless-api/README.md +++ b/examples/serverless-api/README.md @@ -4,7 +4,7 @@ This project demonstrates how to run Edge TTS as a serverless API using [Cerebri ## Overview -The `main.py` file contains a function `run` that takes a text input and an optional voice parameter to generate audio and subtitles using Edge TTS. +The `main.py` file contains a function `run` that takes a text input and an optional voice parameter to generate audio and subtitles using Edge TTS. This example specifically streams the output. ## Installation @@ -14,7 +14,7 @@ The `main.py` file contains a function `run` that takes a text input and an opti ## Usage -Once deployed, you should be able to make a curl request similar to: +Once deployed, you should be able to make a curl request similar to the below. You can find this url on your Cerebrium dashboard. 
``` curl --location 'https://api.cortex.cerebrium.ai/v4/p-xxxxxx/serverless-api/run' \ --header 'Authorization: Bearer ' \ From dfa037b17f521d097e03477dca2e24ce4bec3f42 Mon Sep 17 00:00:00 2001 From: Michael Louis Date: Tue, 22 Oct 2024 13:40:40 -0400 Subject: [PATCH 3/5] Updated toml --- examples/serverless-api/cerebrium.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/serverless-api/cerebrium.toml b/examples/serverless-api/cerebrium.toml index 6983bdb..b978aef 100644 --- a/examples/serverless-api/cerebrium.toml +++ b/examples/serverless-api/cerebrium.toml @@ -7,7 +7,7 @@ exclude = "[.*]" [cerebrium.hardware] cpu = 2 -memory = 12.0 +memory = 6.0 compute = "CPU" [cerebrium.scaling] From 1300546f7bbff274a1f1d44b420e8865266eac49 Mon Sep 17 00:00:00 2001 From: Michael Louis Date: Tue, 22 Oct 2024 13:54:15 -0400 Subject: [PATCH 4/5] Fixed linting --- examples/serverless-api/main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/serverless-api/main.py b/examples/serverless-api/main.py index 87aa140..2c54bf7 100644 --- a/examples/serverless-api/main.py +++ b/examples/serverless-api/main.py @@ -1,7 +1,9 @@ import edge_tts +from typing import Dict +from typing import AsyncGenerator -async def run(text: str, voice: str = "en-GB-SoniaNeural"): +async def run(text: str, voice: str = "en-GB-SoniaNeural") -> AsyncGenerator[Dict[str, str], None]: communicate = edge_tts.Communicate(text, voice) submaker = edge_tts.SubMaker() @@ -15,7 +17,7 @@ async def run(text: str, voice: str = "en-GB-SoniaNeural"): submaker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"]) subtitles = submaker.generate_subs() - return { + yield { "audio_data": audio_data.decode("latin-1"), "subtitles": subtitles } \ No newline at end of file From ed7919a7631ac61ce100640fd7c25e785160bbf5 Mon Sep 17 00:00:00 2001 From: Michael Louis Date: Tue, 22 Oct 2024 14:00:08 -0400 Subject: [PATCH 5/5] Added doc string --- 
examples/serverless-api/main.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/examples/serverless-api/main.py b/examples/serverless-api/main.py index 2c54bf7..40b31d0 100644 --- a/examples/serverless-api/main.py +++ b/examples/serverless-api/main.py @@ -1,10 +1,36 @@ -import edge_tts +""" +This module provides a serverless API for text-to-speech conversion using Edge TTS. + +It includes functionality to generate audio and subtitles from input text, +utilizing the edge_tts library. The main function, `run`, is designed to be +used in a serverless environment, returning an asynchronous generator that +yields audio data and subtitles. + +Dependencies: + - edge_tts: For text-to-speech conversion + - typing: For type hinting + +Usage: + The main entry point is the `run` function, which takes text input + and an optional voice parameter to generate audio and subtitles. +""" + from typing import Dict from typing import AsyncGenerator +import edge_tts async def run(text: str, voice: str = "en-GB-SoniaNeural") -> AsyncGenerator[Dict[str, str], None]: - + """ + Asynchronously generates audio and subtitles for the given text using the specified voice. + + Args: + text (str): The text to be converted to speech. + voice (str): The voice model to use, defaults to "en-GB-SoniaNeural". + + Returns: + AsyncGenerator[Dict[str, str], None]: A generator that yields dictionaries containing the generated audio (`audio_data`) and subtitles (`subtitles`). + """ communicate = edge_tts.Communicate(text, voice) submaker = edge_tts.SubMaker() audio_data = bytearray() @@ -20,4 +46,4 @@ async def run(text: str, voice: str = "en-GB-SoniaNeural") -> AsyncGenerator[Dic yield { "audio_data": audio_data.decode("latin-1"), "subtitles": subtitles - } \ No newline at end of file + }