-
Notifications
You must be signed in to change notification settings - Fork 649
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
77 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Serverless Edge TTS API | ||
|
||
This project demonstrates how to run Edge TTS as a serverless API using [Cerebrium](https://www.cerebrium.ai). | ||
|
||
## Overview | ||
|
||
The `main.py` file contains a function `run` that takes a text input and an optional voice parameter to generate audio and subtitles using Edge TTS. | ||
|
||
## Installation | ||
|
||
1. Run `pip install cerebrium`. | ||
2. Run `cerebrium login`. | ||
3. Make sure you are in the `serverless-api` folder and run `cerebrium deploy`. | ||
|
||
## Usage | ||
|
||
Once deployed, you should be able to make a curl request similar to: | ||
``` | ||
curl --location 'https://api.cortex.cerebrium.ai/v4/p-xxxxxx/serverless-api/run' \ | ||
--header 'Authorization: Bearer <AUTH_TOKEN>' \ | ||
--header 'Content-Type: application/json' \ | ||
--data '{"text": "Tell me something"}' | ||
``` | ||
|
||
The `run` function takes two parameters: | ||
|
||
- `text` (str): The text to be converted to speech | ||
- `voice` (str, optional): The voice to use for TTS (default: "en-GB-SoniaNeural") | ||
|
||
It returns a dictionary containing: | ||
|
||
- `audio_data`: The generated audio as a base64-encoded string | ||
- `subtitles`: The generated subtitles in WebVTT format |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
[cerebrium.deployment] | ||
name = "serverless-api" | ||
python_version = "3.11" | ||
docker_base_image_url = "debian:bookworm-slim" | ||
include = "[./*, main.py, cerebrium.toml]" | ||
exclude = "[.*]" | ||
|
||
[cerebrium.hardware] | ||
cpu = 2 | ||
memory = 12.0 | ||
compute = "CPU" | ||
|
||
[cerebrium.scaling] | ||
min_replicas = 0 | ||
max_replicas = 5 | ||
cooldown = 30 | ||
|
||
[cerebrium.dependencies.pip] | ||
"edge-tts" = "latest" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
|
||
import edge_tts | ||
|
||
async def run(text: str, voice: str = "en-GB-SoniaNeural"):
    """Synthesize *text* with Edge TTS and return the audio and subtitles.

    Args:
        text: The text to convert to speech.
        voice: Name of the Edge TTS voice to synthesize with.

    Returns:
        A dict with two keys:

        - ``audio_data``: every "audio" chunk streamed by Edge TTS,
          concatenated and base64-encoded into an ASCII string.
        - ``subtitles``: the string returned by
          ``SubMaker.generate_subs()`` after feeding it every
          "WordBoundary" chunk.
    """
    # Function-scope import so the block stays self-contained.
    import base64

    communicate = edge_tts.Communicate(text, voice)
    # NOTE(review): create_sub()/generate_subs() may not exist in every
    # edge-tts release -- confirm against the version actually deployed.
    submaker = edge_tts.SubMaker()
    audio_data = bytearray()

    async for chunk in communicate.stream():
        if chunk["type"] == "audio":
            audio_data.extend(chunk["data"])
        elif chunk["type"] == "WordBoundary":
            submaker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])

    return {
        # Raw audio bytes are not valid text; base64 gives a JSON-safe
        # string that clients can decode back to the exact bytes.
        "audio_data": base64.b64encode(audio_data).decode("ascii"),
        "subtitles": submaker.generate_subs(),
    }