diff --git a/README.md b/README.md index 80c6bb1..ec7f82a 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,10 @@ It is possible to make minor changes to the generated speech. In addition, it is required to use `--rate=-50%` instead of `--rate -50%` (note the lack of an equal sign) otherwise the `-50%` would be interpreted as just another argument. +### Deploying to Serverless API + +See the [examples/serverless-api](examples/serverless-api) folder for more information on how to deploy to [Cerebrium](https://www.cerebrium.ai). + ### Note on the `edge-playback` command `edge-playback` is just a wrapper around `edge-tts` that plays back the generated speech. It takes the same arguments as the `edge-tts` option. diff --git a/examples/serverless-api/README.md b/examples/serverless-api/README.md new file mode 100644 index 0000000..3c0fba1 --- /dev/null +++ b/examples/serverless-api/README.md @@ -0,0 +1,33 @@ +# Serverless Edge TTS API + +This project demonstrates how to run Edge TTS as a serverless API using [Cerebrium](https://www.cerebrium.ai). + +## Overview + +The `main.py` file contains a function `run` that takes a text input and an optional voice parameter to generate audio and subtitles using Edge TTS. + +## Installation + +1. `pip install cerebrium` +2. `cerebrium login` +3. 
async def run(text: str, voice: str = "en-GB-SoniaNeural"):
    """Synthesize *text* with Edge TTS and return audio plus subtitles.

    Streams chunks from ``edge_tts.Communicate(text, voice)``. The ``data``
    of every ``"audio"`` chunk is appended to one buffer, and every
    ``"WordBoundary"`` chunk is fed to an ``edge_tts.SubMaker`` so subtitles
    can be generated once the stream ends.

    Args:
        text: The text to be converted to speech.
        voice: Name of the Edge TTS voice to use.

    Returns:
        A dict with two keys:

        * ``"audio_data"`` -- the collected audio bytes as a base64-encoded
          ASCII string, safe to embed in a JSON response.
        * ``"subtitles"`` -- whatever ``SubMaker.generate_subs()`` produces
          (the accompanying README describes this as WebVTT).
    """
    # Function-scope import so this fix does not depend on the file's
    # top-level import block.
    import base64

    communicate = edge_tts.Communicate(text, voice)
    submaker = edge_tts.SubMaker()
    audio_data = bytearray()

    async for chunk in communicate.stream():
        if chunk["type"] == "audio":
            audio_data.extend(chunk["data"])
        elif chunk["type"] == "WordBoundary":
            submaker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])

    return {
        # Base64 matches the documented response contract. The previous
        # latin-1 decode produced a str of raw code points 0-255 that
        # clients could not base64-decode.
        "audio_data": base64.b64encode(audio_data).decode("ascii"),
        "subtitles": submaker.generate_subs(),
    }