Skip to content

Commit

Permalink
Cleanup to prepare for release
Browse files Browse the repository at this point in the history
Signed-off-by: rany2 <[email protected]>
  • Loading branch information
rany2 committed Dec 14, 2023
1 parent 43c25a4 commit b8d3e70
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 48 deletions.
2 changes: 1 addition & 1 deletion examples/streaming_with_subtitles.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ async def amain() -> None:
submaker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])

with open(WEBVTT_FILE, "w", encoding="utf-8") as file:
file.write(submaker.generate_subs())
file.write(submaker.generate_subs(TEXT))


if __name__ == "__main__":
Expand Down
3 changes: 1 addition & 2 deletions pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,7 @@ min-public-methods=2
[EXCEPTIONS]

# Exceptions that will emit a warning when caught.
overgeneral-exceptions=BaseException,
Exception
overgeneral-exceptions=builtins.BaseException,builtins.Exception


[FORMAT]
Expand Down
6 changes: 4 additions & 2 deletions src/edge_tts/communicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def __init__(
volume: str = "+0%",
pitch: str = "+0Hz",
proxy: Optional[str] = None,
):
): # pylint: disable=too-many-arguments
"""
Initializes the Communicate class.
Expand Down Expand Up @@ -302,7 +302,9 @@ def __init__(
raise TypeError("proxy must be str")
self.proxy: Optional[str] = proxy

async def stream(self) -> AsyncGenerator[Dict[str, Any], None]:
async def stream( # pylint: disable=too-many-statements
self,
) -> AsyncGenerator[Dict[str, Any], None]:
"""Streams audio and metadata from the service."""

texts = split_text_by_byte_length(
Expand Down
93 changes: 75 additions & 18 deletions src/edge_tts/submaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"""

import math
from typing import List, Tuple
import re
from typing import Callable, List, Tuple, Union
from xml.sax.saxutils import escape, unescape


Expand Down Expand Up @@ -35,10 +36,60 @@ def mktimestamp(time_unit: float) -> str:
hour = math.floor(time_unit / 10**7 / 3600)
minute = math.floor((time_unit / 10**7 / 60) % 60)
seconds = (time_unit / 10**7) % 60
# return f"{hour:02d}:{minute:02d}:{seconds:06.3f}"
return f"{hour:02d}:{minute:02d}:{seconds:06.3f}".replace(".", ",")


def _spinoff_sentence(sentence: str) -> Tuple[str, str, int]:
"""
_spinoff_sentence returns the sentence, the last word of the sentence,
and the number of times the last word appears in the sentence.
Args:
sentence (str): The sentence to be processed.
Returns:
Tuple[str, str, int]: The sentence, the last word of the sentence,
and the number of times the last word appears in the sentence.
"""
if not isinstance(sentence, str):
raise TypeError("sentence must be a string")
last_word = sentence[-1]
last_word_num = sentence.count(last_word)
return (sentence, last_word, last_word_num)


def process_text(
    text: str,
    *,
    # Review question (iamyb, Dec 31, 2023): "are we able to provide one
    # option to let user define such delimiters? thanks"
    # pattern_chi is the regex handed to re.split below, so passing a
    # different pattern overrides the default delimiters.
    pattern_chi: str = r"[:“”‘’──{}【】·《》〈〉,、;。?!]",
    spinoff_sentence: Callable[[str], Tuple[str, str, int]] = _spinoff_sentence,
) -> List[Tuple[str, str, int]]:
    """
    process_text splits the text into sentences and returns one tuple per
    sentence, in the form accepted by SubMaker's generate_subs method.

    The text is split on pattern_chi, each piece is stripped of surrounding
    whitespace, pieces that are empty after stripping are dropped, and
    spinoff_sentence is applied to each remaining piece in order.

    Args:
        text (str): The text to be processed.
        pattern_chi (str): Regular expression whose matches are treated as
            sentence delimiters.
        spinoff_sentence (function): Callable applied to every non-empty
            sentence to build its tuple.

    Returns:
        List[Tuple[str, str, int]]: One result of spinoff_sentence per
        sentence.

    Raises:
        TypeError: If text or pattern_chi is not a string, or if
            spinoff_sentence is not callable.
    """
    if not isinstance(text, str):
        raise TypeError("text must be a string")
    if not isinstance(pattern_chi, str):
        raise TypeError("pattern_chi must be a string")
    if not callable(spinoff_sentence):
        raise TypeError("spinoff_sentence must be a function")
    trimmed_pieces = (piece.strip() for piece in re.split(pattern_chi, text))
    return [spinoff_sentence(piece) for piece in trimmed_pieces if piece]


class SubMaker:
"""
SubMaker class
Expand All @@ -54,7 +105,8 @@ def __init__(self) -> None:
def create_sub(self, timestamp: Tuple[float, float], text: str) -> None:
"""
create_sub creates a subtitle with the given timestamp and text
and adds it to the list of subtitles
and adds it to the list of subtitles. This should be called
when receiving a WordBoundary event from the service.
Args:
timestamp (tuple): The offset and duration of the subtitle.
Expand All @@ -66,53 +118,58 @@ def create_sub(self, timestamp: Tuple[float, float], text: str) -> None:
self.offset.append((timestamp[0], timestamp[0] + timestamp[1]))
self.subs.append(text)

def generate_subs(self, three_dimensional_list, words_in_cue: int = 10) -> str:
def generate_subs(self, text: Union[str, List[Tuple[str, str, int]]]) -> str:
"""
generate_subs generates the complete subtitle file.
Args:
words_in_cue (int): defines the number of words in a given cue
text: If the type is List[Tuple[str, str, int]], it is the three-dimensional
list of the text already processed. If the type is str, the text will
be processed automatically by process_text with the default parameters.
It should not use data from WordBoundary events, but the text that was
used to generate the audio.
Returns:
str: The complete subtitle file.
three_dimensional_list:
[(sentence, last_word, last_word_num), (sentence, last_word, last_word_num)]
"""
if len(self.subs) != len(self.offset):
raise ValueError("subs and offset are not of the same length")

if words_in_cue <= 0:
raise ValueError("words_in_cue must be greater than 0")
if isinstance(text, str):
text = process_text(text)
elif isinstance(text, list):
for sentence, last_word, last_word_num in text:
if not isinstance(sentence, str):
raise TypeError("sentence (first element) must be a string")
if not isinstance(last_word, str):
raise TypeError("last_word (second element) must be a string")
if not isinstance(last_word_num, int):
raise TypeError("last_word_num (third element) must be an integer")
else:
raise TypeError("text must be a string or a list")

# data = "WEBVTT\r\n\r\n"
data = ""
sub_state_count = 0
sub_state_start = -1.0
sub_state_subs = ""
sub_line_count = (
0 # new variable used to indicate which line of subtitle this is
)
sub_line_count = 0
for idx, (offset, subs) in enumerate(zip(self.offset, self.subs)):
start_time, end_time = offset
subs = unescape(subs)

# wordboundary is guaranteed not to contain whitespace
# if len(sub_state_subs) > 0:
# sub_state_subs += " "
sub_state_subs += subs

if sub_state_start == -1.0:
sub_state_start = start_time
sub_state_count += 1

sentence, last_word, last_word_num = three_dimensional_list[sub_line_count]
sentence, last_word, last_word_num = text[sub_line_count]
if (
sub_state_subs.count(last_word) == last_word_num
or idx == len(self.offset) - 1
):
sub_line_count += 1
# subs = sub_state_subs
subs = sentence
split_subs: List[str] = [
subs[i : i + 79] for i in range(0, len(subs), 79)
Expand Down
27 changes: 2 additions & 25 deletions src/edge_tts/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import argparse
import asyncio
import re
import sys
from io import TextIOWrapper
from typing import Any, TextIO, Union
Expand All @@ -16,7 +15,7 @@
async def _print_voices(*, proxy: str) -> None:
"""Print all available voices."""
voices = await list_voices(proxy=proxy)
voices = sorted(voices, key=lambda voice: voice["ShortName"]) # type: ignore
voices = sorted(voices, key=lambda voice: voice["ShortName"])
for idx, voice in enumerate(voices):
if idx != 0:
print()
Expand All @@ -35,12 +34,6 @@ async def _print_voices(*, proxy: str) -> None:
print(f"{pretty_key_name}: {voice[key]}")


def _spinoff_sentence(sentence):
last_word = sentence[-1]
last_word_num = sentence.count(last_word)
return (sentence, last_word, last_word_num)


async def _run_tts(args: Any) -> None:
"""Run TTS after parsing arguments from command line."""

Expand All @@ -67,14 +60,6 @@ async def _run_tts(args: Any) -> None:
)

submaker: SubMaker = SubMaker()

pattern_chi = r"[:“”‘’──{}【】·《》〈〉,、;。?!]"
sentences = re.split(pattern_chi, args.text)
sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
three_dimensional_list = []
for sentence in sentences:
three_dimensional_list.append(_spinoff_sentence(sentence))

with open(
args.write_media, "wb"
) if args.write_media else sys.stdout.buffer as audio_file:
Expand All @@ -90,9 +75,7 @@ async def _run_tts(args: Any) -> None:
else sys.stderr
)
with sub_file:
sub_file.write(
submaker.generate_subs(three_dimensional_list=three_dimensional_list)
)
sub_file.write(submaker.generate_subs(args.text))


async def amain() -> None:
Expand All @@ -116,12 +99,6 @@ async def amain() -> None:
parser.add_argument("--rate", help="set TTS rate. Default +0%%.", default="+0%")
parser.add_argument("--volume", help="set TTS volume. Default +0%%.", default="+0%")
parser.add_argument("--pitch", help="set TTS pitch. Default +0Hz.", default="+0Hz")
parser.add_argument(
"--words-in-cue",
help="number of words in a subtitle cue. Default: 10.",
default=10,
type=float,
)
parser.add_argument(
"--write-media", help="send media output to file instead of stdout"
)
Expand Down

0 comments on commit b8d3e70

Please sign in to comment.