Cleanup to prepare for release

Signed-off-by: rany2 <[email protected]>
rany2 · Dec 14, 2023 · b8d3e70 · iamyb · Dec 31, 2023 · b8d3e70
1 parent 43c25a4
commit b8d3e70
Show file tree

Hide file tree

Showing 5 changed files with 83 additions and 48 deletions.
diff --git a/examples/streaming_with_subtitles.py b/examples/streaming_with_subtitles.py
@@ -29,7 +29,7 @@ async def amain() -> None:
                 submaker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
 
     with open(WEBVTT_FILE, "w", encoding="utf-8") as file:
-        file.write(submaker.generate_subs())
+        file.write(submaker.generate_subs(TEXT))
 
 
 if __name__ == "__main__":

diff --git a/pylintrc b/pylintrc
@@ -307,8 +307,7 @@ min-public-methods=2
 [EXCEPTIONS]
 
 # Exceptions that will emit a warning when caught.
-overgeneral-exceptions=BaseException,
-                       Exception
+overgeneral-exceptions=builtins.BaseException,builtins.Exception
 
 
 [FORMAT]

diff --git a/src/edge_tts/communicate.py b/src/edge_tts/communicate.py
@@ -239,7 +239,7 @@ def __init__(
         volume: str = "+0%",
         pitch: str = "+0Hz",
         proxy: Optional[str] = None,
-    ):
+    ):  # pylint: disable=too-many-arguments
         """
         Initializes the Communicate class.
 
@@ -302,7 +302,9 @@ def __init__(
             raise TypeError("proxy must be str")
         self.proxy: Optional[str] = proxy
 
-    async def stream(self) -> AsyncGenerator[Dict[str, Any], None]:
+    async def stream(  # pylint: disable=too-many-statements
+        self,
+    ) -> AsyncGenerator[Dict[str, Any], None]:
         """Streams audio and metadata from the service."""
 
         texts = split_text_by_byte_length(

diff --git a/src/edge_tts/submaker.py b/src/edge_tts/submaker.py
@@ -6,7 +6,8 @@
 """
 
 import math
-from typing import List, Tuple
+import re
+from typing import Callable, List, Tuple, Union
 from xml.sax.saxutils import escape, unescape
 
 
@@ -35,10 +36,60 @@ def mktimestamp(time_unit: float) -> str:
     hour = math.floor(time_unit / 10**7 / 3600)
     minute = math.floor((time_unit / 10**7 / 60) % 60)
     seconds = (time_unit / 10**7) % 60
-    # return f"{hour:02d}:{minute:02d}:{seconds:06.3f}"
     return f"{hour:02d}:{minute:02d}:{seconds:06.3f}".replace(".", ",")
 
 
+def _spinoff_sentence(sentence: str) -> Tuple[str, str, int]:
+    """
+    _spinoff_sentence returns the sentence, its last character (named
+    last_word in the code), and how many times that character occurs in it.
+
+    Args:
+        sentence (str): The sentence to be processed.
+
+    Returns:
+        Tuple[str, str, int]: The sentence, its last character, and the
+        number of times that character appears in the sentence.
+    """
+    if not isinstance(sentence, str):
+        raise TypeError("sentence must be a string")
+    last_word = sentence[-1]
+    last_word_num = sentence.count(last_word)
+    return (sentence, last_word, last_word_num)
+
+
+def process_text(
+    text: str,
+    *,
+    pattern_chi: str = r"[：“”‘’──{}【】·《》〈〉，、；。？！]",
+    spinoff_sentence: Callable[[str], Tuple[str, str, int]] = _spinoff_sentence,
+) -> List[Tuple[str, str, int]]:
+    """
+    process_text splits the text into sentences and returns a list of
+    (sentence, last_word, last_word_num) tuples for SubMaker.generate_subs.
+
+    Args:
+        text (str): The text to be processed.
+        pattern_chi (str): The regex of delimiters the text is split on.
+        spinoff_sentence (function): The function used to process the sentence.
+
+    Returns:
+        List[Tuple[str, str, int]]: The three-dimensional list of the text.
+    """
+    if not isinstance(text, str):
+        raise TypeError("text must be a string")
+    if not isinstance(pattern_chi, str):
+        raise TypeError("pattern_chi must be a string")
+    if not callable(spinoff_sentence):
+        raise TypeError("spinoff_sentence must be a function")
+    sentences = re.split(pattern_chi, text)
+    sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
+    three_dimensional_list = []
+    for sentence in sentences:
+        three_dimensional_list.append(spinoff_sentence(sentence))
+    return three_dimensional_list
+
+
 class SubMaker:
     """
     SubMaker class
@@ -54,7 +105,8 @@ def __init__(self) -> None:
     def create_sub(self, timestamp: Tuple[float, float], text: str) -> None:
         """
         create_sub creates a subtitle with the given timestamp and text
-        and adds it to the list of subtitles
+        and adds it to the list of subtitles. This should be called
+        when receiving a WordBoundary event from the service.
 
         Args:
             timestamp (tuple): The offset and duration of the subtitle.
@@ -66,53 +118,58 @@ def create_sub(self, timestamp: Tuple[float, float], text: str) -> None:
         self.offset.append((timestamp[0], timestamp[0] + timestamp[1]))
         self.subs.append(text)
 
-    def generate_subs(self, three_dimensional_list, words_in_cue: int = 10) -> str:
+    def generate_subs(self, text: Union[str, List[Tuple[str, str, int]]]) -> str:
         """
         generate_subs generates the complete subtitle file.
 
         Args:
-            words_in_cue (int): defines the number of words in a given cue
+            text: If the type is List[Tuple[str, str, int]], it is the three-dimensional
+                list of the text already processed. If the type is str, the text will
+                be processed automatically by process_text with the default parameters.
+                It should not use data from WordBoundary events, but the text that was
+                used to generate the audio.
 
         Returns:
             str: The complete subtitle file.
-
-        three_dimensional_list：
-            [(sentence, last_word, last_word_num)， (sentence, last_word, last_word_num)]
         """
         if len(self.subs) != len(self.offset):
             raise ValueError("subs and offset are not of the same length")
 
-        if words_in_cue <= 0:
-            raise ValueError("words_in_cue must be greater than 0")
+        if isinstance(text, str):
+            text = process_text(text)
+        elif isinstance(text, list):
+            for sentence, last_word, last_word_num in text:
+                if not isinstance(sentence, str):
+                    raise TypeError("sentence (first element) must be a string")
+                if not isinstance(last_word, str):
+                    raise TypeError("last_word (second element) must be a string")
+                if not isinstance(last_word_num, int):
+                    raise TypeError("last_word_num (third element) must be an integer")
+        else:
+            raise TypeError("text must be a string or a list")
 
-        # data = "WEBVTT\r\n\r\n"
         data = ""
         sub_state_count = 0
         sub_state_start = -1.0
         sub_state_subs = ""
-        sub_line_count = (
-            0  # new variable used to indicate which line of subtitle this is
-        )
+        sub_line_count = 0
         for idx, (offset, subs) in enumerate(zip(self.offset, self.subs)):
             start_time, end_time = offset
             subs = unescape(subs)
 
             # wordboundary is guaranteed not to contain whitespace
-            # if len(sub_state_subs) > 0:
-            #     sub_state_subs += " "
             sub_state_subs += subs
 
             if sub_state_start == -1.0:
                 sub_state_start = start_time
             sub_state_count += 1
 
-            sentence, last_word, last_word_num = three_dimensional_list[sub_line_count]
+            sentence, last_word, last_word_num = text[sub_line_count]
             if (
                 sub_state_subs.count(last_word) == last_word_num
                 or idx == len(self.offset) - 1
             ):
                 sub_line_count += 1
-                # subs = sub_state_subs
                 subs = sentence
                 split_subs: List[str] = [
                     subs[i : i + 79] for i in range(0, len(subs), 79)

diff --git a/src/edge_tts/util.py b/src/edge_tts/util.py
@@ -5,7 +5,6 @@
 
 import argparse
 import asyncio
-import re
 import sys
 from io import TextIOWrapper
 from typing import Any, TextIO, Union
@@ -16,7 +15,7 @@
 async def _print_voices(*, proxy: str) -> None:
     """Print all available voices."""
     voices = await list_voices(proxy=proxy)
-    voices = sorted(voices, key=lambda voice: voice["ShortName"])  # type: ignore
+    voices = sorted(voices, key=lambda voice: voice["ShortName"])
     for idx, voice in enumerate(voices):
         if idx != 0:
             print()
@@ -35,12 +34,6 @@ async def _print_voices(*, proxy: str) -> None:
             print(f"{pretty_key_name}: {voice[key]}")
 
 
-def _spinoff_sentence(sentence):
-    last_word = sentence[-1]
-    last_word_num = sentence.count(last_word)
-    return (sentence, last_word, last_word_num)
-
-
 async def _run_tts(args: Any) -> None:
     """Run TTS after parsing arguments from command line."""
 
@@ -67,14 +60,6 @@ async def _run_tts(args: Any) -> None:
     )
 
     submaker: SubMaker = SubMaker()
-
-    pattern_chi = r"[：“”‘’──{}【】·《》〈〉，、；。？！]"
-    sentences = re.split(pattern_chi, args.text)
-    sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
-    three_dimensional_list = []
-    for sentence in sentences:
-        three_dimensional_list.append(_spinoff_sentence(sentence))
-
     with open(
         args.write_media, "wb"
     ) if args.write_media else sys.stdout.buffer as audio_file:
@@ -90,9 +75,7 @@ async def _run_tts(args: Any) -> None:
         else sys.stderr
     )
     with sub_file:
-        sub_file.write(
-            submaker.generate_subs(three_dimensional_list=three_dimensional_list)
-        )
+        sub_file.write(submaker.generate_subs(args.text))
 
 
 async def amain() -> None:
@@ -116,12 +99,6 @@ async def amain() -> None:
     parser.add_argument("--rate", help="set TTS rate. Default +0%%.", default="+0%")
     parser.add_argument("--volume", help="set TTS volume. Default +0%%.", default="+0%")
     parser.add_argument("--pitch", help="set TTS pitch. Default +0Hz.", default="+0Hz")
-    parser.add_argument(
-        "--words-in-cue",
-        help="number of words in a subtitle cue. Default: 10.",
-        default=10,
-        type=float,
-    )
     parser.add_argument(
         "--write-media", help="send media output to file instead of stdout"
     )