diff --git a/README.md b/README.md
index abd6bc4..c13e98f 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ If you only want to use the `edge-tts` and `edge-playback` commands, it would be
 
 If you want to use the `edge-tts` command, you can simply run it with the following command:
 
-    $ edge-tts --text "Hello, world!" --write-media hello.mp3 --write-subtitles hello.vtt
+    $ edge-tts --text "Hello, world!" --write-media hello.mp3 --write-subtitles hello.srt
 
 If you wish to play it back immediately with subtitles, you could use the `edge-playback` command:
 
@@ -48,7 +48,7 @@ You can change the voice used by the text-to-speech service by using the `--voic
     ar-EG-SalmaNeural                  Female    General                Friendly, Positive
     ...
 
-    $ edge-tts --voice ar-EG-SalmaNeural --text "مرحبا كيف حالك؟" --write-media hello_in_arabic.mp3 --write-subtitles hello_in_arabic.vtt
+    $ edge-tts --voice ar-EG-SalmaNeural --text "مرحبا كيف حالك؟" --write-media hello_in_arabic.mp3 --write-subtitles hello_in_arabic.srt
 
 ### Custom SSML
 
@@ -58,9 +58,9 @@ Support for custom SSML was removed because Microsoft prevents the use of any SS
 
 You can change the rate, volume and pitch of the generated speech by using the `--rate`, `--volume` and `--pitch` options. When using a negative value, you will need to use `--[option]=-50%` instead of `--[option] -50%` to avoid the option being interpreted as a command line option.
 
-    $ edge-tts --rate=-50% --text "Hello, world!" --write-media hello_with_rate_lowered.mp3 --write-subtitles hello_with_rate_lowered.vtt
-    $ edge-tts --volume=-50% --text "Hello, world!" --write-media hello_with_volume_lowered.mp3 --write-subtitles hello_with_volume_lowered.vtt
-    $ edge-tts --pitch=-50Hz --text "Hello, world!" --write-media hello_with_pitch_lowered.mp3 --write-subtitles hello_with_pitch_lowered.vtt
+    $ edge-tts --rate=-50% --text "Hello, world!" --write-media hello_with_rate_lowered.mp3 --write-subtitles hello_with_rate_lowered.srt
+    $ edge-tts --volume=-50% --text "Hello, world!" --write-media hello_with_volume_lowered.mp3 --write-subtitles hello_with_volume_lowered.srt
+    $ edge-tts --pitch=-50Hz --text "Hello, world!" --write-media hello_with_pitch_lowered.mp3 --write-subtitles hello_with_pitch_lowered.srt
 
 ## Python module
 
diff --git a/examples/streaming_with_subtitles.py b/examples/streaming_with_subtitles.py
index cccc904..52e91bb 100644
--- a/examples/streaming_with_subtitles.py
+++ b/examples/streaming_with_subtitles.py
@@ -14,7 +14,7 @@
 TEXT = "Hello World!"
 VOICE = "en-GB-SoniaNeural"
 OUTPUT_FILE = "test.mp3"
-WEBVTT_FILE = "test.vtt"
+SRT_FILE = "test.srt"
 
 
 async def amain() -> None:
@@ -26,10 +26,10 @@ async def amain() -> None:
             if chunk["type"] == "audio":
                 file.write(chunk["data"])
             elif chunk["type"] == "WordBoundary":
-                submaker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
+                submaker.add_cue((chunk["offset"], chunk["duration"]), chunk["text"])
 
-    with open(WEBVTT_FILE, "w", encoding="utf-8") as file:
-        file.write(submaker.generate_subs())
+    with open(SRT_FILE, "w", encoding="utf-8") as file:
+        file.write(submaker.get_srt())
 
 
 if __name__ == "__main__":
diff --git a/setup.py b/setup.py
index 5d06540..4af2066 100644
--- a/setup.py
+++ b/setup.py
@@ -6,6 +6,7 @@
     install_requires=[
         "aiohttp>=3.8.0",
         "certifi>=2023.11.17",
+        "srt>=3.4.1",
         "tabulate>=0.4.4",
         "typing-extensions>=4.1.0",
     ],
diff --git a/src/edge_playback/__main__.py b/src/edge_playback/__main__.py
index aed8801..89a5c27 100644
--- a/src/edge_playback/__main__.py
+++ b/src/edge_playback/__main__.py
@@ -25,7 +25,7 @@ def _main() -> None:
 
     keep = os.environ.get("EDGE_PLAYBACK_KEEP_TEMP") is not None
     mp3_fname = os.environ.get("EDGE_PLAYBACK_MP3_FILE")
-    vtt_fname = os.environ.get("EDGE_PLAYBACK_VTT_FILE")
+    srt_fname = os.environ.get("EDGE_PLAYBACK_SRT_FILE")
     media, subtitle = None, None
     try:
         if not mp3_fname:
@@ -33,18 +33,18 @@ def _main() -> None:
             media.close()
             mp3_fname = media.name
 
-        if not vtt_fname:
-            subtitle = tempfile.NamedTemporaryFile(suffix=".vtt", delete=False)
+        if not srt_fname:
+            subtitle = tempfile.NamedTemporaryFile(suffix=".srt", delete=False)
             subtitle.close()
-            vtt_fname = subtitle.name
+            srt_fname = subtitle.name
 
         print(f"Media file: {mp3_fname}")
-        print(f"Subtitle file: {vtt_fname}\n")
+        print(f"Subtitle file: {srt_fname}\n")
         with subprocess.Popen(
             [
                 "edge-tts",
                 f"--write-media={mp3_fname}",
-                f"--write-subtitles={vtt_fname}",
+                f"--write-subtitles={srt_fname}",
             ]
             + sys.argv[1:]
         ) as process:
@@ -53,19 +53,19 @@ def _main() -> None:
         with subprocess.Popen(
             [
                 "mpv",
-                f"--sub-file={vtt_fname}",
+                f"--sub-file={srt_fname}",
                 mp3_fname,
             ]
         ) as process:
             process.communicate()
     finally:
         if keep:
-            print(f"\nKeeping temporary files: {mp3_fname} and {vtt_fname}")
+            print(f"\nKeeping temporary files: {mp3_fname} and {srt_fname}")
         else:
             if mp3_fname is not None and os.path.exists(mp3_fname):
                 os.unlink(mp3_fname)
-            if vtt_fname is not None and os.path.exists(vtt_fname):
-                os.unlink(vtt_fname)
+            if srt_fname is not None and os.path.exists(srt_fname):
+                os.unlink(srt_fname)
 
 
 if __name__ == "__main__":
diff --git a/src/edge_tts/submaker.py b/src/edge_tts/submaker.py
index 330fc51..ac60c33 100644
--- a/src/edge_tts/submaker.py
+++ b/src/edge_tts/submaker.py
@@ -1,33 +1,8 @@
 """SubMaker module is used to generate subtitles from WordBoundary events."""
 
-import math
 from typing import List, Tuple
-from xml.sax.saxutils import escape, unescape
 
-
-def formatter(start_time: float, end_time: float, subdata: str) -> str:
-    """
-    formatter returns the timecode and the text of the subtitle.
-    """
-    return (
-        f"{mktimestamp(start_time)} --> {mktimestamp(end_time)}\r\n"
-        f"{escape(subdata)}\r\n\r\n"
-    )
-
-
-def mktimestamp(time_unit: float) -> str:
-    """
-    mktimestamp returns the timecode of the subtitle.
-
-    The timecode is in the format of 00:00:00.000.
-
-    Returns:
-        str: The timecode of the subtitle.
-    """
-    hour = math.floor(time_unit / 10**7 / 3600)
-    minute = math.floor((time_unit / 10**7 / 60) % 60)
-    seconds = (time_unit / 10**7) % 60
-    return f"{hour:02d}:{minute:02d}:{seconds:06.3f}"
+import srt  # type: ignore
 
 
 class SubMaker:
@@ -36,19 +11,11 @@ class SubMaker:
     """
 
     def __init__(self) -> None:
-        """
-        SubMaker constructor initializes the list of subtitles and the list of offsets.
+        self.cues: List[srt.Subtitle] = []  # type: ignore
 
-        Returns:
-            None
+    def add_cue(self, timestamp: Tuple[float, float], text: str) -> None:
         """
-        self.offset: List[Tuple[float, float]] = []
-        self.subs: List[str] = []
-
-    def create_sub(self, timestamp: Tuple[float, float], text: str) -> None:
-        """
-        create_sub creates a subtitle from the given timestamp and text,
-        and appends it to the list of subtitles.
+        Add a subtitle cue to the SubMaker object.
 
         Args:
             timestamp (tuple): The offset and duration of the subtitle.
@@ -57,67 +24,20 @@ def create_sub(self, timestamp: Tuple[float, float], text: str) -> None:
         Returns:
             None
         """
-        self.offset.append((timestamp[0], timestamp[0] + timestamp[1]))
-        self.subs.append(text)
-
-    def generate_subs(self, words_in_cue: int = 10) -> str:
+        self.cues.append(
+            srt.Subtitle(
+                index=len(self.cues) + 1,
+                start=srt.timedelta(microseconds=timestamp[0] / 10),
+                end=srt.timedelta(microseconds=sum(timestamp) / 10),
+                content=text,
+            )
+        )
+
+    def get_srt(self) -> str:
         """
-        generate_subs generates the complete subtitle file.
-
-        Args:
-            words_in_cue (int): defines the number of words in a given cue
+        Get the SRT-formatted subtitles from the SubMaker object.
 
         Returns:
-            str: The complete subtitle file.
+            str: The SRT-formatted subtitles.
         """
-        if len(self.subs) != len(self.offset):
-            raise ValueError("subs and offset are not of the same length")
-
-        if words_in_cue <= 0:
-            raise ValueError("words_in_cue must be greater than 0")
-
-        data = "WEBVTT\r\n\r\n"
-        sub_state_count = 0
-        sub_state_start = -1.0
-        sub_state_subs = ""
-        for idx, (offset, subs) in enumerate(zip(self.offset, self.subs)):
-            start_time, end_time = offset
-            subs = unescape(subs)
-
-            # wordboundary is guaranteed not to contain whitespace
-            if len(sub_state_subs) > 0:
-                sub_state_subs += " "
-            sub_state_subs += subs
-
-            if sub_state_start == -1.0:
-                sub_state_start = start_time
-            sub_state_count += 1
-
-            if sub_state_count == words_in_cue or idx == len(self.offset) - 1:
-                subs = sub_state_subs
-                split_subs: List[str] = [
-                    subs[i : i + 79] for i in range(0, len(subs), 79)
-                ]
-                for i in range(len(split_subs) - 1):
-                    sub = split_subs[i]
-                    split_at_word = True
-                    if sub[-1] == " ":
-                        split_subs[i] = sub[:-1]
-                        split_at_word = False
-
-                    if sub[0] == " ":
-                        split_subs[i] = sub[1:]
-                        split_at_word = False
-
-                    if split_at_word:
-                        split_subs[i] += "-"
-
-                data += formatter(
-                    start_time=sub_state_start,
-                    end_time=end_time,
-                    subdata="\r\n".join(split_subs),
-                )
-                sub_state_count = 0
-                sub_state_start = -1
-                sub_state_subs = ""
-        return data
+        return srt.compose(self.cues)  # type: ignore
diff --git a/src/edge_tts/util.py b/src/edge_tts/util.py
index 64f0601..9781f56 100644
--- a/src/edge_tts/util.py
+++ b/src/edge_tts/util.py
@@ -61,7 +61,7 @@ async def _run_tts(args: Any) -> None:
             if chunk["type"] == "audio":
                 audio_file.write(chunk["data"])
             elif chunk["type"] == "WordBoundary":
-                subs.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
+                subs.add_cue((chunk["offset"], chunk["duration"]), chunk["text"])
 
     sub_file: Union[TextIOWrapper, TextIO] = (
         open(args.write_subtitles, "w", encoding="utf-8")
@@ -69,7 +69,7 @@ async def _run_tts(args: Any) -> None:
         else sys.stderr
     )
     with sub_file:
-        sub_file.write(subs.generate_subs(args.words_in_cue))
+        sub_file.write(subs.get_srt())
 
 
 async def amain() -> None:
@@ -93,12 +93,6 @@ async def amain() -> None:
     parser.add_argument("--rate", help="set TTS rate. Default +0%%.", default="+0%")
     parser.add_argument("--volume", help="set TTS volume. Default +0%%.", default="+0%")
     parser.add_argument("--pitch", help="set TTS pitch. Default +0Hz.", default="+0Hz")
-    parser.add_argument(
-        "--words-in-cue",
-        help="number of words in a subtitle cue. Default: 10.",
-        default=10,
-        type=float,
-    )
     parser.add_argument(
         "--write-media", help="send media output to file instead of stdout"
     )