added pitch variable back.

rany2 · Aug 30, 2023 · afd4ba8 · afd4ba8
1 parent 8f8a334
commit afd4ba8
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -72,8 +72,6 @@ It is possible to make minor changes to the generated speech.
 
 In addition, you must use `--rate=-50%` rather than `--rate -50%` (the latter lacks the equal sign); otherwise the `-50%` would be interpreted as a separate argument.
 
-**NOTE**: `--pitch` was removed in 6.0.3 as it no longer appears to have any effect.
-
 ### Note on the `edge-playback` command
 
 `edge-playback` is just a wrapper around `edge-tts` that plays back the generated speech. It takes the same arguments as the `edge-tts` option.

diff --git a/src/edge_tts/communicate.py b/src/edge_tts/communicate.py
@@ -152,7 +152,7 @@ def split_text_by_byte_length(
  yield new_text
 
 
-def mkssml(text: Union[str, bytes], voice: str, rate: str, volume: str) -> str:
+def mkssml(text: Union[str, bytes], voice: str, rate: str, volume: str, pitch: str) -> str:
  """
  Creates a SSML string from the given parameters.
 
@@ -164,7 +164,7 @@ def mkssml(text: Union[str, bytes], voice: str, rate: str, volume: str) -> str:
 
  ssml = (
  "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
- f"<voice name='{voice}'><prosody pitch='+0Hz' rate='{rate}' volume='{volume}'>"
+ f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>"
  f"{text}</prosody></voice></speak>"
  )
  return ssml
@@ -203,7 +203,7 @@ def ssml_headers_plus_data(request_id: str, timestamp: str, ssml: str) -> str:
  )
 
 
-def calc_max_mesg_size(voice: str, rate: str, volume: str) -> int:
+def calc_max_mesg_size(voice: str, rate: str, volume: str, pitch: str) -> int:
  """Calculates the maximum message size for the given voice, rate, and volume.
 
  Returns:
@@ -215,7 +215,7 @@ def calc_max_mesg_size(voice: str, rate: str, volume: str) -> int:
  ssml_headers_plus_data(
  connect_id(),
  date_to_string(),
- mkssml("", voice, rate, volume),
+ mkssml("", voice, rate, volume, pitch),
  )
  )
  + 50 # margin of error
@@ -235,6 +235,7 @@ def __init__(
  *,
  rate: str = "+0%",
  volume: str = "+0%",
+ pitch: str = "+0Hz",
  proxy: Optional[str] = None,
  ):
  """
@@ -289,6 +290,12 @@ def __init__(
  raise ValueError(f"Invalid volume '{volume}'.")
  self.volume: str = volume
 
+ if not isinstance(pitch, str):
+ raise TypeError("pitch must be str")
+ if re.match(r"^[+-]\d+Hz$", pitch) is None:
+ raise ValueError(f"Invalid pitch '{pitch}'.")
+ self.pitch: str = pitch
+
  if proxy is not None and not isinstance(proxy, str):
  raise TypeError("proxy must be str")
  self.proxy: Optional[str] = proxy
@@ -298,7 +305,7 @@ async def stream(self) -> AsyncGenerator[Dict[str, Any], None]:
 
  texts = split_text_by_byte_length(
  escape(remove_incompatible_characters(self.text)),
- calc_max_mesg_size(self.voice, self.rate, self.volume),
+ calc_max_mesg_size(self.voice, self.rate, self.volume, self.pitch),
  )
  final_utterance: Dict[int, int] = {}
  prev_idx = -1
@@ -362,7 +369,7 @@ async def stream(self) -> AsyncGenerator[Dict[str, Any], None]:
  ssml_headers_plus_data(
  connect_id(),
  date,
- mkssml(text, self.voice, self.rate, self.volume),
+ mkssml(text, self.voice, self.rate, self.volume, self.pitch),
  )
  )