|
12 | 12 | from TTS.utils.manage import ModelManager |
13 | 13 | from TTS.utils.synthesizer import Synthesizer |
14 | 14 |
|
15 | | - |
16 | | -def str2bool(v): |
17 | | - if isinstance(v, bool): |
18 | | - return v |
19 | | - if v.lower() in ("yes", "true", "t", "y", "1"): |
20 | | - return True |
21 | | - if v.lower() in ("no", "false", "f", "n", "0"): |
22 | | - return False |
23 | | - raise argparse.ArgumentTypeError("Boolean value expected.") |
24 | | - |
25 | | - |
26 | | -def main(): |
27 | | - description = """Synthesize speech on command line. |
| 15 | +description = """ |
| 16 | +Synthesize speech on command line. |
28 | 17 |
|
29 | 18 | You can either use your trained model or choose a model from the provided list. |
30 | 19 |
|
31 | 20 | If you don't specify any models, then it uses LJSpeech based English model. |
32 | 21 |
|
33 | | -## Example Runs |
34 | | -
|
35 | | -### Single Speaker Models |
| 22 | +#### Single Speaker Models |
36 | 23 |
|
37 | 24 | - List provided models: |
38 | 25 |
|
| 26 | + ``` |
| 27 | + $ tts --list_models |
| 28 | + ``` |
| 29 | +
|
| 30 | +- Get model info (for both tts_models and vocoder_models): |
| 31 | +
|
| 32 | + - Query by type/name: |
| 33 | + The model_info_by_name uses the name exactly as it appears in the --list_models output. |
39 | 34 | ``` |
40 | | - $ tts --list_models |
| 35 | + $ tts --model_info_by_name "<model_type>/<language>/<dataset>/<model_name>" |
41 | 36 | ``` |
42 | | -
|
43 | | -- Query info for model info by idx: |
| 37 | + For example: |
| 38 | + ``` |
| 39 | + $ tts --model_info_by_name tts_models/tr/common-voice/glow-tts |
| 40 | + $ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2 |
| 41 | + ``` |
| 42 | + - Query by type/idx: |
| 43 | + The model_query_idx uses the corresponding idx from --list_models. |
44 | 44 |
|
45 | 45 | ``` |
46 | 46 | $ tts --model_info_by_idx "<model_type>/<model_query_idx>" |
47 | 47 | ``` |
48 | 48 |
|
49 | | -- Query info for model info by full name: |
| 49 | + For example: |
| 50 | +
|
| 51 | + ``` |
| 52 | + $ tts --model_info_by_idx tts_models/3 |
| 53 | + ``` |
50 | 54 |
|
| 55 | + - Query model info by full name: |
51 | 56 | ``` |
52 | 57 | $ tts --model_info_by_name "<model_type>/<language>/<dataset>/<model_name>" |
53 | 58 | ``` |
54 | 59 |
|
55 | 60 | - Run TTS with default models: |
56 | 61 |
|
57 | | - ``` |
58 | | - $ tts --text "Text for TTS" |
59 | | - ``` |
| 62 | + ``` |
| 63 | + $ tts --text "Text for TTS" --out_path output/path/speech.wav |
| 64 | + ``` |
60 | 65 |
|
61 | 66 | - Run a TTS model with its default vocoder model: |
62 | 67 |
|
63 | | - ``` |
64 | | - $ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name> |
65 | | - ``` |
| 68 | + ``` |
| 69 | + $ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --out_path output/path/speech.wav |
| 70 | + ``` |
| 71 | +
|
| 72 | + For example: |
| 73 | +
|
| 74 | + ``` |
| 75 | + $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --out_path output/path/speech.wav |
| 76 | + ``` |
66 | 77 |
|
67 | 78 | - Run with specific TTS and vocoder models from the list: |
68 | 79 |
|
69 | | - ``` |
70 | | - $ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --vocoder_name "<model_type>/<language>/<dataset>/<model_name>" --output_path |
71 | | - ``` |
| 80 | + ``` |
| 81 | + $ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --vocoder_name "<model_type>/<language>/<dataset>/<model_name>" --out_path output/path/speech.wav |
| 82 | + ``` |
| 83 | +
|
| 84 | + For example: |
| 85 | +
|
| 86 | + ``` |
| 87 | + $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --vocoder_name "vocoder_models/en/ljspeech/univnet" --out_path output/path/speech.wav |
| 88 | + ``` |
72 | 89 |
|
73 | 90 | - Run your own TTS model (Using Griffin-Lim Vocoder): |
74 | 91 |
|
75 | | - ``` |
76 | | - $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav |
77 | | - ``` |
| 92 | + ``` |
| 93 | + $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav |
| 94 | + ``` |
78 | 95 |
|
79 | 96 | - Run your own TTS and Vocoder models: |
80 | | - ``` |
81 | | - $ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth --out_path output/path/speech.wav |
82 | | - --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json |
83 | | - ``` |
84 | 97 |
|
85 | | -### Multi-speaker Models |
| 98 | + ``` |
| 99 | + $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav |
| 100 | + --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json |
| 101 | + ``` |
86 | 102 |
|
87 | | -- List the available speakers and choose as <speaker_id> among them: |
| 103 | +#### Multi-speaker Models |
88 | 104 |
|
89 | | - ``` |
90 | | - $ tts --model_name "<language>/<dataset>/<model_name>" --list_speaker_idxs |
91 | | - ``` |
| 105 | +- List the available speakers and choose a <speaker_id> among them: |
| 106 | +
|
| 107 | + ``` |
| 108 | + $ tts --model_name "<model_type>/<language>/<dataset>/<model_name>" --list_speaker_idxs |
| 109 | + ``` |
92 | 110 |
|
93 | 111 | - Run the multi-speaker TTS model with the target speaker ID: |
94 | 112 |
|
95 | | - ``` |
96 | | - $ tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "<language>/<dataset>/<model_name>" --speaker_idx <speaker_id> |
97 | | - ``` |
| 113 | + ``` |
| 114 | + $ tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "<model_type>/<language>/<dataset>/<model_name>" --speaker_idx <speaker_id> |
| 115 | + ``` |
98 | 116 |
|
99 | 117 | - Run your own multi-speaker TTS model: |
100 | 118 |
|
101 | | - ``` |
102 | | - $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id> |
103 | | - ``` |
| 119 | + ``` |
| 120 | + $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id> |
| 121 | + ``` |
104 | 122 |
|
105 | 123 | ### Voice Conversion Models |
106 | 124 |
|
107 | | - ``` |
108 | | - $ tts --out_path output/path/speech.wav --model_name "<language>/<dataset>/<model_name>" --source_wav <path/to/speaker/wav> --target_wav <path/to/reference/wav> |
109 | | - ``` |
110 | | - """ |
111 | | - # We remove Markdown code formatting programmatically here to allow us to copy-and-paste from main README to keep |
112 | | - # documentation in sync more easily. |
| 125 | +``` |
| 126 | +$ tts --out_path output/path/speech.wav --model_name "<language>/<dataset>/<model_name>" --source_wav <path/to/speaker/wav> --target_wav <path/to/reference/wav> |
| 127 | +``` |
| 128 | +""" |
| 129 | + |
| 130 | + |
| 131 | +def str2bool(v): |
| 132 | + if isinstance(v, bool): |
| 133 | + return v |
| 134 | + if v.lower() in ("yes", "true", "t", "y", "1"): |
| 135 | + return True |
| 136 | + if v.lower() in ("no", "false", "f", "n", "0"): |
| 137 | + return False |
| 138 | + raise argparse.ArgumentTypeError("Boolean value expected.") |
| 139 | + |
| 140 | + |
| 141 | +def main(): |
113 | 142 | parser = argparse.ArgumentParser( |
114 | 143 | description=description.replace(" ```\n", ""), |
115 | 144 | formatter_class=RawTextHelpFormatter, |
|
0 commit comments