coqui-ai
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 4 additions & 4 deletions b/‎README.md‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎TTS/bin/compute_attention_masks.py‎
Lines changed: 1 addition & 1 deletion b/‎TTS/bin/compute_attention_masks.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎TTS/bin/compute_embeddings.py‎
Lines changed: 1 addition & 1 deletion b/‎TTS/bin/compute_embeddings.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎TTS/bin/distribute.py‎
Lines changed: 0 additions & 55 deletions b/‎TTS/bin/distribute.py‎
Lines changed: 0 additions & 55 deletions
diff --git a/‎TTS/bin/eval_encoder.py‎
Lines changed: 1 addition & 1 deletion b/‎TTS/bin/eval_encoder.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎TTS/bin/synthesize.py‎
Lines changed: 4 additions & 4 deletions b/‎TTS/bin/synthesize.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎TTS/encoder/README.md‎
Lines changed: 1 addition & 1 deletion b/‎TTS/encoder/README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎TTS/encoder/utils/generic_utils.py‎
Lines changed: 2 additions & 2 deletions b/‎TTS/encoder/utils/generic_utils.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎TTS/encoder/utils/io.py‎
Lines changed: 2 additions & 2 deletions b/‎TTS/encoder/utils/io.py‎
Lines changed: 2 additions & 2 deletions
@@ -115,6 +115,7 @@ venv.bak/
 *.swo
 
 # pytorch models
+*.pth
 *.pth.tar
 result/
 
 
@@ -159,13 +159,13 @@ If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](ht
 - Run your own TTS model (Using Griffin-Lim Vocoder):
 
     ```
-    $ tts --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav
+    $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
     ```
 
 - Run your own TTS and Vocoder models:
     ```
-    $ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav
-        --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json
+    $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
+        --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
     ```
 
 ### Multi-speaker Models
@@ -185,7 +185,7 @@ If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](ht
 - Run your own multi-speaker TTS model:
 
     ```
-    $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth.tar --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
+    $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
     ```
 
 ## Directory Structure
 
@@ -25,7 +25,7 @@
         """
 Example run:
     CUDA_VISIBLE_DEVICES="0" python TTS/bin/compute_attention_masks.py
-        --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth.tar
+        --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth
         --config_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/config.json
         --dataset_metafile metadata.csv
         --data_path /root/LJSpeech-1.1/
 
@@ -12,7 +12,7 @@
     description="""Compute embedding vectors for each wav file in a dataset.\n\n"""
     """
     Example runs:
-    python TTS/bin/compute_embeddings.py speaker_encoder_model.pth.tar speaker_encoder_config.json  dataset_config.json embeddings_output_path/
+    python TTS/bin/compute_embeddings.py speaker_encoder_model.pth speaker_encoder_config.json  dataset_config.json embeddings_output_path/
     """,
     formatter_class=RawTextHelpFormatter,
 )
 
@@ -56,7 +56,7 @@ def compute_encoder_accuracy(dataset_items, encoder_manager):
         description="""Compute the accuracy of the encoder.\n\n"""
         """
         Example runs:
-        python TTS/bin/eval_encoder.py emotion_encoder_model.pth.tar emotion_encoder_config.json  dataset_config.json
+        python TTS/bin/eval_encoder.py emotion_encoder_model.pth emotion_encoder_config.json  dataset_config.json
         """,
         formatter_class=RawTextHelpFormatter,
     )
 
@@ -60,13 +60,13 @@ def main():
 - Run your own TTS model (Using Griffin-Lim Vocoder):
 
     ```
-    $ tts --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav
+    $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
     ```
 
 - Run your own TTS and Vocoder models:
     ```
-    $ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav
-        --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json
+    $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
+        --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
     ```
 
 ### Multi-speaker Models
@@ -86,7 +86,7 @@ def main():
 - Run your own multi-speaker TTS model:
 
     ```
-    $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth.tar --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
+    $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
     ```
     """
     # We remove Markdown code formatting programmatically here to allow us to copy-and-paste from main README to keep
 
@@ -14,5 +14,5 @@ To run the code, you need to follow the same flow as in TTS.
 
 - Define 'config.json' for your needs. Note that, audio parameters should match your TTS model.
 - Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360```
-- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth.tar model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
+- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
 - Watch training on Tensorboard as in TTS
@@ -147,7 +147,7 @@ def setup_speaker_encoder_model(config: "Coqpit"):
 
 
 def save_checkpoint(model, optimizer, criterion, model_loss, out_path, current_step, epoch):
-    checkpoint_path = "checkpoint_{}.pth.tar".format(current_step)
+    checkpoint_path = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(out_path, checkpoint_path)
     print(" | | > Checkpoint saving : {}".format(checkpoint_path))
 
@@ -177,7 +177,7 @@ def save_best_model(model, optimizer, criterion, model_loss, best_loss, out_path
             "date": datetime.date.today().strftime("%B %d, %Y"),
         }
         best_loss = model_loss
-        bestmodel_path = "best_model.pth.tar"
+        bestmodel_path = "best_model.pth"
         bestmodel_path = os.path.join(out_path, bestmodel_path)
         print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
         save_fsspec(state, bestmodel_path)
 
@@ -5,7 +5,7 @@
 
 
 def save_checkpoint(model, optimizer, model_loss, out_path, current_step):
-    checkpoint_path = "checkpoint_{}.pth.tar".format(current_step)
+    checkpoint_path = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(out_path, checkpoint_path)
     print(" | | > Checkpoint saving : {}".format(checkpoint_path))
 
@@ -31,7 +31,7 @@ def save_best_model(model, optimizer, model_loss, best_loss, out_path, current_s
             "date": datetime.date.today().strftime("%B %d, %Y"),
         }
         best_loss = model_loss
-        bestmodel_path = "best_model.pth.tar"
+        bestmodel_path = "best_model.pth"
         bestmodel_path = os.path.join(out_path, bestmodel_path)
         print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
         save_fsspec(state, bestmodel_path)
Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`	`description="""Compute embedding vectors for each wav file in a dataset.\n\n"""`
`13`	`13`	`"""`
`14`	`14`	`Example runs:`
`15`		`- python TTS/bin/compute_embeddings.py speaker_encoder_model.pth.tar speaker_encoder_config.json dataset_config.json embeddings_output_path/`
	`15`	`+ python TTS/bin/compute_embeddings.py speaker_encoder_model.pth speaker_encoder_config.json dataset_config.json embeddings_output_path/`
`16`	`16`	`""",`
`17`	`17`	`formatter_class=RawTextHelpFormatter,`
`18`	`18`	`)`
Original file line number	Diff line number	Diff line change
`@@ -56,7 +56,7 @@ def compute_encoder_accuracy(dataset_items, encoder_manager):`
`56`	`56`	`description="""Compute the accuracy of the encoder.\n\n"""`
`57`	`57`	`"""`
`58`	`58`	`Example runs:`
`59`		`- python TTS/bin/eval_encoder.py emotion_encoder_model.pth.tar emotion_encoder_config.json dataset_config.json`
	`59`	`+ python TTS/bin/eval_encoder.py emotion_encoder_model.pth emotion_encoder_config.json dataset_config.json`
`60`	`60`	`""",`
`61`	`61`	`formatter_class=RawTextHelpFormatter,`
`62`	`62`	`)`