Skip to content

Commit

Permalink
Debug the issue #23
Browse files: browse the repository at this point in the history
  • Loading branch information
ChenX17 committed Dec 17, 2023
1 parent 41cb247 commit 35037f1
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 9 deletions.
3 changes: 2 additions & 1 deletion config/fs2.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"extract_energy": true,
"energy_extract_mode": "from_tacotron_stft",
"extract_duration": true,
"use_phone": true,
"use_phone": false,
"pitch_norm": true,
"energy_norm": true,
"pitch_remove_outlier": true,
Expand Down Expand Up @@ -47,6 +47,7 @@
"mert_dir": "mert",
"spk2id":"spk2id.json",
"utt2spk":"utt2spk",
"valid_file": "test.json",

// Features used for model training
"use_mel": true,
Expand Down
1 change: 1 addition & 0 deletions models/tts/fastspeech2/fs2_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def inference_for_batches(self):
)
os.remove(os.path.join(self.args.output_dir, f"{uid}.pt"))

@torch.inference_mode()
def _inference_each_batch(self, batch_data):
device = self.accelerator.device
control_values = (
Expand Down
9 changes: 5 additions & 4 deletions processors/acoustic_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
from utils.data_utils import remove_outlier
from preprocessors.metadata import replace_augment_name
from scipy.interpolate import interp1d
from utils.mel import (
extract_mel_features,
extract_linear_features,
extract_mel_features_tts,
)

ZERO = 1e-12

Expand Down Expand Up @@ -124,16 +129,12 @@ def __extract_utt_acoustic_features(dataset_output, cfg, utt):
wav_torch = torch.from_numpy(wav).to(wav_torch.device)

if cfg.preprocess.extract_linear_spec:
from utils.mel import extract_linear_features

linear = extract_linear_features(wav_torch.unsqueeze(0), cfg.preprocess)
save_feature(
dataset_output, cfg.preprocess.linear_dir, uid, linear.cpu().numpy()
)

if cfg.preprocess.extract_mel:
from utils.mel import extract_mel_features

if cfg.preprocess.mel_extract_mode == "taco":
_stft = TacotronSTFT(
sampling_rate=cfg.preprocess.sample_rate,
Expand Down
6 changes: 2 additions & 4 deletions utils/mel.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,14 +232,12 @@ def extract_mel_features_tts(
spec = torch.matmul(mel_basis[str(cfg.fmax) + "_" + str(y.device)], spec)
spec = spectral_normalize_torch(spec)
spec = spec.squeeze(0)
spec = torch.matmul(mel_basis[str(cfg.fmax) + "_" + str(y.device)], spec)
spec = spectral_normalize_torch(spec)
else:
audio = torch.clip(y, -1, 1)
audio = torch.autograd.Variable(audio, requires_grad=False)
spec, energy = _stft.mel_spectrogram(audio)
spec = torch.squeeze(spec, 0)

spec = torch.matmul(mel_basis[str(cfg.fmax) + "_" + str(y.device)], spec)
spec = spectral_normalize_torch(spec)

return spec.squeeze(0)

Expand Down

0 comments on commit 35037f1

Please sign in to comment.