This repository has been archived by the owner on Feb 27, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 53
/
wavenet_decoder.py
88 lines (74 loc) · 4.2 KB
/
wavenet_decoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#####################################################################################
# MIT License #
# #
# Copyright (C) 2019 Charly Lamothe #
# #
# This file is part of VQ-VAE-Speech. #
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy #
# of this software and associated documentation files (the "Software"), to deal #
# in the Software without restriction, including without limitation the rights #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
# copies of the Software, and to permit persons to whom the Software is #
# furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in all #
# copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
# SOFTWARE. #
#####################################################################################
from modules.residual_stack import ResidualStack
from modules.conv1d_builder import Conv1DBuilder
from modules.jitter import Jitter
from wavenet_vocoder.wavenet import WaveNet
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
class WaveNetDecoder(nn.Module):
    """WaveNet-based decoder conditioned on latent (local) and speaker (global) features.

    The latent sequence is (optionally) time-jittered during training,
    mixed across neighboring timesteps by a single 1D convolution, and then
    fed to a WaveNet vocoder as its local conditioning signal.
    """

    def __init__(self, configuration, speaker_dic, device):
        """Build the decoder from a configuration dict and a speaker dictionary.

        Args:
            configuration: dict of hyperparameters (see keys read below).
            speaker_dic: mapping of speakers; only its length is used as the
                number of global-conditioning speakers.
            device: torch device handle, stored for later use by callers.
        """
        super(WaveNetDecoder, self).__init__()

        self._device = device

        # Randomized time-jitter regularization, applied only while training
        # (see forward()).
        self._use_jitter = configuration['use_jitter']
        if self._use_jitter:
            self._jitter = Jitter(configuration['jitter_probability'])

        # Single convolution mixing information across neighboring latent
        # timesteps before WaveNet conditioning.
        # NOTE(review): an earlier comment claimed filter length 3 with 128
        # hidden units, but the code uses kernel_size=2 and 768 output
        # channels — confirm which is intended.
        self._conv_1 = Conv1DBuilder.build(
            in_channels=64,
            out_channels=768,
            kernel_size=2,
            use_kaiming_normal=configuration['use_kaiming_normal']
        )

        self._wavenet = WaveNet(
            configuration['quantize'],
            configuration['n_layers'],
            configuration['n_loop'],
            configuration['residual_channels'],
            configuration['gate_channels'],
            configuration['skip_out_channels'],
            configuration['filter_size'],
            cin_channels=configuration['local_condition_dim'],
            gin_channels=configuration['global_condition_dim'],
            n_speakers=len(speaker_dic),
            upsample_conditional_features=True,
            upsample_scales=[2, 2, 2, 2, 2, 12]  # product is 768
        )

    def forward(self, y, local_condition, global_condition):
        """Run WaveNet on y with jittered/mixed local and global conditioning."""
        if self._use_jitter and self.training:
            local_condition = self._jitter(local_condition)
        mixed_condition = self._conv_1(local_condition)
        return self._wavenet(y, mixed_condition, global_condition)