forked from auspicious3000/autovc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake_spect.py
73 lines (57 loc) · 2.35 KB
/
make_spect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import os
import pickle
import numpy as np
import soundfile as sf
from scipy import signal
from scipy.signal import get_window
from librosa.filters import mel
from numpy.random import RandomState
import hashlib
def butter_highpass(cutoff, fs, order=5):
nyq = 0.5 * fs
normal_cutoff = cutoff / nyq
b, a = signal.butter(order, normal_cutoff, btype='high', analog=False)
return b, a
def pySTFT(x, fft_length=1024, hop_length=256):
x = np.pad(x, int(fft_length//2), mode='reflect')
noverlap = fft_length - hop_length
shape = x.shape[:-1]+((x.shape[-1]-noverlap)//hop_length, fft_length)
strides = x.strides[:-1]+(hop_length*x.strides[-1], x.strides[-1])
result = np.lib.stride_tricks.as_strided(x, shape=shape,
strides=strides)
fft_window = get_window('hann', fft_length, fftbins=True)
result = np.fft.rfft(fft_window * result, n=fft_length).T
return np.abs(result)
mel_basis = mel(16000, 1024, fmin=90, fmax=7600, n_mels=80).T
min_level = np.exp(-100 / 20 * np.log(10))
b, a = butter_highpass(30, 16000, order=5)
# audio file directory
rootDir = './wavs'
# spectrogram directory
targetDir = './spmel'
dirName, subdirList, _ = next(os.walk(rootDir))
print('Found directory: %s' % dirName)
for subdir in sorted(subdirList):
print(subdir)
if not os.path.exists(os.path.join(targetDir, subdir)):
os.makedirs(os.path.join(targetDir, subdir))
_,_, fileList = next(os.walk(os.path.join(dirName,subdir)))
digest = hashlib.sha512(bytes(subdir, 'utf-8')).hexdigest()
seed = int(digest, 16) % (2 ** 32 - 1)
prng = RandomState(seed)
for fileName in sorted(fileList):
# Read audio file
x, fs = sf.read(os.path.join(dirName,subdir,fileName))
# Remove drifting noise
y = signal.filtfilt(b, a, x)
# Ddd a little random noise for model roubstness
wav = y * 0.96 + (prng.rand(y.shape[0])-0.5)*1e-06
# Compute spect
D = pySTFT(wav).T
# Convert to mel and normalize
D_mel = np.dot(D, mel_basis)
D_db = 20 * np.log10(np.maximum(min_level, D_mel)) - 16
S = np.clip((D_db + 100) / 100, 0, 1)
# save spect
np.save(os.path.join(targetDir, subdir, fileName[:-4]),
S.astype(np.float32), allow_pickle=False)