-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmdct.py
111 lines (85 loc) · 3.16 KB
/
mdct.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import scipy
import numpy as np
import math
import numpy as np
import pydub
def MDCT(data, N, isInverse=False):
""" Fast MDCT algorithm for window length a+b following pp. 141-143 of
Bosi & Goldberg, "Introduction to Digital Audio..." book
(and where 2/N factor is included in forward transform instead of inverse)
a: left half-window length
b: right half-window length
"""
n_0 = (N/2. + 1.) / 2.
n = np.linspace(0, N-1., N)
out = np.array([])
if isInverse:
# Actually, extend K and the data
K = np.linspace(0, N-1, N)
mirror = -data[::-1]
data = np.append(data, mirror)
twiddle_exponents = np.multiply(K, 1j*2.*np.pi*n_0/N)
twiddle = np.exp(twiddle_exponents)
twiddle_signal = np.multiply(data, twiddle)
ifft_data = np.fft.ifft(twiddle_signal)
shifted_n = np.add(n, n_0)
post_twiddle_exponents = np.multiply(shifted_n, 1j * np.pi / N)
post_twiddle = np.exp(post_twiddle_exponents)
out = np.multiply(ifft_data, post_twiddle)
out = np.real(out)
out = np.multiply(N, out)
else:
K = np.linspace(0, N/2. - 1, N/2)
twiddle_exponents = np.multiply(n, -1j * np.pi / N)
twiddle = np.exp(twiddle_exponents)
twiddle_signal = np.multiply(data, twiddle)
fft_data = np.fft.fft(twiddle_signal)
shifted_K = np.add(K, 0.5)
post_twiddle_exponents = np.multiply(shifted_K, -1j * (2*np.pi/N) * n_0)
post_twiddle = np.exp(post_twiddle_exponents)
out = np.multiply(2./N, fft_data[:N/2])
out = np.multiply(post_twiddle, out)
out = np.real(out)
return out
def IMDCT(data, N):
return MDCT(data, N, isInverse=True)
def slow_mdct(x,N):
return np.array([np.sum([x[n]*cos(np.pi/N*(n+0.5*(N+1))*(k+0.5)) for n in range(2*N)]) for k in range(N)])
def slow_imdct(X,N):
return (1./N)*np.array([np.sum([X[k]*cos(np.pi/N*(n+0.5*(N+1))*(k+0.5)) for k in range(N)]) for n in range(2*N)])
def chunk(data,N):
assert len(data)%N == 0
x = data.copy()
x = x.reshape((-1,N))
x = np.concatenate((x[:-1],x[1:]),axis=1)
return x
def dechunk(chunks):
x = np.zeros((chunks.shape[0]+1)*chunks.shape[1]/2)
N = chunks.shape[1]/2
for n in range(chunks.shape[0]):
x[n*N:(n+2)*N]+=chunks[n,:]
x[:N]+=chunks[0,:N]
x[-N:]+=chunks[-1,-N:]
return x
def loadf(fname):
f = pydub.AudioSegment.from_mp3(fname)
data = np.fromstring(f._data, np.int16)
data = data.astype(np.float64).reshape((-1,2))
data = data.mean(axis=1)
data -= data.mean(axis=0)
data /= data.std(axis=0) * 3.
return data
data = loadf('Kimiko_Ishizaka_-_01_-_Aria.mp3')
N = 1024
data = data[:-(len(data)%N)]
window = np.sin(np.pi/(2*N)*(np.arange(2*N)+0.5))
c = chunk(data,N)
x = dechunk(c)
spectrum = np.array([MDCT(window*cc,2*N) for cc in c])
f = np.linspace(0,44100/2.,1024)
bark = 13*np.arctan(0.00076*f)+3.5*np.arctan((f/3500.)**2)
bark_ind = bark.astype(int)
energies = np.zeros((spectrum.shape[0],26))
for n in range(26):
energies[:,n] = np.sqrt(((spectrum[:,bark_ind==n]**2).sum(axis=1)))
spectrum_norm = spectrum/energies[:,bark_ind]