-
Notifications
You must be signed in to change notification settings - Fork 3
/
fasticabm.m
212 lines (201 loc) · 8.08 KB
/
fasticabm.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
% Fastica with Binary Mask
% Adopted from M.S. Pedersen et. al, "Two-Micrphone Separation..."
clear all; close all;
% Initial value
dis=1;
evalu=0;
rand('state',1)
y={[]};
M=2; %number of sensors
u=0.5;%cardioid pattern controller
N=2;% number of signals in mixture %%% only used in mixing process
th=1; %mask threshold control (tau)?
stopthresholdini=3000; %One source if condition number is above this value
TC1=0.1; %Merge finalstereo signals if correlation is above TC1
TC2=0.03; %Merge finalstereo and enerstereo if correlation is above TC2
numlags=1; %%% when correlation between two signals are found, how many lags should be considered?
thepow=20; %%% tau_E
minpow=30; %%% included to save a few calculations.
winnumber=3; %%Selects window function
NFFT=2048;%%%DFT length
k=4; %%% window length is NFFT/k
lastfmasklength=1;
switch winnumber
case 1
WINDOW=hanning(NFFT/k);
case 2
WINDOW=hann(NFFT/k);
case 3
WINDOW=hamming(NFFT/k);
case 4
WINDOW=bartlett(NFFT/k);%(triangular)
case 5
WINDOW=triang(NFFT/k);
case 6
WINDOW=blackman(NFFT/k);
case 7
WINDOW=rectwin(NFFT/k);
end
noverlapfactor=0.75;% overlap between consecutive windows
NOVERLAP=length(WINDOW)*noverlapfactor;
%read input
[X,fs]=wavread('fm0030_25.wav');
X=X';
%%estimate energy stop thresholds
powpow=10*log10((sum(X(1,:).^2)+sum(X(2,:).^2))/(2*length(X)));%power of mixture(used to determine thresholds)
thE=powpow-thepow; %power threshold
minpower=powpow-minpow;%do not save signals with very little power (makes cumputations faster)
%%% create ideal masks for evaluation purpose
x={X}; %initialize buffer of signals to be separated
mask={[]}; %initialize mask buffer
fmask={[]};
enermask={[]};
delete_me_again=sg(X(1,:),NFFT,fs,WINDOW,NOVERLAP);%get dimensions of mask
lastremmask=zeros(size(delete_me_again));
clear delete_me_again
countmax=30; %the maximum iteration number (to be absolutely sure that the algorithm stops)
finalcnt=1; %counts the number of separated output signals
enercnt=1; %counts the number of low energy masks
exitcnt=1; % counts the number of times the mask buffer is emptied and the remaining mask is estimated
cnt=0;%number of iterations
if dis
disp('Starting algorithm...')
end
while cnt<countmax
sx=size(x,2); %%%%size of the signal vector. the signals are grouped in sx/2 grops. When there is no more signals left in x, the separation task is over.
cnt=cnt+1;
% stopthreshold=max(stopthresholdini/cnt,minstopthreshold)%enable if stop threshold should change during iterations
stopthreshold=stopthresholdini;
%%% make empty buffers to store signal pairs and corresponding masks for evaluation
xbuffer={};
maskbuffer={};
for n=1:sx
%%% Run FastICA algorithm
[y{n},Aest,Wf]=fastica(x{n},'approach','symm', 'finetune', 'tanh');
%[y{n},Aest] = icaML(x{n});
for m=1:2 %%%normalization
y{n}(m,:)=y{n}(m,:)/(10*sqrt(var(y{n}(m,:))));
end
%%% ICA evaluation
if(0)%%% plots ICA output beampatterns
evalICAdB(W,theta_est,theta,N,Ns,labelvec,u)
end
%%% estimate mask
if dis
disp('Finding mask...')
end
if evalu
[newX{1}(1,:),newX{1}(2,:),newX{2}(1,:),newX{2}(2,:),msk{1},msk{2}]=applymasks(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,th,NFFT,WINDOW,NOVERLAP,cmR,cmL);
else
[newX{1}(1,:),newX{1}(2,:),newX{2}(1,:),newX{2}(2,:),msk{1},msk{2}]=applymasks(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,th,NFFT,WINDOW,NOVERLAP);
end
%% binaural stopping criteria
for m=1:2
condi=oneortwo_cond(newX{m}(1,:),newX{m}(2,:),fs);%%% one or two signals based on condition number
if(enerstop(newX{m}(1,:),newX{m}(2,:),thE,minpower)==2)
if dis
disp('Not a speech signal-too low energy')
end
elseif(enerstop(newX{m}(1,:),newX{m}(2,:),thE,minpower)==1)
if dis
disp('Not a good quality speech signal')
end
[L,R,enermask{enercnt}]=getfinalmask(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,th,m,NFFT,WINDOW,NOVERLAP,1);%to save memory, enermask is a sparse matrix
stestr=strcat('enerstereo',int2str(enercnt));%not scaled
if dis
disp(strcat('enerstereo',int2str(enercnt)))
end
wavwrite([L,R],fs,stestr);
enercnt=enercnt+1;
elseif(condi>stopthreshold)
if dis
disp('Save as final signal...')
end
[L,R,fmask{finalcnt}]=getfinalmask(X(1,:)',X(2,:)',y{n}(1,:)',y{n}(2,:)',mask{n},fs,1,m,NFFT,WINDOW,NOVERLAP,0);%notice that in the last step, the threshold is set to 1.
%str=strcat('y',int2str(m),'iter',int2str(cnt),'n',int2str(n),'final',int2str(finalcnt))
%wavwrite(y{n}(m,:),fs,str);%%%save files
stestr=strcat('finalstereo',int2str(finalcnt));%not scaled
if dis
disp(strcat('finalstereo',int2str(finalcnt)))
end
wavwrite([L,R],fs,stestr);
finalcnt=finalcnt+1;%%%save files
else %continue
xbuffer={xbuffer{1:end},newX{m}};
maskbuffer={maskbuffer{1:end},msk{m}};
% str=strcat('y',int2str(m),'iter',int2str(cnt),'n',int2str(n))
% wavwrite(y{n}(m,:),fs,str);
end
end
end
clear x
x=xbuffer;
clear mask
mask=maskbuffer;
if(isempty(xbuffer))%%% all branches in the algorithm has ended, no more signal pairs to which ICA should be applied
if dis
disp('Stopping separation algorithm')
end
if length(fmask)~=lastfmasklength; %check if one source appears more than once in the final signals
fmask=multisigcheck(fmask,X(1,:)',X(2,:)',TC1,fs,NFFT,WINDOW,NOVERLAP,numlags);
lastfmasklength=length(fmask);
finalcnt=lastfmasklength+1;
end
if isempty(enermask{1})
else %check if poor quality signal is correlated with one of the final signals
fmask=nosigcorr(fmask,enermask,X(1,:)',X(2,:)',TC2,fs,NFFT,WINDOW,NOVERLAP,numlags);
end
enercnt=1;
enermask={[]};
if isempty(fmask{1})
if dis
disp('No signals segregated. Stopping separation procedure.')'
end
exitcnt=4;
break
else
if dis
disp('Finding remaining mask...')
end
if evalu
[L,R,remainingmask]=getremainingmask(X(1,:)',X(2,:)',fmask,fs,NFFT,WINDOW,NOVERLAP,cmL,cmR);
else
[L,R,remainingmask]=getremainingmask(X(1,:)',X(2,:)',fmask,fs,NFFT,WINDOW,NOVERLAP);
end
end
if lastremmask==remainingmask %%% Should not continue if background mask have not changed.
if dis
disp('No changes. Stopping separation procedure.')
end
exitcnt=4;
wavwrite([L,R],fs,'remaining');
break
else
lastremmask=remainingmask;%%% to see if the next background mask have changed.
end
wavwrite([L,R],fs,'remaining');
condi=oneortwo_cond(newX{m}(1,:),newX{m}(2,:),fs);
if(enerstop(L',R',thE,minpower,fs)>0) %% enough energy in remaining mask?
if dis
disp('Not a speech signal')
end
break
elseif(condi>stopthreshold)%%%more than one signal in remaining?
stestr=strcat('finalstereo',int2str(finalcnt));%not scaled
wavwrite([L,R],fs,stestr);
if dis
disp('save remaing')
disp(strcat('finalstereo',int2str(finalcnt)))
end
fmask{finalcnt}=remainingmask;
break
else
mask={remainingmask};
x={[L';R']};
end
if exitcnt>=4
break
end
exitcnt=exitcnt+1;
end
end