-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathgpfinalise.m
235 lines (183 loc) · 8.16 KB
/
gpfinalise.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
function gp = gpfinalise(gp)
%GPFINALISE Finalises a run.
%
% GP = GPFINALISE(GP) finalises the GP structure after a run.
%
% Copyright (c) 2009-2015 Dominic Searson
%
% GPTIPS 2
%
% See also RUNGP, GPINIT, GPCHECK, GPDEFAULTS, UPDATESTATS
if ~gp.runcontrol.quiet
disp('Finalising run.')
end;
gp.state.run_completed = true;
gp.info.stopTime = datestr(now,0);
gp.info.runTime = [num2str(gp.state.runTimeElapsed/60,2) ' min.'];
mgregressmodel = false; %flag to signify a multigene regression model
if strncmpi(func2str(gp.fitness.fitfun),'regressmulti',12)
gp.fitness.r2train = zeros(gp.runcontrol.pop_size,1);
mgregressmodel = true;
end
%for multigene regression models, set up storage for validation and test
%data r2 for each model in final population
valdata = false; testdata = false;
if mgregressmodel
if isfield(gp.userdata,'xval') && ~isempty(gp.userdata.xval)
gp.fitness.r2val = zeros(gp.runcontrol.pop_size,1);
minValRMSE = gp.results.valbest.valfitness;
valdata = true;
end
if isfield(gp.userdata,'xtest') && ~isempty(gp.userdata.xtest)
gp.fitness.r2test = zeros(gp.runcontrol.pop_size,1);
minTestRMSE = Inf;
testdata = true;
gp.results.testbest.about = 'Best individual on testing data (in final population)';
end
end
%perform final population processing
for i=1:gp.runcontrol.pop_size
%compute node count and expressional complexity for each individual
%regardless of whether node count or complexity was used as fitness
%selection criteria during run.
gp.fitness.complexity(i,1) = getcomplexity(gp.pop{i});
gp.fitness.nodecount(i,1) = getnumnodes(gp.pop{i});
%if the run was a multigene symbolic regression run then calculate all
%R2 values and add them to the GP data structure.
if mgregressmodel
%skip bad models (they will be given R2 = 0)
if isinf(gp.fitness.values(i))
continue;
end
gpmodel = gpmodel2struct(gp,i,false,false,false);
%store training/validation/test r2 for all valid models
if gpmodel.valid
gp.fitness.r2train(i,1) = gpmodel.train.r2;
if valdata && ~gpmodel.val.warning
gp.fitness.r2val(i,1) = gpmodel.val.r2;
%check if any model in final population is
%better at predicting the validation data
%than the model stored in gp.results.valbest during the run
if gpmodel.val.rmse < minValRMSE
minValRMSE = gpmodel.val.rmse;
gp.results.valbest.complexity = gp.fitness.complexity(i,1);
gp.results.valbest.nodecount = gp.fitness.nodecount(i,1);
gp.results.valbest.fitness = gpmodel.train.rmse;
gp.results.valbest.testfitness = gpmodel.test.rmse;
gp.results.valbest.valfitness = gpmodel.val.rmse;
gp.results.valbest.r2train = gpmodel.train.r2;
gp.results.valbest.r2test = gpmodel.test.r2;
gp.results.valbest.r2val = gpmodel.val.r2;
gp.results.valbest.foundatgen = gp.state.count - 1;
gp.results.valbest.individual = gpmodel.genes.geneStrs;
gp.results.valbest.returnvalues = gpmodel.genes.geneWeights;
gp.results.valbest.eval_individual = tree2evalstr(gp.results.valbest.individual,gp);
end
end
end
%process test data related results
if testdata && ~gpmodel.test.warning
gp.fitness.r2test(i,1) = gpmodel.test.r2;
if gpmodel.test.rmse < minTestRMSE
minTestRMSE = gpmodel.test.rmse;
gp.results.testbest.complexity = gp.fitness.complexity(i,1);
gp.results.testbest.nodecount = gp.fitness.nodecount(i,1);
gp.results.testbest.fitness = gpmodel.train.rmse;
gp.results.testbest.testfitness = gpmodel.test.rmse;
gp.results.testbest.r2train = gpmodel.train.r2;
gp.results.testbest.r2test = gpmodel.test.r2;
gp.results.testbest.individual = gpmodel.genes.geneStrs;
gp.results.testbest.returnvalues = gpmodel.genes.geneWeights;
gp.results.testbest.eval_individual = tree2evalstr(gp.results.testbest.individual,gp);
if valdata && ~gpmodel.val.warning
gp.results.testbest.r2val = gpmodel.val.r2;
gp.results.testbest.valfitness = gpmodel.val.r2;
end
end
end
end
end %end of loop through population
%multigene regression specific tasks
if mgregressmodel
%process best model on training
gpmodel = gpmodel2struct(gp,'best',false,false,false);
if gpmodel.valid
gp.results.best.r2train = gpmodel.train.r2;
if ~gpmodel.test.warning
gp.results.best.r2test = gpmodel.test.r2;
gp.results.best.testfitness = gpmodel.test.rmse;
end
if ~gpmodel.val.warning
gp.results.best.r2val = gpmodel.val.r2;
gp.results.best.testfitness = gpmodel.test.rmse;
end
else
gp.results.best.r2train = 0;
end
end
gp.results.best = orderfields(gp.results.best);
%ensure both expressional complexity and node count are computed for
%'valbest' individual
if isfield(gp.results,'valbest')
gp.results.valbest.about = 'Best individual on validation data';
gp.results.valbest.nodecount = getnumnodes(gp.results.valbest.individual);
gp.results.valbest.complexity = getcomplexity(gp.results.valbest.individual);
if mgregressmodel
gpmodel = gpmodel2struct(gp,'valbest',false,false,false);
if gpmodel.valid
gp.results.valbest.r2train = gpmodel.train.r2;
if ~gpmodel.test.warning
gp.results.valbest.r2test = gpmodel.test.r2;
gp.results.valbest.testfitness = gpmodel.test.rmse;
end
if ~gpmodel.val.warning
gp.results.valbest.r2val = gpmodel.val.r2;
gp.results.valbest.testfitness = gpmodel.test.rmse;
end
else
gp.results.valbest.r2train = 0;
end
end
gp.results.valbest = orderfields(gp.results.valbest);
end
%make sure history fields are right size
gp.results.history.bestfitness = gp.results.history.bestfitness(1:gp.state.count);
gp.results.history.meanfitness = gp.results.history.meanfitness(1:gp.state.count);
gp.results.history.std_devfitness = gp.results.history.std_devfitness(1:gp.state.count);
if isfield(gp.results.history,'valfitness')
gp.results.history.valfitness = gp.results.history.valfitness(1:gp.state.count);
end
%reset warning status to user's
warning(gp.info.log0state.state,'MATLAB:log:logOfZero');
gp.info = rmfield(gp.info,'log0state');
%get rid of timer field
gp.state = rmfield(gp.state,'tic');
%remove fitness cache
if gp.runcontrol.usecache
gp.fitness = rmfield(gp.fitness,'cache');
end
if ~gp.runcontrol.quiet
disp(['GPTIPS run complete in ' gp.info.runTime] );
disp(['Best fitness acheived: ' num2str(gp.results.best.fitness)]);
disp('-----------------------------------------------------------');
end
gp = orderfields(gp);
gp.source = 'rungp';
function y = isNumberwang(x)
%ISNUMBERWANG Determines whether an input X is numberwang.
%
% Y = ISNUMBERWANG(X)
%
% Remarks: only works for numeric X. ObjectWang currently undefined.
%
% Copyright (c) 2009-2015 Dominic Searson
%
% GPTIPS 2
if isnumeric(x)
%only do computation on real part of x
x = real(x);
y = logical(airy(1000.*x));
% TODO: need quaternions tbx
else
y = false(size(x));
end