-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathgpdemo4.m
137 lines (128 loc) · 5.07 KB
/
gpdemo4.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
% GPDEMO4 GPTIPS 2 demo of multigene symbolic regression on a concrete compressive strength data set.
%
% The output being modelled is concrete compressive strength (MPa) and
% the input variables are:
%
% Cement (x1) - kg in a m3 mixture
% Blast furnace slag (x2) - kg in a m3 mixture
% Fly ash (x3) - kg in a m3 mixture
% Water (x4) - kg in a m3 mixture
% Superplasticiser (x5) - kg in a m3 mixture
% Coarse aggregate (x6) - kg in a m3 mixture
% Fine aggregate (x7) - kg in a m3 mixture
% Age (x8) - range 1 - 365 days
%
% Demonstrates feature selection in multigene symbolic regression and
% some post run analysis functions.
%
% (c) Dominic Searson 2009-2015
%
% GPTIPS 2
%
% See also GPDEMO4_CONFIG, GPDEMO1, GPDEMO2, GPDEMO3, PARETOREPORT,
% GPMODELREPORT, DRAWTREES, SUMMARY, RUNTREE, GPPRETTY, POPBROWSER
% Clear the command window, then print the demo briefing. Each cell below
% is passed to DISP in order, so the text appears exactly as if DISP had
% been called once per line. No workspace variables are created here.
clc;
cellfun(@disp, { ...
    'GPTIPS 2 Demo 4: feature selection with concrete compressive strength data set'; ...
    '------------------------------------------------------------------------------'; ...
    'The output being modelled is concrete compressive strength (MPa) and'; ...
    'the input variables are:'; ...
    ' '; ...
    ' Cement (x1) - kg in a m3 mixture'; ...
    ' Blast furnace slag (x2) - kg in a m3 mixture'; ...
    ' Fly ash (x3) - kg in a m3 mixture'; ...
    ' Water (x4) - kg in a m3 mixture'; ...
    ' Superplasticiser (x5) - kg in a m3 mixture'; ...
    ' Coarse aggregate (x6) - kg in a m3 mixture'; ...
    ' Fine aggregate (x7) - kg in a m3 mixture'; ...
    ' Age (x8) - range 1 - 365 days'; ...
    ' '; ...
    'To demonstrate feature selection in GPTIPS another 50 variables '; ...
    'consisting of normally distributed noise have been added to form the'; ...
    'input variables x9 to x58.'; ...
    ' '; ...
    'The configuration file is gpdemo4_config.m and the raw data is in'; ...
    'concrete.mat'; ...
    ' '; ...
    'The data has been divided into a training set, a holdout validation set'; ...
    'and a testing set.'; ...
    ' '; ...
    'GPTIPS is run twice for a maximum of 30 seconds per run or until a'; ...
    'RMSE of 6.5 is reached. The runs are merged into a single population'; ...
    'at the end.'; ...
    ' '; ...
    '6 genes are used (plus a bias term) so the form of the model will be'; ...
    'ypred = c0 + c1*tree1 + ... + c6*tree6'; ...
    'where ypred = predicted output, c0 = bias and c1,...,c6 are the gene weights.'; ...
    ' '; ...
    'Genes are limited to a depth of 4.'; ...
    ' '; ...
    'The function nodes used are:'; ...
    'TIMES MINUS PLUS RDIVIDE SQUARE TANH EXP LOG MULT3 ADD3 SQRT CUBE'; ...
    'POWER NEGEXP NEG ABS'; ...
    ' '; ...
    'The input variables that appear in the best model on the training'; ...
    'and validation data sets can be displayed at run time by '; ...
    'including the following two settings in gpdemo4_config.m : '; ...
    ' '; ...
    'gp.runcontrol.showBestInputs = true;'; ...
    'gp.runcontrol.showValBestInputs = true;'; ...
    ' '; ...
    'GPTIPS is run with the configuration in gpdemo4_config.m using :'; ...
    '>>gp=rungp(@gpdemo4_config);'; ...
    'Press a key to continue'; ...
    ' '});

% Block until the user presses a key, then call RUNGP with a handle to the
% demo configuration function. The result is kept in gp for the later steps.
pause;
gp = rungp(@gpdemo4_config);
% Announce the next step (running the 'valbest' individual through
% RUNTREE), wait for a key press, then make the call shown to the user.
cellfun(@disp, { ...
    ' '; ...
    'Evaluate the best validation individual of'; ...
    'the runs on the fitness function using:'; ...
    '>>runtree(gp,''valbest'');'; ...
    'Press a key to continue'; ...
    ' '});
pause;
runtree(gp,'valbest');
% This step only runs when gp.info.toolbox.symbolic is true (i.e. the
% Symbolic Math toolbox flag recorded in the gp structure is set).
% It shows the GPPRETTY call, waits for a key press, calls GPPRETTY on the
% 'valbest' individual and then explains which variables a successful run
% is expected to contain.
if gp.info.toolbox.symbolic
    disp(' ');
    % Fixed duplicated word in the prompt ("the the" -> "the").
    disp('Next, use the GPPRETTY command on the best validation individual: ');
    disp('>>gppretty(gp,''valbest'')');
    disp('Press a key to continue');
    disp(' ');
    pause;
    gppretty(gp,'valbest');
    disp(' ');
    disp('If the runs have been successful, the only variables present in ');
    disp('the best validation model should be the following:');
    % NOTE(review): the alias spellings below are printed as-is; presumably
    % they mirror gp.nodes.inputs.names in gpdemo4_config.m - confirm
    % against that file before changing any of them.
    disp('Cement Slag Ash Water Plastic Course Fine Age');
    disp('(these are defined as variable name aliases in gpdemo4_config.m');
    disp('using the gp.nodes.inputs.names setting.)');
    disp(' ');
    disp('Less successful runs may contain the noise variables x9 - x58.');
    disp('If the results seem poor, try running the demo again.');
end
% Pause so the user can read the output of the previous step.
disp('Press a key to continue');
disp(' ');
pause;

% Introduce GPPOPVARS, which is called below with a threshold of 0.75 (the
% R^2 cut-off described in the message), then wait for a key press.
% Fixed grammar in the first message ("To visualise at the" -> "To visualise the").
disp('To visualise the frequency distribution of input variables in all');
disp('models with an R^2 >= 0.75 the GPPOPVARS function can be used.');
disp('This should show a high frequency of variables x1 - x8 and a low');
disp('frequency of the irrelevant noise inputs.');
disp('>>gppopvars(gp,0.75);');
disp(' ');
disp('Press a key to continue');
disp(' ');
pause
gppopvars(gp,0.75);
% Blank separator line, printed regardless of toolbox availability.
disp(' ');

% The closing step (PARETOREPORT) is only offered when
% gp.info.toolbox.symbolic is true: describe it, wait for a key press,
% then call PARETOREPORT on the gp structure.
if gp.info.toolbox.symbolic
    cellfun(@disp, { ...
        'Finally, an HTML report listing the models on the Pareto optimal front'; ...
        'of model expressional complexity and performance can be generated using'; ...
        'the PARETOREPORT function.'; ...
        '>>paretoreport(gp)'; ...
        ' '; ...
        'Press a key to continue'; ...
        ' '});
    pause;
    paretoreport(gp);
end