-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathdrawtrees.m
336 lines (287 loc) · 11.5 KB
/
drawtrees.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
function drawtrees(gp,ID,reportName,font,nodeTextColor,connectionLineStyle,nodeBorderStyle,nodeColor1,nodeColor2,boxShadow)
%DRAWTREES Draws the tree structure(s) of an individual in a web browser.
%
%   DRAWTREES function will draw the 'trees' in the selected individual
%   regardless of whether it is a regression model or not. The style and
%   colours used to draw the trees can be controlled by additional CSS
%   arguments as outlined below.
%
%   DRAWTREES(GP,ID) draws the trees in trees.htm for the population member
%   with numeric identifier ID in the GPTIPS datastructure GP.
%
%   DRAWTREES(GP,ID,'REPORTNAME') draws the trees in REPORTNAME.htm.
%
%   DRAWTREES(GP,'best','REPORTNAME') does the same for the best individual
%   of the run (as evaluated on training data).
%
%   DRAWTREES(GP,'valbest','REPORTNAME') does the same for the individual
%   that performed best on the validation data (if this data exists).
%
%   DRAWTREES(GP,'testbest','REPORTNAME') does the same for the individual
%   that performed best on the test data (if this data exists).
%
%   DRAWTREES(GP,GPMODEL) operates on the GPMODEL struct representing a
%   multigene regression model, i.e. the struct returned by the functions
%   GPMODEL2STRUCT or GENES2GPMODEL.
%
%   DRAWTREES(GP,EXPR) where EXPR is a cell array of encoded tree strings
%   draws the corresponding trees. Each element of EXPR must be a valid
%   encoded tree expression such as that generated by TREEGEN. For example,
%   to draw 2 randomly generated trees:
%
%   EXPR{1} = TREEGEN(GP);EXPR{2} = TREEGEN(GP);
%   DRAWTREES(GP,EXPR);
%
%   Advanced formatting:
%
%   The default colours and formatting options are intended to be sober and
%   for use in publications etc. To change these you can supply the
%   following additional command line parameters.
%
%   DRAWTREES(GP,ID,REPORTNAME,FONT,NODETEXTCOLOR,CONNECTIONLINESTYLE,NODEBORDERSTYLE,NODECOLOR1,NODECOLOR2,BOXSHADOW)
%
%   Valid CSS styles should be used as the additional parameters as
%   follows. Any parameter left empty ([]) will use the defaults.
%
%   FONT should be a valid CSS font or font-family, e.g. arial or 'Open
%   Sans'. As well as the standard web safe fonts, popular 'google fonts'
%   supported are: 'Open Sans','Roboto','Oxygen' and 'PT Sans'. This
%   specifies the font used in the tree nodes and the font used in the
%   report text. The default is 'Open Sans'.
%
%   NODETEXTCOLOR is a string containing CSS for the node text color, e.g.
%   'black' or '#ED145A'. The default is 'black'.
%
%   CONNECTIONLINESTYLE is a string containing CSS for the lines connecting
%   the nodes, e.g. '2px solid black' or '1px solid #ED145A'. The default
%   is '2px solid black'.
%
%   NODEBORDERSTYLE is a string containing CSS for the node borders e.g.
%   '2px solid blue' or '5px solid #18D8F0'. The default is '2px solid
%   black'.
%
%   NODECOLOR1 and NODECOLOR2 are strings containing CSS colours, e.g.
%   '#18D8F0' or 'blue'. The node is drawn with a linear gradient fill from
%   top to bottom starting with NODECOLOR1 and ending with NODECOLOR2. The
%   defaults are 'white'.
%
%   BOXSHADOW is a boolean with TRUE = use a node box shadow and FALSE = no
%   shadow. The default is FALSE.
%
%   Examples:
%
%   To use all the style defaults, but to change the font to 'Roboto' use
%
%   drawtrees(gp,'best',[],'Roboto')
%
%   For a highly not recommended 'Miami Vice' vibe use
%
%   drawtrees(gp,'best',[],'Comic Sans MS','yellow','8px solid pink','3px solid #ED145A','#18D8F0','#F090C0',true)
%
%   Remarks:
%
%   This function connects to the Google Visualization API and hence
%   internet connectivity is required.
%
%   The report is written to REPORTNAME.htm (relative to the current
%   folder unless REPORTNAME contains a path) and then opened with the
%   system browser.
%
%   See the following link for details of Google org charts:
%   https://developers.google.com/chart/interactive/docs/gallery/orgchart
%
%   Copyright (c) 2009-2015 Dominic Searson
%
%   GPTIPS 2
%
%   See also PROCESSORGCHARTJS, GPTREESTRUCTURE, GPMODELREPORT

%with fewer than two arguments just print a usage reminder
if nargin < 2
    disp('Basic usage is DRAWTREES(GP,ID) where ID is a population member identifier, e.g.');
    disp('DRAWTREES(GP,26) or');
    disp('DRAWTREES(GP,''BEST'')');
    disp('DRAWTREES(GP,''VALBEST'')');
    disp('DRAWTREES(GP,''TESTBEST'')');
    return;
end

%fill in defaults for any omitted or empty ([]) optional arguments
if nargin < 3 || isempty(reportName)
    reportName = 'trees';
end

if nargin < 4 || isempty(font)
    font = 'Open Sans';
end

if nargin < 5 || isempty(nodeTextColor)
    nodeTextColor = 'black';
end

if nargin < 6 || isempty(connectionLineStyle)
    connectionLineStyle = '2px solid black';
end

if nargin < 7 || isempty(nodeBorderStyle)
    nodeBorderStyle = '2px solid black';
end

if nargin < 8 || isempty(nodeColor1)
    nodeColor1 = 'white';
end

if nargin < 9 || isempty(nodeColor2)
    nodeColor2 = 'white';
end

if nargin < 10 || isempty(boxShadow)
    boxShadow = false;
end

%extract data set name if defined (tolerate a GP struct without the field)
if isfield(gp,'userdata') && isfield(gp.userdata,'name') && ~isempty(gp.userdata.name)
    setname = ['Data: ' gp.userdata.name];
else
    setname = '';
end

%gpmodel struct supplied
if isa(ID,'struct') && isfield(ID,'valid')
    gpmodel = ID;
    dispId = 'user model';
    
    %numeric population index supplied
elseif isnumeric(ID)
    
    %reject non-scalar and non-integer identifiers up front so the caller
    %gets the intended message rather than an obscure indexing error
    if ~isscalar(ID) || ID ~= fix(ID) || ID > gp.runcontrol.pop_size || ID < 1
        error('Supplied numeric individual identifier is invalid.');
    end
    
    gpmodel.genes.geneStrs = gp.pop{ID};
    gpmodel.genes.num_genes = numel(gpmodel.genes.geneStrs);
    dispId = num2str(ID);
    
elseif ischar(ID) && strcmpi(ID,'best')
    
    gpmodel.genes.geneStrs = gp.results.best.individual;
    gpmodel.genes.num_genes = numel(gpmodel.genes.geneStrs);
    dispId = ID;
    
elseif ischar(ID) && strcmpi(ID,'valbest')
    
    %check that validation data is present
    if ~isfield(gp.results,'valbest')
        disp('No validation data was found. Try drawtrees(gp,''best'') instead.');
        return;
    end
    
    gpmodel.genes.geneStrs = gp.results.valbest.individual;
    gpmodel.genes.num_genes = numel(gpmodel.genes.geneStrs);
    dispId = ID;
    
elseif ischar(ID) && strcmpi(ID,'testbest')
    
    %check that test data is present
    if ~isfield(gp.results,'testbest')
        disp('No test data was found. Try drawtrees(gp,''best'') instead.');
        return;
    end
    
    gpmodel.genes.geneStrs = gp.results.testbest.individual;
    gpmodel.genes.num_genes = numel(gpmodel.genes.geneStrs);
    dispId = ID;
    
    %cell array of encoded tree strings (an empty cell falls through to the
    %'Invalid selection.' error instead of failing on ID{1})
elseif iscell(ID) && ~isempty(ID) && ischar(ID{1})
    
    gpmodel.genes.geneStrs = ID;
    gpmodel.genes.num_genes = numel(ID);
    dispId = 'Cell array of encoded trees';
    
    %single encoded tree string
elseif ischar(ID)
    
    gpmodel.genes.geneStrs = {ID};
    gpmodel.genes.num_genes = 1;
    dispId = ID;
    
else
    error('Invalid selection.');
end

%create html file
if ~ischar(reportName)
    error('The reportname parameter must be a string.');
end

htmlFileName = [reportName '.htm'];
fid = fopen(htmlFileName,'wt+');

if fid == -1
    error(['Could not open the file ' htmlFileName ' for writing.']);
end

%make sure the file handle is released even if report generation throws
fileCloser = onCleanup(@() fclose(fid));

%NOTE: any text that contains caller- or data-derived values is written
%through a literal '%s' format. Passing such text as the format argument
%itself would make fprintf interpret any '%' or '\' it contains.

%generate html header info
fprintf(fid,'<!DOCTYPE html>');
fprintf(fid,'\n');
fprintf(fid,'<html lang="en">\n');
fprintf(fid,'<head>\n');
fprintf(fid,'<meta http-equiv="content-type" content="text/html; charset=utf-8" name="description" content="GPTIPS 2 Trees" name="author" content="Dominic Searson"/>\n');
fprintf(fid,'%s\n',['<title>GPTIPS Trees. Config: ' char(gp.info.configFile) '. Trees from individual: ' dispId '</title>']);

%popular google fonts
fprintf(fid,'<link href=''http://fonts.googleapis.com/css?family=Open+Sans'' rel=''stylesheet'' type=''text/css''>');
fprintf(fid,'<link href=''http://fonts.googleapis.com/css?family=Roboto'' rel=''stylesheet'' type=''text/css''>');
fprintf(fid,'<link href=''http://fonts.googleapis.com/css?family=Oxygen'' rel=''stylesheet'' type=''text/css''>');
fprintf(fid,'<link href=''http://fonts.googleapis.com/css?family=PT+Sans'' rel=''stylesheet'' type=''text/css''>');

%load vis from Google
fprintf(fid,'<script type="text/javascript" src="http://www.google.com/jsapi"></script>\n');
fprintf(fid,'<script type="text/javascript">google.load(''visualization'', ''1'', {packages: [''orgchart'']});\n');
fprintf(fid,'</script>\n');

%node CSS
fprintf(fid,'<style>');
fprintf(fid,'.google-visualization-orgchart-node {');
fprintf(fid,'%s',['color: ' nodeTextColor ' ;']);
fprintf(fid,'text-align: center;');
fprintf(fid,'vertical-align: middle;');
fprintf(fid,'%s',['font-family: ''' font ''',''Helvetica Neue'',helvetica,arial,sans-serif;']);
fprintf(fid,'%s',['border: ' nodeBorderStyle ' ;']);
fprintf(fid,'%s',['background-color: ' nodeColor1 ';']);
fprintf(fid,'%s',['background: -webkit-gradient(linear, left top, left bottom, from(' nodeColor1 '), to(' nodeColor2 '));']);
fprintf(fid,'vertical-align: middle;');

%only the "no shadow" case needs explicit CSS; otherwise nothing is
%written and the chart's own styling applies
if ~boxShadow
    fprintf(fid,'box-shadow: none;');
    fprintf(fid,'-webkit-box-shadow: none;');
    fprintf(fid,'-moz-box-shadow: none;');
end

fprintf(fid,'}');
fprintf(fid,'h1, h2, h3 {\n');
fprintf(fid,'color: #0073bd; ');
fprintf(fid,'margin-top: 20px; ');
fprintf(fid,'\n}\n');
fprintf(fid,'</style>');

%connecting line CSS
fprintf(fid,'<style>');
fprintf(fid,'.google-visualization-orgchart-lineleft {');
fprintf(fid,'%s',[' border-left: ' connectionLineStyle ' ; }']);
fprintf(fid,'.google-visualization-orgchart-lineright {');
fprintf(fid,'%s',[' border-right: ' connectionLineStyle ' ; }']);
fprintf(fid,'.google-visualization-orgchart-linebottom {');
fprintf(fid,'%s',[' border-bottom: ' connectionLineStyle ' ; }']);
fprintf(fid,'</style>');

%generate JS for tree charts
processOrgChartJS(fid,gp,gpmodel);

%end head
fprintf(fid,'</head>');

%generate html body
fprintf(fid,'%s',['<body style="font-family: ''' font ''',''Helvetica Neue'', helvetica, arial, sans-serif; ">']);
fprintf(fid,'<div style="text-align: left;margin-bottom: 30px; margin-top: 30px;margin-left: 30px;">');
fprintf(fid,'<h2>GPTIPS tree structure report</h2>');

if ~isempty(setname)
    fprintf(fid,'%s\n',['<p class="text">' setname '</p>']);
end

%one line describing which individual the report is for (nothing is
%written for raw encoded tree strings / cell arrays)
if isnumeric(ID) && ID > 0 && ID <= gp.runcontrol.pop_size
    fprintf(fid,'%s\n',['<p class="text">For individual with ID: ' int2str(ID) '</p>']);
elseif strcmpi(ID,'best')
    fprintf(fid,'<p class="text">For best model on training data.</p>\n');
elseif strcmpi(ID,'valbest')
    fprintf(fid,'<p class="text">For best model on validation data.</p>\n');
elseif strcmpi(ID,'testbest')
    fprintf(fid,'<p class="text">For best model on test data.</p>\n');
elseif isa(ID,'struct') && isfield(ID,'valid')
    fprintf(fid,'<p class="text">For a user generated model struct.</p>\n');
end

fprintf(fid,'%s\n',['<p class="date">' datestr(now) '</p>']);

%gene tree structures: one table per tree with a caption row (node count,
%depth, complexity) and a placeholder div with id "tree<n>"
for n=1:gpmodel.genes.num_genes
    
    treeStr = gpmodel.genes.geneStrs{n};
    
    fprintf(fid,'<table>');
    fprintf(fid,'<tr>');
    fprintf(fid,'<td style="text-align:center;">');
    fprintf(fid,'%s\n',['<p>Tree ' int2str(n) '</p><p style="color:gray;"> nodes = ' int2str(getnumnodes(treeStr))...
        ' depth = ' int2str(getdepth(treeStr)) ' complexity = ' int2str(getcomplexity(treeStr)) '</p>']);
    fprintf(fid,'</td>');
    fprintf(fid,'</tr>');
    
    fprintf(fid,'<tr>');
    fprintf(fid,'<td>');
    fprintf(fid,'%s\n',['<div id="tree' int2str(n) '" style="width: 300px;"></div>']);
    fprintf(fid,'</td>');
    fprintf(fid,'</tr>');
    fprintf(fid,'</table>');
    fprintf(fid,'<p></p>\n');
end

fprintf(fid,'</div>');

%footer
fprintf(fid,'<p> </p>\n');
fprintf(fid,'<p> </p>\n');
fprintf(fid,'<p style="color:gray;text-align:center;">GPTIPS - the symbolic data mining platform for MATLAB</p>');
fprintf(fid,'<p style="color:gray;text-align:center;">© Dominic Searson 2009-2015</p>');

%close
fprintf(fid,'</body>\n');
fprintf(fid,'</html>\n');

%clearing the onCleanup object runs fclose(fid) now, so the file is fully
%written and closed before the browser is asked to open it
clear fileCloser;

disp(['Trees drawn to ' reportName '.htm']);
disp('Opening in system browser.');
web(htmlFileName,'-browser');