@@ -169,17 +169,9 @@ def setOptimizer(self, **kwargs):
         if self.params.get('ACCUMULATE_GRADIENTS', 1) > 1 and self.params['OPTIMIZER'].lower() != 'adam':
             logging.warning('Gradient accumulation is only implemented for the Adam optimizer. Setting "ACCUMULATE_GRADIENTS" to 1.')
             self.params['ACCUMULATE_GRADIENTS'] = 1
-        if self.verbose > 0:
-            logging.info("Preparing optimizer: %s [LR: %s - LOSS: %s - "
-                         "CLIP_C %s - CLIP_V %s - LR_OPTIMIZER_DECAY %s - ACCUMULATE_GRADIENTS %s] and compiling." %
-                         (str(self.params['OPTIMIZER']),
-                          str(self.params.get('LR', 0.01)),
-                          str(self.params.get('LOSS', 'categorical_crossentropy')),
-                          str(self.params.get('CLIP_C', 0.)),
-                          str(self.params.get('CLIP_V', 0.)),
-                          str(self.params.get('LR_OPTIMIZER_DECAY', 0.0)),
-                          str(self.params.get('ACCUMULATE_GRADIENTS', 1))
-                          ))
+
+        optimizer_str = '\t LR: ' + str(self.params.get('LR', 0.01)) + \
+                        '\n\t LOSS: ' + str(self.params.get('LOSS', 'categorical_crossentropy'))
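+        # e.g. with the defaults above, optimizer_str starts as:
+        #     LR: 0.01
+        #     LOSS: categorical_crossentropy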
 
         if self.params.get('USE_TF_OPTIMIZER', False) and K.backend() == 'tensorflow':
             if self.params['OPTIMIZER'].lower() not in ['sgd', 'adagrad', 'adadelta', 'rmsprop', 'adam']:
@@ -200,19 +192,36 @@ def setOptimizer(self, **kwargs):
                 optimizer = TFOptimizer(tf.train.MomentumOptimizer(self.params.get('LR', 0.01),
                                                                    self.params.get('MOMENTUM', 0.0),
                                                                    use_nesterov=self.params.get('NESTEROV_MOMENTUM', False)))
+                optimizer_str += '\n\t MOMENTUM: ' + str(self.params.get('MOMENTUM', 0.0)) + \
+                                 '\n\t NESTEROV: ' + str(self.params.get('NESTEROV_MOMENTUM', False))
+
             elif self.params['OPTIMIZER'].lower() == 'adam':
                 optimizer = TFOptimizer(tf.train.AdamOptimizer(learning_rate=self.params.get('LR', 0.01),
+                                                               beta1=self.params.get('BETA_1', 0.9),
+                                                               beta2=self.params.get('BETA_2', 0.999),
                                                                epsilon=self.params.get('EPSILON', 1e-7)))
+                optimizer_str += '\n\t BETA_1: ' + str(self.params.get('BETA_1', 0.9)) + \
+                                 '\n\t BETA_2: ' + str(self.params.get('BETA_2', 0.999)) + \
+                                 '\n\t EPSILON: ' + str(self.params.get('EPSILON', 1e-7))
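+                # Note: tf.train.AdamOptimizer names these arguments beta1/beta2
+                # (no underscore), unlike keras.optimizers.Adam's beta_1/beta_2.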
+
             elif self.params['OPTIMIZER'].lower() == 'adagrad':
                 optimizer = TFOptimizer(tf.train.AdagradOptimizer(self.params.get('LR', 0.01)))
+
             elif self.params['OPTIMIZER'].lower() == 'rmsprop':
                 optimizer = TFOptimizer(tf.train.RMSPropOptimizer(self.params.get('LR', 0.01),
                                                                   decay=self.params.get('LR_OPTIMIZER_DECAY', 0.0),
                                                                   momentum=self.params.get('MOMENTUM', 0.0),
                                                                   epsilon=self.params.get('EPSILON', 1e-7)))
+                optimizer_str += '\n\t MOMENTUM: ' + str(self.params.get('MOMENTUM', 0.0)) + \
+                                 '\n\t EPSILON: ' + str(self.params.get('EPSILON', 1e-7))
+
             elif self.params['OPTIMIZER'].lower() == 'adadelta':
                 optimizer = TFOptimizer(tf.train.AdadeltaOptimizer(learning_rate=self.params.get('LR', 0.01),
+                                                                   rho=self.params.get('RHO', 0.95),
                                                                    epsilon=self.params.get('EPSILON', 1e-7)))
+                optimizer_str += '\n\t RHO: ' + str(self.params.get('RHO', 0.95)) + \
+                                 '\n\t EPSILON: ' + str(self.params.get('EPSILON', 1e-7))
+
             else:
                 raise Exception('\tThe chosen optimizer is not implemented.')
         else:
@@ -223,6 +232,8 @@ def setOptimizer(self, **kwargs):
                                 nesterov=self.params.get('NESTEROV_MOMENTUM', False),
                                 clipnorm=self.params.get('CLIP_C', 0.),
                                 clipvalue=self.params.get('CLIP_V', 0.))
+                optimizer_str += '\n\t MOMENTUM: ' + str(self.params.get('MOMENTUM', 0.0)) + \
+                                 '\n\t NESTEROV: ' + str(self.params.get('NESTEROV_MOMENTUM', False))
 
             elif self.params['OPTIMIZER'].lower() == 'rmsprop':
                 optimizer = RMSprop(lr=self.params.get('LR', 0.001),
@@ -231,13 +242,16 @@ def setOptimizer(self, **kwargs):
                                     clipnorm=self.params.get('CLIP_C', 0.),
                                     clipvalue=self.params.get('CLIP_V', 0.),
                                     epsilon=self.params.get('EPSILON', 1e-7))
+                optimizer_str += '\n\t RHO: ' + str(self.params.get('RHO', 0.9)) + \
+                                 '\n\t EPSILON: ' + str(self.params.get('EPSILON', 1e-7))
 
             elif self.params['OPTIMIZER'].lower() == 'adagrad':
                 optimizer = Adagrad(lr=self.params.get('LR', 0.01),
                                     decay=self.params.get('LR_OPTIMIZER_DECAY', 0.0),
                                     clipnorm=self.params.get('CLIP_C', 0.),
                                     clipvalue=self.params.get('CLIP_V', 0.),
                                     epsilon=self.params.get('EPSILON', 1e-7))
+                optimizer_str += '\n\t EPSILON: ' + str(self.params.get('EPSILON', 1e-7))
 
             elif self.params['OPTIMIZER'].lower() == 'adadelta':
                 optimizer = Adadelta(lr=self.params.get('LR', 1.0),
@@ -246,6 +260,8 @@ def setOptimizer(self, **kwargs):
                                      clipnorm=self.params.get('CLIP_C', 0.),
                                      clipvalue=self.params.get('CLIP_V', 0.),
                                      epsilon=self.params.get('EPSILON', 1e-7))
+                optimizer_str += '\n\t RHO: ' + str(self.params.get('RHO', 0.9)) + \
+                                 '\n\t EPSILON: ' + str(self.params.get('EPSILON', 1e-7))
 
             elif self.params['OPTIMIZER'].lower() == 'adam':
                 if self.params.get('ACCUMULATE_GRADIENTS', 1) > 1:
@@ -258,6 +274,11 @@ def setOptimizer(self, **kwargs):
                                                clipvalue=self.params.get('CLIP_V', 0.),
                                                epsilon=self.params.get('EPSILON', 1e-7),
                                                accum_iters=self.params.get('ACCUMULATE_GRADIENTS'))
+                    optimizer_str += '\n\t BETA_1: ' + str(self.params.get('BETA_1', 0.9)) + \
+                                     '\n\t BETA_2: ' + str(self.params.get('BETA_2', 0.999)) + \
+                                     '\n\t AMSGRAD: ' + str(self.params.get('AMSGRAD', False)) + \
+                                     '\n\t ACCUMULATE_GRADIENTS: ' + str(self.params.get('ACCUMULATE_GRADIENTS')) + \
+                                     '\n\t EPSILON: ' + str(self.params.get('EPSILON', 1e-7))
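+                    # accum_iters is the number of batches whose gradients are summed
+                    # before a single weight update, i.e. the effective batch size is
+                    # batch size times ACCUMULATE_GRADIENTS.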
                 else:
                     optimizer = Adam(lr=self.params.get('LR', 0.001),
                                      beta_1=self.params.get('BETA_1', 0.9),
@@ -267,6 +288,10 @@ def setOptimizer(self, **kwargs):
                                      clipnorm=self.params.get('CLIP_C', 0.),
                                      clipvalue=self.params.get('CLIP_V', 0.),
                                      epsilon=self.params.get('EPSILON', 1e-7))
+                    optimizer_str += '\n\t BETA_1: ' + str(self.params.get('BETA_1', 0.9)) + \
+                                     '\n\t BETA_2: ' + str(self.params.get('BETA_2', 0.999)) + \
+                                     '\n\t AMSGRAD: ' + str(self.params.get('AMSGRAD', False)) + \
+                                     '\n\t EPSILON: ' + str(self.params.get('EPSILON', 1e-7))
 
             elif self.params['OPTIMIZER'].lower() == 'adamax':
                 optimizer = Adamax(lr=self.params.get('LR', 0.002),
@@ -276,7 +301,9 @@ def setOptimizer(self, **kwargs):
                                    clipnorm=self.params.get('CLIP_C', 0.),
                                    clipvalue=self.params.get('CLIP_V', 0.),
                                    epsilon=self.params.get('EPSILON', 1e-7))
-
+                optimizer_str += '\n\t BETA_1: ' + str(self.params.get('BETA_1', 0.9)) + \
+                                 '\n\t BETA_2: ' + str(self.params.get('BETA_2', 0.999)) + \
+                                 '\n\t EPSILON: ' + str(self.params.get('EPSILON', 1e-7))
             elif self.params['OPTIMIZER'].lower() == 'nadam':
                 optimizer = Nadam(lr=self.params.get('LR', 0.002),
                                   beta_1=self.params.get('BETA_1', 0.9),
@@ -285,10 +312,57 @@ def setOptimizer(self, **kwargs):
                                   clipnorm=self.params.get('CLIP_C', 0.),
                                   clipvalue=self.params.get('CLIP_V', 0.),
                                   epsilon=self.params.get('EPSILON', 1e-7))
+                optimizer_str += '\n\t BETA_1: ' + str(self.params.get('BETA_1', 0.9)) + \
+                                 '\n\t BETA_2: ' + str(self.params.get('BETA_2', 0.999)) + \
+                                 '\n\t EPSILON: ' + str(self.params.get('EPSILON', 1e-7))
+
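+            # The following new branches (optimizer classes presumably imported
+            # elsewhere in this module) extend the available choices: SGDHD is
+            # SGD with hypergradient descent, which adapts the learning rate online
+            # (Baydin et al., 2018); HYPERGRAD_LR is that meta learning rate.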
+            elif self.params['OPTIMIZER'].lower() == 'sgdhd':
+                optimizer = SGDHD(lr=self.params.get('LR', 0.002),
+                                  clipnorm=self.params.get('CLIP_C', 10.),
+                                  clipvalue=self.params.get('CLIP_V', 0.),
+                                  hypergrad_lr=self.params.get('HYPERGRAD_LR', 0.001))
+                optimizer_str += '\n\t HYPERGRAD_LR: ' + str(self.params.get('HYPERGRAD_LR', 0.001))
+
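+            # QHSGD presumably implements quasi-hyperbolic momentum (Ma & Yarats,
+            # 2019): QUASI_HYPERBOLIC_MOMENTUM interpolates between a plain SGD step
+            # and a momentum step. QHSGDHD below combines it with hypergradient descent.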
+            elif self.params['OPTIMIZER'].lower() == 'qhsgd':
+                optimizer = QHSGD(lr=self.params.get('LR', 0.002),
+                                  momentum=self.params.get('MOMENTUM', 0.0),
+                                  quasi_hyperbolic_momentum=self.params.get('QUASI_HYPERBOLIC_MOMENTUM', 0.0),
+                                  decay=self.params.get('LR_OPTIMIZER_DECAY', 0.0),
+                                  nesterov=self.params.get('NESTEROV_MOMENTUM', False),
+                                  dampening=self.params.get('DAMPENING', 0.),
+                                  clipnorm=self.params.get('CLIP_C', 10.),
+                                  clipvalue=self.params.get('CLIP_V', 0.))
+                optimizer_str += '\n\t MOMENTUM: ' + str(self.params.get('MOMENTUM', 0.0)) + \
+                                 '\n\t QUASI_HYPERBOLIC_MOMENTUM: ' + str(self.params.get('QUASI_HYPERBOLIC_MOMENTUM', 0.0)) + \
+                                 '\n\t DAMPENING: ' + str(self.params.get('DAMPENING', 0.0)) + \
+                                 '\n\t NESTEROV: ' + str(self.params.get('NESTEROV_MOMENTUM', False))
+
+            elif self.params['OPTIMIZER'].lower() == 'qhsgdhd':
+                optimizer = QHSGDHD(lr=self.params.get('LR', 0.002),
+                                    momentum=self.params.get('MOMENTUM', 0.0),
+                                    quasi_hyperbolic_momentum=self.params.get('QUASI_HYPERBOLIC_MOMENTUM', 0.0),
+                                    dampening=self.params.get('DAMPENING', 0.),
+                                    hypergrad_lr=self.params.get('HYPERGRAD_LR', 0.001),
+                                    decay=self.params.get('LR_OPTIMIZER_DECAY', 0.0),
+                                    nesterov=self.params.get('NESTEROV_MOMENTUM', False),
+                                    clipnorm=self.params.get('CLIP_C', 10.),
+                                    clipvalue=self.params.get('CLIP_V', 0.))
+                optimizer_str += '\n\t MOMENTUM: ' + str(self.params.get('MOMENTUM', 0.0)) + \
+                                 '\n\t QUASI_HYPERBOLIC_MOMENTUM: ' + str(self.params.get('QUASI_HYPERBOLIC_MOMENTUM', 0.0)) + \
+                                 '\n\t HYPERGRAD_LR: ' + str(self.params.get('HYPERGRAD_LR', 0.001)) + \
+                                 '\n\t DAMPENING: ' + str(self.params.get('DAMPENING', 0.0)) + \
+                                 '\n\t NESTEROV: ' + str(self.params.get('NESTEROV_MOMENTUM', False))
             else:
                 logging.info('\tWARNING: The modification of the LR is not implemented for the chosen optimizer.')
                 optimizer = eval(self.params['OPTIMIZER'])
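+                # Note: eval() resolves the OPTIMIZER string to whatever object of
+                # that name is in scope, so it must exactly match an imported
+                # optimizer class name; unknown names raise NameError.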
 
+        optimizer_str += '\n\t CLIP_C: ' + str(self.params.get('CLIP_C', 0.)) + \
+                         '\n\t CLIP_V: ' + str(self.params.get('CLIP_V', 0.)) + \
+                         '\n\t LR_OPTIMIZER_DECAY: ' + str(self.params.get('LR_OPTIMIZER_DECAY', 0.0)) + \
+                         '\n\t ACCUMULATE_GRADIENTS: ' + str(self.params.get('ACCUMULATE_GRADIENTS', 1)) + '\n'
+        if self.verbose > 0:
+            logging.info("Preparing optimizer and compiling. Optimizer configuration:\n" + optimizer_str)
+
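+        # e.g. with OPTIMIZER='adam', LR=0.001 and everything else left unset:
+        #   Preparing optimizer and compiling. Optimizer configuration:
+        #        LR: 0.001
+        #        LOSS: categorical_crossentropy
+        #        BETA_1: 0.9
+        #        BETA_2: 0.999
+        #        AMSGRAD: False
+        #        EPSILON: 1e-07
+        #        CLIP_C: 0.0
+        #        CLIP_V: 0.0
+        #        LR_OPTIMIZER_DECAY: 0.0
+        #        ACCUMULATE_GRADIENTS: 1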
         if hasattr(self, 'multi_gpu_model') and self.multi_gpu_model is not None:
             model_to_compile = self.multi_gpu_model
         else:
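
Usage sketch for the new logging (hypothetical driver code; `model` stands for any wrapper instance exposing `params`, `verbose` and this setOptimizer() method):

    model.params['OPTIMIZER'] = 'adam'        # selects the Keras Adam branch above
    model.params['LR'] = 0.001
    model.params['ACCUMULATE_GRADIENTS'] = 4  # Adam only; other optimizers reset it to 1 with a warning
    model.verbose = 1                         # > 0 enables the optimizer_str summary log
    model.setOptimizer()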