diff --git a/dizoo/classic_control/cartpole/config/cartpole_a2c_config.py b/dizoo/classic_control/cartpole/config/cartpole_a2c_config.py index ec6f93cd6e..f68d8503d8 100644 --- a/dizoo/classic_control/cartpole/config/cartpole_a2c_config.py +++ b/dizoo/classic_control/cartpole/config/cartpole_a2c_config.py @@ -21,6 +21,7 @@ learning_rate=0.001, # (float) loss weight of the entropy regularization, the weight of policy network is set to 1 entropy_weight=0.01, + resume_training=False, ), collect=dict( # (int) collect n_sample data, train model n_iteration times diff --git a/dizoo/classic_control/cartpole/config/cartpole_pg_config.py b/dizoo/classic_control/cartpole/config/cartpole_pg_config.py index af3ee5ba04..808b70501d 100644 --- a/dizoo/classic_control/cartpole/config/cartpole_pg_config.py +++ b/dizoo/classic_control/cartpole/config/cartpole_pg_config.py @@ -18,6 +18,7 @@ batch_size=64, learning_rate=0.001, entropy_weight=0.001, + resume_training=False, ), collect=dict(n_episode=80, unroll_len=1, discount_factor=0.9), eval=dict(evaluator=dict(eval_freq=100, ), ), diff --git a/dizoo/classic_control/cartpole/config/cartpole_ppo_stdim_config.py b/dizoo/classic_control/cartpole/config/cartpole_ppo_stdim_config.py index 3f6060797c..6735f980cd 100644 --- a/dizoo/classic_control/cartpole/config/cartpole_ppo_stdim_config.py +++ b/dizoo/classic_control/cartpole/config/cartpole_ppo_stdim_config.py @@ -37,6 +37,7 @@ entropy_weight=0.01, clip_ratio=0.2, learner=dict(hook=dict(save_ckpt_after_iter=100)), + resume_training=False, ), collect=dict( n_sample=256, diff --git a/dizoo/classic_control/cartpole/config/cartpole_ppopg_config.py b/dizoo/classic_control/cartpole/config/cartpole_ppopg_config.py index 623a3b5048..4ee8462575 100644 --- a/dizoo/classic_control/cartpole/config/cartpole_ppopg_config.py +++ b/dizoo/classic_control/cartpole/config/cartpole_ppopg_config.py @@ -19,6 +19,7 @@ batch_size=64, learning_rate=0.001, entropy_weight=0.001, + resume_training=False, ), collect=dict(n_episode=80, unroll_len=1, discount_factor=0.9, collector=dict(get_train_sample=True)), eval=dict(evaluator=dict(eval_freq=100, ), ), diff --git a/dizoo/classic_control/pendulum/config/pendulum_ppo_config.py b/dizoo/classic_control/pendulum/config/pendulum_ppo_config.py index 151455aec1..6c58e78e64 100644 --- a/dizoo/classic_control/pendulum/config/pendulum_ppo_config.py +++ b/dizoo/classic_control/pendulum/config/pendulum_ppo_config.py @@ -35,6 +35,7 @@ adv_norm=True, value_norm=True, ignore_done=True, + resume_training=False, ), collect=dict( n_sample=5000, diff --git a/dizoo/petting_zoo/config/ptz_simple_spread_mappo_config.py b/dizoo/petting_zoo/config/ptz_simple_spread_mappo_config.py index 5eb1095a5a..910a8bb997 100644 --- a/dizoo/petting_zoo/config/ptz_simple_spread_mappo_config.py +++ b/dizoo/petting_zoo/config/ptz_simple_spread_mappo_config.py @@ -53,6 +53,7 @@ grad_clip_type='clip_norm', grad_clip_value=10, ignore_done=False, + resume_training=False, ), collect=dict( n_sample=3200,