From 671f4f0e3108056369ab2e7d544588920f463742 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Thu, 28 Nov 2024 11:35:18 +0100 Subject: [PATCH 1/4] add comments --- rllib/examples/envs/custom_gym_env.py | 8 ++++++-- rllib/tuned_examples/ppo/atari_ppo.py | 20 ++++++++++++-------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/rllib/examples/envs/custom_gym_env.py b/rllib/examples/envs/custom_gym_env.py index 01fa5ecc452f..facf5357989e 100644 --- a/rllib/examples/envs/custom_gym_env.py +++ b/rllib/examples/envs/custom_gym_env.py @@ -45,12 +45,15 @@ | 18.3034 | 28000 | 0.908918 | 12.9676 | +------------------+-------+----------+--------------------+ """ +# Note(Artur): this lets us extract portions of the script on Anyscale +# ws-template-imports-start import gymnasium as gym from gymnasium.spaces import Discrete, Box import numpy as np import random from typing import Optional +# ws-template-imports-end from ray.rllib.utils.test_utils import ( add_rllib_example_script_args, @@ -70,7 +73,8 @@ "starting- and goal states.", ) - +# Note(Artur): this lets us extract portions of the script on Anyscale +# ws-template-code-start class SimpleCorridor(gym.Env): """Example of a custom env in which the agent has to walk down a corridor. @@ -124,7 +128,7 @@ def step(self, action): truncated, infos, ) - +# ws-template-code-end if __name__ == "__main__": args = parser.parse_args() diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/tuned_examples/ppo/atari_ppo.py index b4d881574f4e..6f212ba8d0a8 100644 --- a/rllib/tuned_examples/ppo/atari_ppo.py +++ b/rllib/tuned_examples/ppo/atari_ppo.py @@ -1,3 +1,5 @@ +# Note(Artur): this lets us extract portions of the script on Anyscale +# ws-template-imports-start import gymnasium as gym from ray import tune @@ -7,7 +9,7 @@ from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack from ray.rllib.utils.test_utils import add_rllib_example_script_args - +# ws-template-imports-end parser = add_rllib_example_script_args( default_reward=float("inf"), @@ -22,7 +24,11 @@ # and (if needed) use their values to set up `config` below. args = parser.parse_args() +NUM_LEARNERS = args.num_learners or 1 +ENV = args.env +# Note(Artur): this lets us extract portions of the script on Anyscale +# ws-template-code-start def _make_env_to_module_connector(env): return FrameStackingEnvToModule(num_frames=4) @@ -35,7 +41,7 @@ def _make_learner_connector(input_observation_space, input_action_space): # We would like our frame stacking connector to do this job. def _env_creator(cfg): return wrap_atari_for_new_api_stack( - gym.make(args.env, **cfg, render_mode="rgb_array"), + gym.make(ENV, **cfg, render_mode="rgb_array"), # Perform frame-stacking through ConnectorV2 API. framestack=None, ) @@ -43,7 +49,6 @@ def _env_creator(cfg): tune.register_env("env", _env_creator) - config = ( PPOConfig() .environment( @@ -57,20 +62,19 @@ def _env_creator(cfg): clip_rewards=True, ) .env_runners( - # num_envs_per_env_runner=5, # 5 on old yaml example env_to_module_connector=_make_env_to_module_connector, ) .training( learner_connector=_make_learner_connector, - train_batch_size_per_learner=4000, # 5000 on old yaml example - minibatch_size=128, # 500 on old yaml example + train_batch_size_per_learner=4000, + minibatch_size=128, lambda_=0.95, kl_coeff=0.5, clip_param=0.1, vf_clip_param=10.0, entropy_coeff=0.01, num_epochs=10, - lr=0.00015 * (args.num_learners or 1), + lr=0.00015 * NUM_LEARNERS, grad_clip=100.0, grad_clip_by="global_norm", ) @@ -83,7 +87,7 @@ def _env_creator(cfg): ), ) ) - +# ws-template-code-end if __name__ == "__main__": from ray.rllib.utils.test_utils import run_rllib_example_script_experiment From f2f64f18e0106f176991ba085d18c4d8bb4e1f7e Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Thu, 28 Nov 2024 15:48:11 +0100 Subject: [PATCH 2/4] lint --- rllib/examples/envs/custom_gym_env.py | 3 +++ rllib/tuned_examples/ppo/atari_ppo.py | 1 + 2 files changed, 4 insertions(+) diff --git a/rllib/examples/envs/custom_gym_env.py b/rllib/examples/envs/custom_gym_env.py index facf5357989e..ea2957f451a9 100644 --- a/rllib/examples/envs/custom_gym_env.py +++ b/rllib/examples/envs/custom_gym_env.py @@ -53,6 +53,7 @@ import random from typing import Optional + # ws-template-imports-end from ray.rllib.utils.test_utils import ( @@ -128,6 +129,8 @@ def step(self, action): truncated, infos, ) + + # ws-template-code-end if __name__ == "__main__": diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/tuned_examples/ppo/atari_ppo.py index 6f212ba8d0a8..11bc18699945 100644 --- a/rllib/tuned_examples/ppo/atari_ppo.py +++ b/rllib/tuned_examples/ppo/atari_ppo.py @@ -9,6 +9,7 @@ from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack from ray.rllib.utils.test_utils import add_rllib_example_script_args + # ws-template-imports-end parser = add_rllib_example_script_args( From 4baf5ccc3337134f546ec1d91ed61e7f4e714702 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Thu, 28 Nov 2024 17:48:26 +0100 Subject: [PATCH 3/4] lint --- rllib/examples/envs/custom_gym_env.py | 1 + rllib/tuned_examples/ppo/atari_ppo.py | 1 + 2 files changed, 2 insertions(+) diff --git a/rllib/examples/envs/custom_gym_env.py b/rllib/examples/envs/custom_gym_env.py index ea2957f451a9..ecf9b30097aa 100644 --- a/rllib/examples/envs/custom_gym_env.py +++ b/rllib/examples/envs/custom_gym_env.py @@ -74,6 +74,7 @@ "starting- and goal states.", ) + # Note(Artur): this lets us extract portions of the script on Anyscale # ws-template-code-start class SimpleCorridor(gym.Env): diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/tuned_examples/ppo/atari_ppo.py index 11bc18699945..23248543ed6c 100644 --- a/rllib/tuned_examples/ppo/atari_ppo.py +++ b/rllib/tuned_examples/ppo/atari_ppo.py @@ -28,6 +28,7 @@ NUM_LEARNERS = args.num_learners or 1 ENV = args.env + # Note(Artur): this lets us extract portions of the script on Anyscale # ws-template-code-start def _make_env_to_module_connector(env): From 610755cb66d1a62579391e44b740ad3875c233eb Mon Sep 17 00:00:00 2001 From: Sven Mika Date: Mon, 2 Dec 2024 10:31:32 +0100 Subject: [PATCH 4/4] Apply suggestions from code review Signed-off-by: Sven Mika --- rllib/examples/envs/custom_gym_env.py | 4 ++-- rllib/tuned_examples/ppo/atari_ppo.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rllib/examples/envs/custom_gym_env.py b/rllib/examples/envs/custom_gym_env.py index ecf9b30097aa..2612575adb63 100644 --- a/rllib/examples/envs/custom_gym_env.py +++ b/rllib/examples/envs/custom_gym_env.py @@ -45,7 +45,7 @@ | 18.3034 | 28000 | 0.908918 | 12.9676 | +------------------+-------+----------+--------------------+ """ -# Note(Artur): this lets us extract portions of the script on Anyscale +# These tags allow extracting portions of this script on Anyscale. # ws-template-imports-start import gymnasium as gym from gymnasium.spaces import Discrete, Box @@ -75,7 +75,7 @@ ) -# Note(Artur): this lets us extract portions of the script on Anyscale +# These tags allow extracting portions of this script on Anyscale. # ws-template-code-start class SimpleCorridor(gym.Env): """Example of a custom env in which the agent has to walk down a corridor. diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/tuned_examples/ppo/atari_ppo.py index 23248543ed6c..c58c47898a1a 100644 --- a/rllib/tuned_examples/ppo/atari_ppo.py +++ b/rllib/tuned_examples/ppo/atari_ppo.py @@ -1,4 +1,4 @@ -# Note(Artur): this lets us extract portions of the script on Anyscale +# These tags allow extracting portions of this script on Anyscale. # ws-template-imports-start import gymnasium as gym @@ -29,7 +29,7 @@ ENV = args.env -# Note(Artur): this lets us extract portions of the script on Anyscale +# These tags allow extracting portions of this script on Anyscale. # ws-template-code-start def _make_env_to_module_connector(env): return FrameStackingEnvToModule(num_frames=4)