From 671f4f0e3108056369ab2e7d544588920f463742 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <attaismyname@googlemail.com>
Date: Thu, 28 Nov 2024 11:35:18 +0100
Subject: [PATCH 1/4] Add ws-template extraction tags; hoist args into NUM_LEARNERS/ENV

---
 rllib/examples/envs/custom_gym_env.py |  8 ++++++--
 rllib/tuned_examples/ppo/atari_ppo.py | 20 ++++++++++++--------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/rllib/examples/envs/custom_gym_env.py b/rllib/examples/envs/custom_gym_env.py
index 01fa5ecc452f..facf5357989e 100644
--- a/rllib/examples/envs/custom_gym_env.py
+++ b/rllib/examples/envs/custom_gym_env.py
@@ -45,12 +45,15 @@
 |          18.3034 | 28000 | 0.908918 |            12.9676 |
 +------------------+-------+----------+--------------------+
 """
+# Note(Artur): this lets us extract portions of the script on Anyscale
+# ws-template-imports-start
 import gymnasium as gym
 from gymnasium.spaces import Discrete, Box
 import numpy as np
 import random
 
 from typing import Optional
+# ws-template-imports-end
 
 from ray.rllib.utils.test_utils import (
     add_rllib_example_script_args,
@@ -70,7 +73,8 @@
     "starting- and goal states.",
 )
 
-
+# Note(Artur): this lets us extract portions of the script on Anyscale
+# ws-template-code-start
 class SimpleCorridor(gym.Env):
     """Example of a custom env in which the agent has to walk down a corridor.
 
@@ -124,7 +128,7 @@ def step(self, action):
             truncated,
             infos,
         )
-
+# ws-template-code-end
 
 if __name__ == "__main__":
     args = parser.parse_args()
diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/tuned_examples/ppo/atari_ppo.py
index b4d881574f4e..6f212ba8d0a8 100644
--- a/rllib/tuned_examples/ppo/atari_ppo.py
+++ b/rllib/tuned_examples/ppo/atari_ppo.py
@@ -1,3 +1,5 @@
+# Note(Artur): this lets us extract portions of the script on Anyscale
+# ws-template-imports-start
 import gymnasium as gym
 
 from ray import tune
@@ -7,7 +9,7 @@
 from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
 from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack
 from ray.rllib.utils.test_utils import add_rllib_example_script_args
-
+# ws-template-imports-end
 
 parser = add_rllib_example_script_args(
     default_reward=float("inf"),
@@ -22,7 +24,11 @@
 # and (if needed) use their values to set up `config` below.
 args = parser.parse_args()
 
+NUM_LEARNERS = args.num_learners or 1
+ENV = args.env
 
+# Note(Artur): this lets us extract portions of the script on Anyscale
+# ws-template-code-start
 def _make_env_to_module_connector(env):
     return FrameStackingEnvToModule(num_frames=4)
 
@@ -35,7 +41,7 @@ def _make_learner_connector(input_observation_space, input_action_space):
 # We would like our frame stacking connector to do this job.
 def _env_creator(cfg):
     return wrap_atari_for_new_api_stack(
-        gym.make(args.env, **cfg, render_mode="rgb_array"),
+        gym.make(ENV, **cfg, render_mode="rgb_array"),
         # Perform frame-stacking through ConnectorV2 API.
         framestack=None,
     )
@@ -43,7 +49,6 @@ def _env_creator(cfg):
 
 tune.register_env("env", _env_creator)
 
-
 config = (
     PPOConfig()
     .environment(
@@ -57,20 +62,19 @@ def _env_creator(cfg):
         clip_rewards=True,
     )
     .env_runners(
-        # num_envs_per_env_runner=5,  # 5 on old yaml example
         env_to_module_connector=_make_env_to_module_connector,
     )
     .training(
         learner_connector=_make_learner_connector,
-        train_batch_size_per_learner=4000,  # 5000 on old yaml example
-        minibatch_size=128,  # 500 on old yaml example
+        train_batch_size_per_learner=4000,
+        minibatch_size=128,
         lambda_=0.95,
         kl_coeff=0.5,
         clip_param=0.1,
         vf_clip_param=10.0,
         entropy_coeff=0.01,
         num_epochs=10,
-        lr=0.00015 * (args.num_learners or 1),
+        lr=0.00015 * NUM_LEARNERS,
         grad_clip=100.0,
         grad_clip_by="global_norm",
     )
@@ -83,7 +87,7 @@ def _env_creator(cfg):
         ),
     )
 )
-
+# ws-template-code-end
 
 if __name__ == "__main__":
     from ray.rllib.utils.test_utils import run_rllib_example_script_experiment

From f2f64f18e0106f176991ba085d18c4d8bb4e1f7e Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <attaismyname@googlemail.com>
Date: Thu, 28 Nov 2024 15:48:11 +0100
Subject: [PATCH 2/4] lint: add blank lines before ws-template end tags

---
 rllib/examples/envs/custom_gym_env.py | 3 +++
 rllib/tuned_examples/ppo/atari_ppo.py | 1 +
 2 files changed, 4 insertions(+)

diff --git a/rllib/examples/envs/custom_gym_env.py b/rllib/examples/envs/custom_gym_env.py
index facf5357989e..ea2957f451a9 100644
--- a/rllib/examples/envs/custom_gym_env.py
+++ b/rllib/examples/envs/custom_gym_env.py
@@ -53,6 +53,7 @@
 import random
 
 from typing import Optional
+
 # ws-template-imports-end
 
 from ray.rllib.utils.test_utils import (
@@ -128,6 +129,8 @@ def step(self, action):
             truncated,
             infos,
         )
+
+
 # ws-template-code-end
 
 if __name__ == "__main__":
diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/tuned_examples/ppo/atari_ppo.py
index 6f212ba8d0a8..11bc18699945 100644
--- a/rllib/tuned_examples/ppo/atari_ppo.py
+++ b/rllib/tuned_examples/ppo/atari_ppo.py
@@ -9,6 +9,7 @@
 from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
 from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack
 from ray.rllib.utils.test_utils import add_rllib_example_script_args
+
 # ws-template-imports-end
 
 parser = add_rllib_example_script_args(

From 4baf5ccc3337134f546ec1d91ed61e7f4e714702 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <attaismyname@googlemail.com>
Date: Thu, 28 Nov 2024 17:48:26 +0100
Subject: [PATCH 3/4] lint: add blank line before ws-template-code-start comment

---
 rllib/examples/envs/custom_gym_env.py | 1 +
 rllib/tuned_examples/ppo/atari_ppo.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/rllib/examples/envs/custom_gym_env.py b/rllib/examples/envs/custom_gym_env.py
index ea2957f451a9..ecf9b30097aa 100644
--- a/rllib/examples/envs/custom_gym_env.py
+++ b/rllib/examples/envs/custom_gym_env.py
@@ -74,6 +74,7 @@
     "starting- and goal states.",
 )
 
+
 # Note(Artur): this lets us extract portions of the script on Anyscale
 # ws-template-code-start
 class SimpleCorridor(gym.Env):
diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/tuned_examples/ppo/atari_ppo.py
index 11bc18699945..23248543ed6c 100644
--- a/rllib/tuned_examples/ppo/atari_ppo.py
+++ b/rllib/tuned_examples/ppo/atari_ppo.py
@@ -28,6 +28,7 @@
 NUM_LEARNERS = args.num_learners or 1
 ENV = args.env
 
+
 # Note(Artur): this lets us extract portions of the script on Anyscale
 # ws-template-code-start
 def _make_env_to_module_connector(env):

From 610755cb66d1a62579391e44b740ad3875c233eb Mon Sep 17 00:00:00 2001
From: Sven Mika <sven@anyscale.io>
Date: Mon, 2 Dec 2024 10:31:32 +0100
Subject: [PATCH 4/4] Apply suggestions from code review

Signed-off-by: Sven Mika <sven@anyscale.io>
---
 rllib/examples/envs/custom_gym_env.py | 4 ++--
 rllib/tuned_examples/ppo/atari_ppo.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/rllib/examples/envs/custom_gym_env.py b/rllib/examples/envs/custom_gym_env.py
index ecf9b30097aa..2612575adb63 100644
--- a/rllib/examples/envs/custom_gym_env.py
+++ b/rllib/examples/envs/custom_gym_env.py
@@ -45,7 +45,7 @@
 |          18.3034 | 28000 | 0.908918 |            12.9676 |
 +------------------+-------+----------+--------------------+
 """
-# Note(Artur): this lets us extract portions of the script on Anyscale
+# These tags allow extracting portions of this script on Anyscale.
 # ws-template-imports-start
 import gymnasium as gym
 from gymnasium.spaces import Discrete, Box
@@ -75,7 +75,7 @@
 )
 
 
-# Note(Artur): this lets us extract portions of the script on Anyscale
+# These tags allow extracting portions of this script on Anyscale.
 # ws-template-code-start
 class SimpleCorridor(gym.Env):
     """Example of a custom env in which the agent has to walk down a corridor.
diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/tuned_examples/ppo/atari_ppo.py
index 23248543ed6c..c58c47898a1a 100644
--- a/rllib/tuned_examples/ppo/atari_ppo.py
+++ b/rllib/tuned_examples/ppo/atari_ppo.py
@@ -1,4 +1,4 @@
-# Note(Artur): this lets us extract portions of the script on Anyscale
+# These tags allow extracting portions of this script on Anyscale.
 # ws-template-imports-start
 import gymnasium as gym
 
@@ -29,7 +29,7 @@
 ENV = args.env
 
 
-# Note(Artur): this lets us extract portions of the script on Anyscale
+# These tags allow extracting portions of this script on Anyscale.
 # ws-template-code-start
 def _make_env_to_module_connector(env):
     return FrameStackingEnvToModule(num_frames=4)