[RLlib] Add comments to example files for templates #48988

Draft: wants to merge 3 commits into base: master
8 changes: 8 additions & 0 deletions rllib/examples/envs/custom_gym_env.py
@@ -45,13 +45,17 @@
 | 18.3034          | 28000 | 0.908918 | 12.9676            |
 +------------------+-------+----------+--------------------+
 """
+# Note(Artur): this lets us extract portions of the script on Anyscale
+# ws-template-imports-start
 import gymnasium as gym
 from gymnasium.spaces import Discrete, Box
 import numpy as np
 import random
 
 from typing import Optional
 
+# ws-template-imports-end
+
 from ray.rllib.utils.test_utils import (
     add_rllib_example_script_args,
     run_rllib_example_script_experiment,
Expand All @@ -71,6 +75,8 @@
)


# Note(Artur): this lets us extract portions of the script on Anyscale
# ws-template-code-start
class SimpleCorridor(gym.Env):
"""Example of a custom env in which the agent has to walk down a corridor.

@@ -126,6 +132,8 @@ def step(self, action):
 )
 
 
+# ws-template-code-end
+
 if __name__ == "__main__":
     args = parser.parse_args()
 
20 changes: 13 additions & 7 deletions rllib/tuned_examples/ppo/atari_ppo.py
@@ -1,3 +1,5 @@
+# Note(Artur): this lets us extract portions of the script on Anyscale
+# ws-template-imports-start
 import gymnasium as gym
 
 from ray import tune
@@ -8,6 +10,7 @@
 from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack
 from ray.rllib.utils.test_utils import add_rllib_example_script_args
 
+# ws-template-imports-end
 
 parser = add_rllib_example_script_args(
     default_reward=float("inf"),
@@ -22,7 +25,12 @@
 # and (if needed) use their values to set up `config` below.
 args = parser.parse_args()
 
+NUM_LEARNERS = args.num_learners or 1
+ENV = args.env
+
 
+# Note(Artur): this lets us extract portions of the script on Anyscale
+# ws-template-code-start
 def _make_env_to_module_connector(env):
     return FrameStackingEnvToModule(num_frames=4)
 
@@ -35,15 +43,14 @@ def _make_learner_connector(input_observation_space, input_action_space):
 # We would like our frame stacking connector to do this job.
 def _env_creator(cfg):
     return wrap_atari_for_new_api_stack(
-        gym.make(args.env, **cfg, render_mode="rgb_array"),
+        gym.make(ENV, **cfg, render_mode="rgb_array"),
         # Perform frame-stacking through ConnectorV2 API.
         framestack=None,
     )
 
 
 tune.register_env("env", _env_creator)
 
-
 config = (
     PPOConfig()
     .environment(
@@ -57,20 +64,19 @@ def _env_creator(cfg):
         clip_rewards=True,
     )
     .env_runners(
-        # num_envs_per_env_runner=5,  # 5 on old yaml example
         env_to_module_connector=_make_env_to_module_connector,
     )
     .training(
         learner_connector=_make_learner_connector,
-        train_batch_size_per_learner=4000,  # 5000 on old yaml example
-        minibatch_size=128,  # 500 on old yaml example
+        train_batch_size_per_learner=4000,
+        minibatch_size=128,
         lambda_=0.95,
         kl_coeff=0.5,
         clip_param=0.1,
         vf_clip_param=10.0,
         entropy_coeff=0.01,
         num_epochs=10,
-        lr=0.00015 * (args.num_learners or 1),
+        lr=0.00015 * NUM_LEARNERS,
         grad_clip=100.0,
         grad_clip_by="global_norm",
     )
@@ -83,7 +89,7 @@ def _env_creator(cfg):
         ),
     )
 )
-
+# ws-template-code-end
 
 if __name__ == "__main__":
     from ray.rllib.utils.test_utils import run_rllib_example_script_experiment
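
For context on what these markers enable: the PR adds paired "ws-template-imports-start/end" and "ws-template-code-start/end" comments so that tooling can slice the example scripts into sections for Anyscale workspace templates. The extraction tooling itself is not part of this diff; the sketch below is a hypothetical illustration of how such markers could be consumed. The extract_between helper and the line-based scan are assumptions for illustration, not RLlib or Anyscale API.

# Hypothetical sketch, not part of this PR: pull out the block of text
# between a start/end marker pair. Assumes each marker sits alone on its
# own line, exactly as added by this PR.
from pathlib import Path


def extract_between(path: str, start: str, end: str) -> str:
    """Return the lines strictly between two marker comments."""
    inside = False
    out = []
    for line in Path(path).read_text().splitlines():
        stripped = line.strip()
        if stripped == start:
            inside = True  # start collecting from the next line
        elif stripped == end:
            inside = False  # stop collecting; marker itself is excluded
        elif inside:
            out.append(line)
    return "\n".join(out)


# Example (hypothetical usage):
# imports_block = extract_between(
#     "rllib/tuned_examples/ppo/atari_ppo.py",
#     "# ws-template-imports-start",
#     "# ws-template-imports-end",
# )

One design consequence of this scheme is that the code between code-start and code-end must stay self-contained given only the lines between the imports markers, which is presumably why the PR hoists args.num_learners and args.env into the NUM_LEARNERS and ENV module-level constants before the code-start marker.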