[RLlib] Add comments to example files for templates #48988

Draft: wants to merge 3 commits into base: master
8 changes: 8 additions & 0 deletions rllib/examples/envs/custom_gym_env.py
@@ -45,13 +45,17 @@
 | 18.3034          | 28000 | 0.908918 | 12.9676            |
 +------------------+-------+----------+--------------------+
 """
+# Note(Artur): this lets us extract portions of the script on Anyscale
+# ws-template-imports-start
 import gymnasium as gym
 from gymnasium.spaces import Discrete, Box
 import numpy as np
 import random
 
 from typing import Optional
 
+# ws-template-imports-end
+
 from ray.rllib.utils.test_utils import (
     add_rllib_example_script_args,
     run_rllib_example_script_experiment,
Expand All @@ -71,6 +75,8 @@
)


# Note(Artur): this lets us extract portions of the script on Anyscale
# ws-template-code-start
class SimpleCorridor(gym.Env):
"""Example of a custom env in which the agent has to walk down a corridor.

@@ -126,6 +132,8 @@ def step(self, action):
 )
 
 
+# ws-template-code-end
+
 if __name__ == "__main__":
     args = parser.parse_args()
 
20 changes: 13 additions & 7 deletions rllib/tuned_examples/ppo/atari_ppo.py
@@ -1,3 +1,5 @@
+# Note(Artur): this lets us extract portions of the script on Anyscale
+# ws-template-imports-start
 import gymnasium as gym
 
 from ray import tune
@@ -8,6 +10,7 @@
 from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack
 from ray.rllib.utils.test_utils import add_rllib_example_script_args
 
+# ws-template-imports-end
 
 parser = add_rllib_example_script_args(
     default_reward=float("inf"),
@@ -22,7 +25,12 @@
 # and (if needed) use their values to set up `config` below.
 args = parser.parse_args()
 
+NUM_LEARNERS = args.num_learners or 1
+ENV = args.env
+
 
+# Note(Artur): this lets us extract portions of the script on Anyscale
+# ws-template-code-start
 def _make_env_to_module_connector(env):
     return FrameStackingEnvToModule(num_frames=4)
 
@@ -35,15 +43,14 @@ def _make_learner_connector(input_observation_space, input_action_space):
 # We would like our frame stacking connector to do this job.
 def _env_creator(cfg):
     return wrap_atari_for_new_api_stack(
-        gym.make(args.env, **cfg, render_mode="rgb_array"),
+        gym.make(ENV, **cfg, render_mode="rgb_array"),
         # Perform frame-stacking through ConnectorV2 API.
         framestack=None,
     )
 
 
 tune.register_env("env", _env_creator)
 
-
 config = (
     PPOConfig()
     .environment(
@@ -57,20 +64,19 @@ def _env_creator(cfg):
         clip_rewards=True,
     )
     .env_runners(
-        # num_envs_per_env_runner=5,  # 5 on old yaml example
         env_to_module_connector=_make_env_to_module_connector,
     )
     .training(
         learner_connector=_make_learner_connector,
-        train_batch_size_per_learner=4000,  # 5000 on old yaml example
-        minibatch_size=128,  # 500 on old yaml example
+        train_batch_size_per_learner=4000,
+        minibatch_size=128,
         lambda_=0.95,
         kl_coeff=0.5,
         clip_param=0.1,
         vf_clip_param=10.0,
         entropy_coeff=0.01,
         num_epochs=10,
-        lr=0.00015 * (args.num_learners or 1),
+        lr=0.00015 * NUM_LEARNERS,
         grad_clip=100.0,
         grad_clip_by="global_norm",
     )
@@ -83,7 +89,7 @@ def _env_creator(cfg):
         ),
     )
 )
-
+# ws-template-code-end
 
 if __name__ == "__main__":
     from ray.rllib.utils.test_utils import run_rllib_example_script_experiment
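
For context on what these markers enable: the PR adds paired "ws-template-imports-start/end" and "ws-template-code-start/end" comments so that tooling can slice the example scripts into sections for Anyscale workspace templates. The extraction tooling itself is not part of this diff; the sketch below is a hypothetical illustration of how such markers could be consumed. The extract_between helper and the line-based scan are assumptions for illustration, not RLlib or Anyscale API.

# Hypothetical sketch, not part of this PR: pull out the block of text
# between a start/end marker pair. Assumes each marker sits alone on its
# own line, exactly as added by this PR.
from pathlib import Path


def extract_between(path: str, start: str, end: str) -> str:
    """Return the lines strictly between two marker comments."""
    inside = False
    out = []
    for line in Path(path).read_text().splitlines():
        stripped = line.strip()
        if stripped == start:
            inside = True  # start collecting from the next line
        elif stripped == end:
            inside = False  # stop collecting; marker itself is excluded
        elif inside:
            out.append(line)
    return "\n".join(out)


# Example (hypothetical usage):
# imports_block = extract_between(
#     "rllib/tuned_examples/ppo/atari_ppo.py",
#     "# ws-template-imports-start",
#     "# ws-template-imports-end",
# )

One design consequence of this scheme is that the code between code-start and code-end must stay self-contained given only the lines between the imports markers, which is presumably why the PR hoists args.num_learners and args.env into the NUM_LEARNERS and ENV module-level constants before the code-start marker.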