Update environment creation tutorials (#1082)

Co-authored-by: ggsavin <[email protected]>
Farama-Foundation · Sep 1, 2023 · c3dc056 · c3dc056
1 parent f0c94c6
commit c3dc056
Show file tree

Hide file tree

Showing 33 changed files with 114 additions and 27 deletions.
diff --git a/.github/workflows/linux-tutorials-test.yml b/.github/workflows/linux-tutorials-test.yml
@@ -19,7 +19,7 @@ jobs:
  fail-fast: false
  matrix:
  python-version: ['3.8', '3.9', '3.10', '3.11']
- tutorial: ['Tianshou', 'EnvironmentCreation', 'CleanRL', 'SB3/kaz', 'SB3/waterworld', 'SB3/connect_four', 'SB3/test'] # TODO: add back Ray once next release after 2.6.2
+ tutorial: ['Tianshou', 'CustomEnvironment', 'CleanRL', 'SB3/kaz', 'SB3/waterworld', 'SB3/connect_four', 'SB3/test'] # TODO: add back Ray once next release after 2.6.2
  steps:
  - uses: actions/checkout@v3
  - name: Set up Python ${{ matrix.python-version }}

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -75,3 +75,7 @@ repos:
  additional_dependencies: ["pyright"]
  args:
  - --project=pyproject.toml
+ - repo: https://github.com/python-jsonschema/check-jsonschema
+ rev: 0.26.3
+ hooks:
+ - id: check-github-workflows
diff --git a/docs/code_examples/aec_rps_usage.py b/docs/code_examples/aec_rps_usage.py
@@ -0,0 +1,16 @@
+import aec_rps
+
+env = aec_rps.env(render_mode="human")
+env.reset(seed=42)
+
+for agent in env.agent_iter():
+ observation, reward, termination, truncation, info = env.last()
+
+ if termination or truncation:
+ action = None
+ else:
+ # this is where you would insert your policy
+ action = env.action_space(agent).sample()
+
+ env.step(action)
+env.close()
diff --git a/docs/code_examples/parallel_rps.py b/docs/code_examples/parallel_rps.py
@@ -130,6 +130,7 @@ def reset(self, seed=None, options=None):
  self.num_moves = 0
  observations = {agent: NONE for agent in self.agents}
  infos = {agent: {} for agent in self.agents}
+ self.state = observations
 
  return observations, infos
 
@@ -165,6 +166,7 @@ def step(self, actions):
  self.agents[i]: int(actions[self.agents[1 - i]])
  for i in range(len(self.agents))
  }
+ self.state = observations
 
  # typically there won't be any information in the infos, but there must
  # still be an entry for each agent

diff --git a/docs/code_examples/parallel_rps_usage.py b/docs/code_examples/parallel_rps_usage.py
@@ -0,0 +1,11 @@
+import parallel_rps
+
+env = parallel_rps.parallel_env(render_mode="human")
+observations, infos = env.reset()
+
+while env.agents:
+ # this is where you would insert your policy
+ actions = {agent: env.action_space(agent).sample() for agent in env.agents}
+
+ observations, rewards, terminations, truncations, infos = env.step(actions)
+env.close()
diff --git a/docs/content/environment_creation.md b/docs/content/environment_creation.md
@@ -5,6 +5,11 @@ title: Environment Creation
 
 This documentation overviews creating new environments and relevant useful wrappers, utilities and tests included in PettingZoo designed for the creation of new environments.
 
+
+We will walk through the creation of a simple Rock-Paper-Scissors environment, with example code for both [AEC](/api/aec/) and [Parallel](/api/parallel/) environments.
+
+See our [Custom Environment Tutorial](/tutorials/custom_environment/index) for a full walkthrough on creating custom environments, including complex environment logic and illegal action masking.
+
 ## Example Custom Environment
 
 This is a carefully commented version of the PettingZoo rock paper scissors environment.
@@ -14,13 +19,27 @@ This is a carefully commented version of the PettingZoo rock paper scissors envi
  :language: python
 ```
 
+To interact with your custom AEC environment, use the following code:
+
+```{eval-rst}
+.. literalinclude:: ../code_examples/aec_rps_usage.py
+ :language: python
+```
+
 ## Example Custom Parallel Environment
 
 ```{eval-rst}
 .. literalinclude:: ../code_examples/parallel_rps.py
  :language: python
 ```
 
+To interact with your custom parallel environment, use the following code:
+
+```{eval-rst}
+.. literalinclude:: ../code_examples/parallel_rps_usage.py
+ :language: python
+```
+
 ## Using Wrappers
 
 A wrapper is an environment transformation that takes in an environment as input, and outputs a new environment that is similar to the input environment, but with some transformation or validation applied. PettingZoo provides [wrappers to convert environments](/api/pz_wrappers) back and forth between the AEC API and the Parallel API and a set of simple [utility wrappers](/api/pz_wrappers) which provide input validation and other convenient reusable logic. PettingZoo also includes [wrappers](/api/supersuit_wrappers) via the SuperSuit companion package (`pip install supersuit`).

diff --git a/docs/index.md b/docs/index.md
@@ -39,7 +39,7 @@ environments/third_party_envs
 :hidden:
 :caption: Tutorials
 
-tutorials/environmentcreation/index
+tutorials/custom_environment/index
 tutorials/cleanrl/index
 tutorials/tianshou/index
 tutorials/rllib/index

diff --git a/...nvironmentcreation/1-project-structure.md → ...custom_environment/1-project-structure.md b/...nvironmentcreation/1-project-structure.md → ...custom_environment/1-project-structure.md
diff --git a/...nvironmentcreation/2-environment-logic.md → ...custom_environment/2-environment-logic.md b/...nvironmentcreation/2-environment-logic.md → ...custom_environment/2-environment-logic.md
diff --git a/...s/environmentcreation/3-action-masking.md → ...ls/custom_environment/3-action-masking.md b/...s/environmentcreation/3-action-masking.md → ...ls/custom_environment/3-action-masking.md
diff --git a/...entcreation/4-testing-your-environment.md → ...environment/4-testing-your-environment.md b/...entcreation/4-testing-your-environment.md → ...environment/4-testing-your-environment.md
diff --git a/...nmentcreation/5-using-your-environment.md → ...m_environment/5-using-your-environment.md b/...nmentcreation/5-using-your-environment.md → ...m_environment/5-using-your-environment.md
diff --git a/docs/tutorials/environmentcreation/index.md → docs/tutorials/custom_environment/index.md b/docs/tutorials/environmentcreation/index.md → docs/tutorials/custom_environment/index.md
@@ -1,10 +1,10 @@
 ---
-title: "Environment Creation"
+title: "Custom Environment Tutorial"
 ---
 
-# Environment Creation Tutorial
+# Custom Environment Tutorial
 
-These tutorials walk you though creating a custom environment from scratch, and are recommended as a starting point for anyone new to PettingZoo.
+These tutorials walk you through the full process of creating a custom environment from scratch, and are recommended as a starting point for anyone new to PettingZoo.
 
 1. [Project Structure](/tutorials/environmentcreation/1-project-structure.md)
 
@@ -14,6 +14,8 @@ These tutorials walk you though creating a custom environment from scratch, and
 
 4. [Testing Your Environment](/tutorials/environmentcreation/4-testing-your-environment.md)
 
+For a simpler example environment, including both [AEC](/api/aec/) and [Parallel](/api/parallel/) implementations, see our [Environment Creation](/content/environment_creation/) documentation.
+
 
 ```{toctree}
 :hidden:

diff --git a/pettingzoo/__init__.py b/pettingzoo/__init__.py
@@ -12,7 +12,7 @@
 
 os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "hide"
 
-__version__ = "1.24.0"
+__version__ = "1.24.1"
 
 try:
  import sys

diff --git a/pettingzoo/test/parallel_test.py b/pettingzoo/test/parallel_test.py
@@ -46,8 +46,11 @@ def parallel_api_test(par_env: ParallelEnv, num_cycles=1000):
  MAX_RESETS = 2
  for _ in range(MAX_RESETS):
  obs, infos = par_env.reset()
+
  assert isinstance(obs, dict)
+ assert isinstance(infos, dict)
  assert set(obs.keys()) == (set(par_env.agents))
+ assert set(infos.keys()) == (set(par_env.agents))
  terminated = {agent: False for agent in par_env.agents}
  truncated = {agent: False for agent in par_env.agents}
  live_agents = set(par_env.agents[:])
@@ -127,3 +130,4 @@ def parallel_api_test(par_env: ParallelEnv, num_cycles=1000):
 
  if len(live_agents) == 0:
  break
+ print("Passed Parallel API test")
diff --git a/pettingzoo/utils/conversions.py b/pettingzoo/utils/conversions.py
@@ -1,3 +1,4 @@
+# pyright: reportGeneralTypeIssues=false
 import copy
 import warnings
 from collections import defaultdict
@@ -304,6 +305,19 @@ def reset(self, seed=None, options=None):
  self.terminations = {agent: False for agent in self.agents}
  self.truncations = {agent: False for agent in self.agents}
  self.rewards = {agent: 0 for agent in self.agents}
+
+ # Every environment needs to return infos that contain self.agents as their keys
+ if not self.infos:
+ warnings.warn(
+ "The `infos` dictionary returned by `env.reset` was empty. OverwritingAgent IDs will be used as keys"
+ )
+ self.infos = {agent: {} for agent in self.agents}
+ elif set(self.infos.keys()) != set(self.agents):
+ self.infos = {agent: {self.infos.copy()} for agent in self.agents}
+ warnings.warn(
+ f"The `infos` dictionary returned by `env.reset()` is not valid: must contain keys for each agent defined in self.agents: {self.agents}. Overwriting with current info duplicated for each agent: {self.infos}"
+ )
+
  self._cumulative_rewards = {agent: 0 for agent in self.agents}
  self.new_agents = []
  self.new_values = {}

diff --git a/tutorials/CleanRL/requirements.txt b/tutorials/CleanRL/requirements.txt
@@ -1,4 +1,4 @@
-pettingzoo[butterfly,atari,testing]>=1.23.1
+pettingzoo[butterfly,atari,testing]>=1.24.0
 SuperSuit>=3.9.0
 tensorboard>=2.11.2
 torch>=1.13.1
diff --git a/tutorials/CustomEnvironment/requirements.txt b/tutorials/CustomEnvironment/requirements.txt
@@ -0,0 +1 @@
+pettingzoo==1.24.0
diff --git a/...ntCreation/tutorial1_skeleton_creation.py → ...nvironment/tutorial1_skeleton_creation.py b/...ntCreation/tutorial1_skeleton_creation.py → ...nvironment/tutorial1_skeleton_creation.py
diff --git a/...ntCreation/tutorial2_adding_game_logic.py → ...nvironment/tutorial2_adding_game_logic.py b/...ntCreation/tutorial2_adding_game_logic.py → ...nvironment/tutorial2_adding_game_logic.py
@@ -44,7 +44,11 @@ def reset(self, seed=None, options=None):
  )
  for a in self.agents
  }
- return observations, {}
+
+ # Get dummy infos. Necessary for proper parallel_to_aec conversion
+ infos = {a: {} for a in self.agents}
+
+ return observations, infos
 
  def step(self, actions):
  # Execute actions
@@ -85,7 +89,6 @@ def step(self, actions):
  if self.timestep > 100:
  rewards = {"prisoner": 0, "guard": 0}
  truncations = {"prisoner": True, "guard": True}
- self.agents = []
  self.timestep += 1
 
  # Get observations
@@ -101,6 +104,9 @@ def step(self, actions):
  # Get dummy infos (not used in this example)
  infos = {a: {} for a in self.agents}
 
+ if any(terminations.values()) or all(truncations.values()):
+ self.agents = []
+
  return observations, rewards, terminations, truncations, infos
 
  def render(self):

diff --git a/...nmentCreation/tutorial3_action_masking.py → ...omEnvironment/tutorial3_action_masking.py b/...nmentCreation/tutorial3_action_masking.py → ...omEnvironment/tutorial3_action_masking.py
@@ -8,7 +8,7 @@
 from pettingzoo import ParallelEnv
 
 
-class CustomEnvironment(ParallelEnv):
+class CustomActionMaskedEnvironment(ParallelEnv):
  metadata = {
  "name": "custom_environment_v0",
  }
@@ -45,7 +45,11 @@ def reset(self, seed=None, options=None):
  "prisoner": {"observation": observation, "action_mask": [0, 1, 1, 0]},
  "guard": {"observation": observation, "action_mask": [1, 0, 0, 1]},
  }
- return observations, {}
+
+ # Get dummy infos. Necessary for proper parallel_to_aec conversion
+ infos = {a: {} for a in self.agents}
+
+ return observations, infos
 
  def step(self, actions):
  # Execute actions

diff --git a/tutorials/CustomEnvironment/tutorial4_testing_the_environment.py b/tutorials/CustomEnvironment/tutorial4_testing_the_environment.py
@@ -0,0 +1,11 @@
+from tutorial2_adding_game_logic import CustomEnvironment
+from tutorial3_action_masking import CustomActionMaskedEnvironment
+
+from pettingzoo.test import parallel_api_test
+
+if __name__ == "__main__":
+ env = CustomEnvironment()
+ parallel_api_test(env, num_cycles=1_000_000)
+
+ env = CustomActionMaskedEnvironment()
+ parallel_api_test(env, num_cycles=1_000_000)
diff --git a/tutorials/EnvironmentCreation/4-TestingTheEnvironment.txt b/tutorials/EnvironmentCreation/4-TestingTheEnvironment.txt
diff --git a/tutorials/EnvironmentCreation/5-UsingWithAPI.txt b/tutorials/EnvironmentCreation/5-UsingWithAPI.txt
diff --git a/tutorials/EnvironmentCreation/6-UsingWithRL.txt b/tutorials/EnvironmentCreation/6-UsingWithRL.txt
diff --git a/tutorials/EnvironmentCreation/requirements.txt b/tutorials/EnvironmentCreation/requirements.txt
diff --git a/tutorials/EnvironmentCreation/tutorial4_testing_the_environment.py b/tutorials/EnvironmentCreation/tutorial4_testing_the_environment.py
diff --git a/tutorials/Ray/requirements.txt b/tutorials/Ray/requirements.txt
@@ -1,6 +1,7 @@
-PettingZoo[classic, butterfly]==1.23.1
+PettingZoo[classic,butterfly]>=1.24.0
 Pillow>=9.4.0
-ray[rllib]>2.6.2
-SuperSuit==3.8.0
+# note: currently requires nightly release, see https://docs.ray.io/en/latest/ray-overview/installation.html#daily-releases-nightlies
+ray[rllib]>2.6.3
+SuperSuit>=3.9.0
 torch>=1.13.1
 tensorflow-probability>=0.19.0
diff --git a/tutorials/SB3/connect_four/requirements.txt b/tutorials/SB3/connect_four/requirements.txt
@@ -1,3 +1,3 @@
-pettingzoo[classic]>=1.23.1
+pettingzoo[classic]>=1.24.0
 stable-baselines3>=2.0.0
 sb3-contrib>=2.0.0
diff --git a/tutorials/SB3/kaz/requirements.txt b/tutorials/SB3/kaz/requirements.txt
@@ -1,3 +1,3 @@
-pettingzoo[butterfly]>=1.23.1
+pettingzoo[butterfly]>=1.24.0
 stable-baselines3>=2.0.0
 supersuit>=3.9.0
diff --git a/tutorials/SB3/pistonball/requirements.txt b/tutorials/SB3/pistonball/requirements.txt
@@ -1,3 +1,3 @@
-pettingzoo[butterfly]>=1.23.1
+pettingzoo[butterfly]>=1.24.0
 stable-baselines3>=2.0.0
 supersuit>=3.9.0
diff --git a/tutorials/SB3/test/requirements.txt b/tutorials/SB3/test/requirements.txt
@@ -1,4 +1,4 @@
-pettingzoo[classic]>=1.23.1
+pettingzoo[classic]>=1.24.0
 stable-baselines3>=2.0.0
 sb3-contrib>=2.0.0
 pytest
diff --git a/tutorials/SB3/waterworld/requirements.txt b/tutorials/SB3/waterworld/requirements.txt
@@ -1,4 +1,4 @@
-pettingzoo[sisl]>=1.23.1
+pettingzoo[sisl]>=1.24.0
 stable-baselines3>=2.0.0
 supersuit>=3.9.0
 pymunk