Skip to content

Commit

Permalink
Updated CustomEnvironment tutorial comments (#1084)
Browse files Browse the repository at this point in the history
  • Loading branch information
GiovanniGrotto committed Sep 4, 2023
1 parent c3dc056 commit ace9b76
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 2 deletions.
4 changes: 2 additions & 2 deletions docs/tutorials/custom_environment/2-environment-logic.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ Now that we have a basic understanding of the structure of environment repositor

For this tutorial, we will be creating a two-player game consisting of a prisoner, trying to escape, and a guard, trying to catch the prisoner. This game will be played on a 7x7 grid, where:
- The prisoner starts in the top left corner,
- the guard starts in the bottom right corner,
- the escape door is randomly placed in the middle of the grid, and
- The guard starts in the bottom right corner,
- The escape door is randomly placed in the middle of the grid, and
- Both the prisoner and the guard can move in any of the four cardinal directions (up, down, left, right).

## Code
Expand Down
53 changes: 53 additions & 0 deletions tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,31 @@


class CustomEnvironment(ParallelEnv):
"""The metadata holds environment constants.
The "name" metadata allows the environment to be pretty printed.
"""

metadata = {
"name": "custom_environment_v0",
}

def __init__(self):
"""The init method takes in environment arguments.
Should define the following attributes:
- escape x and y coordinates
- guard x and y coordinates
- prisoner x and y coordinates
- timestamp
- possible_agents
Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated.
Spaces should be defined in the action_space() and observation_space() methods.
If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning.
These attributes should not be changed after initialization.
"""
self.escape_y = None
self.escape_x = None
self.guard_y = None
Expand All @@ -24,6 +44,19 @@ def __init__(self):
self.possible_agents = ["prisoner", "guard"]

def reset(self, seed=None, options=None):
"""Reset set the environment to a starting point.
It needs to initialize the following attributes:
- agents
- timestamp
- prisoner x and y coordinates
- guard x and y coordinates
- escape x and y coordinates
- observation
- infos
And must set up the environment so that render(), step(), and observe() can be called without issues.
"""
self.agents = copy(self.possible_agents)
self.timestep = 0

Expand Down Expand Up @@ -51,6 +84,19 @@ def reset(self, seed=None, options=None):
return observations, infos

def step(self, actions):
"""Takes in an action for the current agent (specified by agent_selection).
Needs to update:
- prisoner x and y coordinates
- guard x and y coordinates
- terminations
- truncations
- rewards
- timestamp
- infos
And any internal state used by observe() or render()
"""
# Execute actions
prisoner_action = actions["prisoner"]
guard_action = actions["guard"]
Expand Down Expand Up @@ -110,16 +156,23 @@ def step(self, actions):
return observations, rewards, terminations, truncations, infos

def render(self):
"""Renders the environment."""
grid = np.full((7, 7), " ")
grid[self.prisoner_y, self.prisoner_x] = "P"
grid[self.guard_y, self.guard_x] = "G"
grid[self.escape_y, self.escape_x] = "E"
print(f"{grid} \n")

# Observation space should be defined here.
# lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
# If your spaces change over time, remove this line (disable caching).
@functools.lru_cache(maxsize=None)
def observation_space(self, agent):
# gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
return MultiDiscrete([7 * 7] * 3)

# Action space should be defined here.
# If your spaces change over time, remove this line (disable caching).
@functools.lru_cache(maxsize=None)
def action_space(self, agent):
return Discrete(4)
54 changes: 54 additions & 0 deletions tutorials/CustomEnvironment/tutorial3_action_masking.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,31 @@


class CustomActionMaskedEnvironment(ParallelEnv):
"""The metadata holds environment constants.
The "name" metadata allows the environment to be pretty printed.
"""

metadata = {
"name": "custom_environment_v0",
}

def __init__(self):
"""The init method takes in environment arguments.
Should define the following attributes:
- escape x and y coordinates
- guard x and y coordinates
- prisoner x and y coordinates
- timestamp
- possible_agents
Note: as of v1.18.1, the action_spaces and observation_spaces attributes are deprecated.
Spaces should be defined in the action_space() and observation_space() methods.
If these methods are not overridden, spaces will be inferred from self.observation_spaces/action_spaces, raising a warning.
These attributes should not be changed after initialization.
"""
self.escape_y = None
self.escape_x = None
self.guard_y = None
Expand All @@ -24,6 +44,19 @@ def __init__(self):
self.possible_agents = ["prisoner", "guard"]

def reset(self, seed=None, options=None):
"""Reset set the environment to a starting point.
It needs to initialize the following attributes:
- agents
- timestamp
- prisoner x and y coordinates
- guard x and y coordinates
- escape x and y coordinates
- observation
- infos
And must set up the environment so that render(), step(), and observe() can be called without issues.
"""
self.agents = copy(self.possible_agents)
self.timestep = 0

Expand Down Expand Up @@ -52,6 +85,19 @@ def reset(self, seed=None, options=None):
return observations, infos

def step(self, actions):
"""Takes in an action for the current agent (specified by agent_selection).
Needs to update:
- prisoner x and y coordinates
- guard x and y coordinates
- terminations
- truncations
- rewards
- timestamp
- infos
And any internal state used by observe() or render()
"""
# Execute actions
prisoner_action = actions["prisoner"]
guard_action = actions["guard"]
Expand Down Expand Up @@ -95,6 +141,7 @@ def step(self, actions):
elif self.guard_y == 6:
guard_action_mask[3] = 0

# Action mask to prevent guard from going over escape cell
if self.guard_x - 1 == self.escape_x:
guard_action_mask[0] = 0
elif self.guard_x + 1 == self.escape_x:
Expand Down Expand Up @@ -145,16 +192,23 @@ def step(self, actions):
return observations, rewards, terminations, truncations, infos

def render(self):
"""Renders the environment."""
grid = np.zeros((7, 7))
grid[self.prisoner_y, self.prisoner_x] = "P"
grid[self.guard_y, self.guard_x] = "G"
grid[self.escape_y, self.escape_x] = "E"
print(f"{grid} \n")

# Observation space should be defined here.
# lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
# If your spaces change over time, remove this line (disable caching).
@functools.lru_cache(maxsize=None)
def observation_space(self, agent):
# gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
return MultiDiscrete([7 * 7 - 1] * 3)

# Action space should be defined here.
# If your spaces change over time, remove this line (disable caching).
@functools.lru_cache(maxsize=None)
def action_space(self, agent):
return Discrete(4)

0 comments on commit ace9b76

Please sign in to comment.