Commit 9dd88f7
Bump versions of affected environments. Change documentation to reflect new version number. Simplify reset test.
amacati committed Feb 29, 2024
1 parent 007afa8 commit 9dd88f7
Showing 12 changed files with 39 additions and 111 deletions.
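For downstream users the change is a pure ID bump: the Fetch tasks go from `-v2` to `-v3` and `HandReach` from `-v1` to `-v2`. A minimal migration sketch, assuming a gymnasium-robotics build that includes this commit:

```python
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

# Old IDs ("FetchReach-v2", "HandReach-v1", ...) are replaced by the
# bumped versions registered in this commit.
env = gym.make("FetchReach-v3")
obs, info = env.reset(seed=42)
env.close()
```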
2 changes: 1 addition & 1 deletion README.md
@@ -54,7 +54,7 @@ goal, e.g. state derived from the simulation.
```python
import gymnasium as gym

env = gym.make("FetchReach-v2")
env = gym.make("FetchReach-v3")
env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

2 changes: 1 addition & 1 deletion docs/content/multi-goal_api.md
@@ -25,7 +25,7 @@ import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

env = gym.make("FetchReach-v2")
env = gym.make("FetchReach-v3")
env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

8 changes: 4 additions & 4 deletions docs/envs/fetch/index.md
@@ -7,10 +7,10 @@ lastpage:

The Fetch environments are based on the 7-DoF [Fetch Mobile Manipulator](https://fetchrobotics.com/) arm, with a two-fingered parallel gripper attached to it. The main environment tasks are the following:

-* `FetchReach-v2`: Fetch has to move its end-effector to the desired goal position.
-* `FetchPush-v2`: Fetch has to move a box by pushing it until it reaches a desired goal position.
-* `FetchSlide-v2`: Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
-* `FetchPickAndPlace-v2`: Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
+* `FetchReach-v3`: Fetch has to move its end-effector to the desired goal position.
+* `FetchPush-v3`: Fetch has to move a box by pushing it until it reaches a desired goal position.
+* `FetchSlide-v3`: Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
+* `FetchPickAndPlace-v3`: Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
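
The four bumped task IDs share the same multi-goal API, so one loop can smoke-test them all; a sketch, assuming this commit's IDs are registered:

```python
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

for env_id in ["FetchReach-v3", "FetchPush-v3", "FetchSlide-v3", "FetchPickAndPlace-v3"]:
    env = gym.make(env_id)
    obs, info = env.reset(seed=0)
    # Goal-conditioned observations are dicts with these three keys
    assert {"observation", "achieved_goal", "desired_goal"} <= set(obs)
    env.close()
```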

```{raw} html
:file: list.html
2 changes: 1 addition & 1 deletion docs/envs/shadow_dexterous_hand/index.md
@@ -7,7 +7,7 @@ lastpage:

These environments are based on the [Shadow Dexterous Hand](https://www.shadowrobot.com/), which is an anthropomorphic robotic hand with 24 degrees of freedom. Of those 24 joints, 20 can be controlled independently whereas the remaining ones are coupled joints.

-* `HandReach-v1`: ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
+* `HandReach-v2`: ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
* `HandManipulateBlock-v1`: ShadowHand has to manipulate a block until it achieves a desired goal position and rotation.
* `HandManipulateEgg-v1`: ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation.
* `HandManipulatePen-v1`: ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation.
2 changes: 1 addition & 1 deletion docs/index.md
@@ -56,7 +56,7 @@ import gymnasium_robotics
gym.register_envs(gymnasium_robotics)
env = gym.make("FetchPickAndPlace-v2", render_mode="human")
env = gym.make("FetchPickAndPlace-v3", render_mode="human")
observation, info = env.reset(seed=42)
for _ in range(1000):
action = policy(observation) # User-defined policy function
10 changes: 5 additions & 5 deletions gymnasium_robotics/__init__.py
@@ -30,7 +30,7 @@ def _merge(a, b):
)

register(
id=f"FetchSlide{suffix}-v2",
id=f"FetchSlide{suffix}-v3",
entry_point="gymnasium_robotics.envs.fetch.slide:MujocoFetchSlideEnv",
kwargs=kwargs,
max_episode_steps=50,
@@ -44,7 +44,7 @@
)

register(
id=f"FetchPickAndPlace{suffix}-v2",
id=f"FetchPickAndPlace{suffix}-v3",
entry_point="gymnasium_robotics.envs.fetch.pick_and_place:MujocoFetchPickAndPlaceEnv",
kwargs=kwargs,
max_episode_steps=50,
@@ -58,7 +58,7 @@
)

register(
id=f"FetchReach{suffix}-v2",
id=f"FetchReach{suffix}-v3",
entry_point="gymnasium_robotics.envs.fetch.reach:MujocoFetchReachEnv",
kwargs=kwargs,
max_episode_steps=50,
@@ -72,7 +72,7 @@
)

register(
id=f"FetchPush{suffix}-v2",
id=f"FetchPush{suffix}-v3",
entry_point="gymnasium_robotics.envs.fetch.push:MujocoFetchPushEnv",
kwargs=kwargs,
max_episode_steps=50,
@@ -87,7 +87,7 @@
)

register(
id=f"HandReach{suffix}-v1",
id=f"HandReach{suffix}-v2",
entry_point="gymnasium_robotics.envs.shadow_dexterous_hand.reach:MujocoHandReachEnv",
kwargs=kwargs,
max_episode_steps=50,
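The `suffix` and `kwargs` in these `register` calls come from a surrounding loop over reward types that the hunks don't show. A sketch of the presumable pattern (loop structure and variable values assumed, not taken from the diff):

```python
from gymnasium.envs.registration import register

for reward_type in ["sparse", "dense"]:
    suffix = "Dense" if reward_type == "dense" else ""
    kwargs = {"reward_type": reward_type}

    # Registers both "FetchSlide-v3" and "FetchSlideDense-v3"
    register(
        id=f"FetchSlide{suffix}-v3",
        entry_point="gymnasium_robotics.envs.fetch.slide:MujocoFetchSlideEnv",
        kwargs=kwargs,
        max_episode_steps=50,
    )
```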
7 changes: 4 additions & 3 deletions gymnasium_robotics/envs/fetch/pick_and_place.py
@@ -88,15 +88,15 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
- *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-v2`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-v3`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v3` and initialized as follows:
```python
import gymnasium as gym
import gymnasium_robotics
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchPickAndPlaceDense-v2')
+env = gym.make('FetchPickAndPlaceDense-v3')
```
## Starting State
@@ -125,11 +125,12 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchPickAndPlace-v2', max_episode_steps=100)
+env = gym.make('FetchPickAndPlace-v3', max_episode_steps=100)
```
## Version History
+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
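The sparse/dense split documented here (and repeated in the push, slide, and reach docstrings below) reduces to a thresholded versus raw goal distance. A sketch of that logic under the stated semantics; the function name and signature are illustrative, not the package's API. (`HandReach` uses the same idea with a 0.01 threshold on the full goal vector.)

```python
import numpy as np

def goal_reward(achieved_goal, desired_goal, reward_type="sparse", threshold=0.05):
    # Euclidean distance between the achieved and desired goal positions
    d = np.linalg.norm(np.asarray(achieved_goal) - np.asarray(desired_goal), axis=-1)
    if reward_type == "sparse":
        # -1 while farther than the threshold from the goal, 0 once within it
        return -(d > threshold).astype(np.float32)
    # dense: negative Euclidean distance to the goal
    return -d
```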
7 changes: 4 additions & 3 deletions gymnasium_robotics/envs/fetch/push.py
@@ -116,15 +116,15 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):
- *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPush-v2`. However, for `dense` reward the id must be modified to `FetchPush-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPush-v3`. However, for `dense` reward the id must be modified to `FetchPushDense-v3` and initialized as follows:
```python
import gymnasium as gym
import gymnasium_robotics
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchPushDense-v2')
+env = gym.make('FetchPushDense-v3')
```
## Starting State
@@ -153,11 +153,12 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchPush-v2', max_episode_steps=100)
+env = gym.make('FetchPush-v3', max_episode_steps=100)
```
## Version History
+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
9 changes: 5 additions & 4 deletions gymnasium_robotics/envs/fetch/reach.py
@@ -77,16 +77,16 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):
the end effector and the goal is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchReach-v2`. However, for `dense`
-reward the id must be modified to `FetchReachDense-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchReach-v3`. However, for `dense`
+reward the id must be modified to `FetchReachDense-v3` and initialized as follows:
```python
import gymnasium as gym
import gymnasium_robotics
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchReachDense-v2')
+env = gym.make('FetchReachDense-v3')
```
## Starting State
@@ -111,11 +111,12 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchReach-v2', max_episode_steps=100)
+env = gym.make('FetchReach-v3', max_episode_steps=100)
```
## Version History
+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
7 changes: 4 additions & 3 deletions gymnasium_robotics/envs/fetch/slide.py
@@ -116,15 +116,15 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):
- *sparse*: the returned reward can have two values: `-1` if the puck hasn't reached its final target position, and `0` if the puck is in the final target position (the puck is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchSlide-v2`. However, for `dense` reward the id must be modified to `FetchSlideDense-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchSlide-v3`. However, for `dense` reward the id must be modified to `FetchSlideDense-v3` and initialized as follows:
```python
import gymnasium as gym
import gymnasium_robotics
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchSlideDense-v2')
+env = gym.make('FetchSlideDense-v3')
```
## Starting State
@@ -152,11 +152,12 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchSlide-v2', max_episode_steps=100)
+env = gym.make('FetchSlide-v3', max_episode_steps=100)
```
## Version History
+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
9 changes: 5 additions & 4 deletions gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
@@ -306,13 +306,13 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)):
the achieved goal vector and the desired goal vector is lower than 0.01).
- *dense*: the returned reward is the negative 2-norm distance between the achieved goal vector and the desired goal vector.
-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `HandReach-v1`.
-However, for `dense` reward the id must be modified to `HandReachDense-v1` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `HandReach-v2`.
+However, for `dense` reward the id must be modified to `HandReachDense-v2` and initialized as follows:
```
import gymnasium as gym
-env = gym.make('HandReachDense-v1')
+env = gym.make('HandReachDense-v2')
```
## Starting State
@@ -383,11 +383,12 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)):
```
import gymnasium as gym
-env = gym.make('HandReach-v1', max_episode_steps=100)
+env = gym.make('HandReach-v2', max_episode_steps=100)
```
## Version History
+* v2: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v1: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v0: the environment depends on `mujoco_py` which is no longer maintained.
85 changes: 4 additions & 81 deletions tests/test_envs.py
@@ -3,6 +3,7 @@

import gymnasium as gym
import pytest
+from gymnasium.envs.mujoco.utils import check_mujoco_reset_state
from gymnasium.envs.registration import EnvSpec
from gymnasium.error import Error
from gymnasium.utils.env_checker import check_env, data_equivalence
@@ -109,16 +110,8 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
@pytest.mark.parametrize(
"env_spec", non_mujoco_py_env_specs, ids=[env.id for env in non_mujoco_py_env_specs]
)
-def test_same_env_determinism_rollout(env_spec: EnvSpec):
-"""Run two rollouts with a single environment and assert equality.
-This test runs two rollouts of NUM_STEPS steps with one environment
-reset with the same seed and asserts that:
-- observations after the reset are the same
-- same actions are sampled by the environment
-- observations are contained in the observation space
-- obs, rew, terminated, truncated and info are equals between the two rollouts
+def test_mujoco_reset_state_seeding(env_spec: EnvSpec):
+"""Check if the reset method of mujoco environments is deterministic for the same seed.
Note:
We exclude mujoco_py environments because they are deprecated and their implementation is
@@ -132,77 +125,7 @@ def test_same_env_determinism_rollout(env_spec: EnvSpec):

env = env_spec.make(disable_env_checker=True)

-rollout_1 = {
-"observations": [],
-"actions": [],
-"rewards": [],
-"terminated": [],
-"truncated": [],
-"infos": [],
-}
-rollout_2 = {
-"observations": [],
-"actions": [],
-"rewards": [],
-"terminated": [],
-"truncated": [],
-"infos": [],
-}
-
-# Run two rollouts of the same environment instance
-for rollout in [rollout_1, rollout_2]:
-# Reset the environment with the same seed for both rollouts
-obs, info = env.reset(seed=SEED)
-env.action_space.seed(SEED)
-rollout["observations"].append(obs)
-rollout["infos"].append(info)
-
-for time_step in range(NUM_STEPS):
-action = env.action_space.sample()
-
-obs, rew, terminated, truncated, info = env.step(action)
-rollout["observations"].append(obs)
-rollout["actions"].append(action)
-rollout["rewards"].append(rew)
-rollout["terminated"].append(terminated)
-rollout["truncated"].append(truncated)
-rollout["infos"].append(info)
-if terminated or truncated:
-env.reset(seed=SEED)
-
-for time_step, (obs_1, obs_2) in enumerate(
-zip(rollout_1["observations"], rollout_2["observations"])
-):
-# -1 because of the initial observation stored on reset
-time_step = "initial" if time_step == 0 else time_step - 1
-assert_equals(obs_1, obs_2, f"[{time_step}] ")
-assert env.observation_space.contains(
-obs_1
-)  # obs_2 verified by previous assertion
-for time_step, (rew_1, rew_2) in enumerate(
-zip(rollout_1["rewards"], rollout_2["rewards"])
-):
-assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}"
-for time_step, (terminated_1, terminated_2) in enumerate(
-zip(rollout_1["terminated"], rollout_2["terminated"])
-):
-assert (
-terminated_1 == terminated_2
-), f"[{time_step}] terminated 1={terminated_1}, terminated 2={terminated_2}"
-for time_step, (truncated_1, truncated_2) in enumerate(
-zip(rollout_1["truncated"], rollout_2["truncated"])
-):
-assert (
-truncated_1 == truncated_2
-), f"[{time_step}] truncated 1={truncated_1}, truncated 2={truncated_2}"
-for time_step, (info_1, info_2) in enumerate(
-zip(rollout_1["infos"], rollout_2["infos"])
-):
-# -1 because of the initial info stored on reset
-time_step = "initial" if time_step == 0 else time_step - 1
-assert_equals(info_1, info_2, f"[{time_step}] ")
-
-env.close()
+check_mujoco_reset_state(env)


@pytest.mark.parametrize(
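The deleted rollout comparison is subsumed by `check_mujoco_reset_state`. For intuition, the helper presumably does something along these lines; a sketch, not gymnasium's actual implementation:

```python
import numpy as np

def sketch_reset_state_check(env, seed=42):
    """Reset twice with the same seed and compare the raw MuJoCo state."""
    env.reset(seed=seed)
    qpos = env.unwrapped.data.qpos.copy()
    qvel = env.unwrapped.data.qvel.copy()
    env.reset(seed=seed)
    assert np.allclose(qpos, env.unwrapped.data.qpos)
    assert np.allclose(qvel, env.unwrapped.data.qvel)
```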
