Commit 9dd88f7
Bump versions of affected environments. Change documentation to reflect new version number. Simplify reset test.
amacati committed Feb 29, 2024
1 parent 007afa8 commit 9dd88f7
Showing 12 changed files with 39 additions and 111 deletions.
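For downstream users the change is a pure ID bump: the Fetch tasks go from `-v2` to `-v3` and `HandReach` from `-v1` to `-v2`. A minimal migration sketch, assuming a gymnasium-robotics build that includes this commit:

```python
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

# Old IDs ("FetchReach-v2", "HandReach-v1", ...) are replaced by the
# bumped versions registered in this commit.
env = gym.make("FetchReach-v3")
obs, info = env.reset(seed=42)
env.close()
```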
2 changes: 1 addition & 1 deletion README.md
@@ -54,7 +54,7 @@ goal, e.g. state derived from the simulation.
```python
import gymnasium as gym

env = gym.make("FetchReach-v2")
env = gym.make("FetchReach-v3")
env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

2 changes: 1 addition & 1 deletion docs/content/multi-goal_api.md
@@ -25,7 +25,7 @@ import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

env = gym.make("FetchReach-v2")
env = gym.make("FetchReach-v3")
env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

8 changes: 4 additions & 4 deletions docs/envs/fetch/index.md
@@ -7,10 +7,10 @@ lastpage:

The Fetch environments are based on the 7-DoF [Fetch Mobile Manipulator](https://fetchrobotics.com/) arm, with a two-fingered parallel gripper attached to it. The main environment tasks are the following:

-* `FetchReach-v2`: Fetch has to move its end-effector to the desired goal position.
-* `FetchPush-v2`: Fetch has to move a box by pushing it until it reaches a desired goal position.
-* `FetchSlide-v2`: Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
-* `FetchPickAndPlace-v2`: Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
+* `FetchReach-v3`: Fetch has to move its end-effector to the desired goal position.
+* `FetchPush-v3`: Fetch has to move a box by pushing it until it reaches a desired goal position.
+* `FetchSlide-v3`: Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
+* `FetchPickAndPlace-v3`: Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
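
The four bumped task IDs share the same multi-goal API, so one loop can smoke-test them all; a sketch, assuming this commit's IDs are registered:

```python
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

for env_id in ["FetchReach-v3", "FetchPush-v3", "FetchSlide-v3", "FetchPickAndPlace-v3"]:
    env = gym.make(env_id)
    obs, info = env.reset(seed=0)
    # Goal-conditioned observations are dicts with these three keys
    assert {"observation", "achieved_goal", "desired_goal"} <= set(obs)
    env.close()
```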

```{raw} html
:file: list.html
2 changes: 1 addition & 1 deletion docs/envs/shadow_dexterous_hand/index.md
@@ -7,7 +7,7 @@ lastpage:

These environments are based on the [Shadow Dexterous Hand](https://www.shadowrobot.com/), which is an anthropomorphic robotic hand with 24 degrees of freedom. Of those 24 joints, 20 can be controlled independently whereas the remaining ones are coupled joints.

-* `HandReach-v1`: ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
+* `HandReach-v2`: ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
* `HandManipulateBlock-v1`: ShadowHand has to manipulate a block until it achieves a desired goal position and rotation.
* `HandManipulateEgg-v1`: ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation.
* `HandManipulatePen-v1`: ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation.
2 changes: 1 addition & 1 deletion docs/index.md
@@ -56,7 +56,7 @@ import gymnasium_robotics
gym.register_envs(gymnasium_robotics)
env = gym.make("FetchPickAndPlace-v2", render_mode="human")
env = gym.make("FetchPickAndPlace-v3", render_mode="human")
observation, info = env.reset(seed=42)
for _ in range(1000):
action = policy(observation) # User-defined policy function
10 changes: 5 additions & 5 deletions gymnasium_robotics/__init__.py
@@ -30,7 +30,7 @@ def _merge(a, b):
)

register(
id=f"FetchSlide{suffix}-v2",
id=f"FetchSlide{suffix}-v3",
entry_point="gymnasium_robotics.envs.fetch.slide:MujocoFetchSlideEnv",
kwargs=kwargs,
max_episode_steps=50,
@@ -44,7 +44,7 @@
)

register(
id=f"FetchPickAndPlace{suffix}-v2",
id=f"FetchPickAndPlace{suffix}-v3",
entry_point="gymnasium_robotics.envs.fetch.pick_and_place:MujocoFetchPickAndPlaceEnv",
kwargs=kwargs,
max_episode_steps=50,
@@ -58,7 +58,7 @@
)

register(
id=f"FetchReach{suffix}-v2",
id=f"FetchReach{suffix}-v3",
entry_point="gymnasium_robotics.envs.fetch.reach:MujocoFetchReachEnv",
kwargs=kwargs,
max_episode_steps=50,
@@ -72,7 +72,7 @@
)

register(
id=f"FetchPush{suffix}-v2",
id=f"FetchPush{suffix}-v3",
entry_point="gymnasium_robotics.envs.fetch.push:MujocoFetchPushEnv",
kwargs=kwargs,
max_episode_steps=50,
@@ -87,7 +87,7 @@
)

register(
id=f"HandReach{suffix}-v1",
id=f"HandReach{suffix}-v2",
entry_point="gymnasium_robotics.envs.shadow_dexterous_hand.reach:MujocoHandReachEnv",
kwargs=kwargs,
max_episode_steps=50,
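The `suffix` and `kwargs` in these `register` calls come from a surrounding loop over reward types that the hunks don't show. A sketch of the presumable pattern (loop structure and variable values assumed, not taken from the diff):

```python
from gymnasium.envs.registration import register

for reward_type in ["sparse", "dense"]:
    suffix = "Dense" if reward_type == "dense" else ""
    kwargs = {"reward_type": reward_type}

    # Registers both "FetchSlide-v3" and "FetchSlideDense-v3"
    register(
        id=f"FetchSlide{suffix}-v3",
        entry_point="gymnasium_robotics.envs.fetch.slide:MujocoFetchSlideEnv",
        kwargs=kwargs,
        max_episode_steps=50,
    )
```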
7 changes: 4 additions & 3 deletions gymnasium_robotics/envs/fetch/pick_and_place.py
@@ -88,15 +88,15 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
- *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-v2`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-v3`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v3` and initialized as follows:
```python
import gymnasium as gym
import gymnasium_robotics
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchPickAndPlaceDense-v2')
+env = gym.make('FetchPickAndPlaceDense-v3')
```
## Starting State
@@ -125,11 +125,12 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchPickAndPlace-v2', max_episode_steps=100)
+env = gym.make('FetchPickAndPlace-v3', max_episode_steps=100)
```
## Version History
+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
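The sparse/dense split documented here (and repeated in the push, slide, and reach docstrings below) reduces to a thresholded versus raw goal distance. A sketch of that logic under the stated semantics; the function name and signature are illustrative, not the package's API. (`HandReach` uses the same idea with a 0.01 threshold on the full goal vector.)

```python
import numpy as np

def goal_reward(achieved_goal, desired_goal, reward_type="sparse", threshold=0.05):
    # Euclidean distance between the achieved and desired goal positions
    d = np.linalg.norm(np.asarray(achieved_goal) - np.asarray(desired_goal), axis=-1)
    if reward_type == "sparse":
        # -1 while farther than the threshold from the goal, 0 once within it
        return -(d > threshold).astype(np.float32)
    # dense: negative Euclidean distance to the goal
    return -d
```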
7 changes: 4 additions & 3 deletions gymnasium_robotics/envs/fetch/push.py
@@ -116,15 +116,15 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):
- *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPush-v2`. However, for `dense` reward the id must be modified to `FetchPush-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPush-v3`. However, for `dense` reward the id must be modified to `FetchPushDense-v3` and initialized as follows:
```python
import gymnasium as gym
import gymnasium_robotics
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchPushDense-v2')
+env = gym.make('FetchPushDense-v3')
```
## Starting State
@@ -153,11 +153,12 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchPush-v2', max_episode_steps=100)
+env = gym.make('FetchPush-v3', max_episode_steps=100)
```
## Version History
+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
9 changes: 5 additions & 4 deletions gymnasium_robotics/envs/fetch/reach.py
@@ -77,16 +77,16 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):
the end effector and the goal is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchReach-v2`. However, for `dense`
-reward the id must be modified to `FetchReachDense-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchReach-v3`. However, for `dense`
+reward the id must be modified to `FetchReachDense-v3` and initialized as follows:
```python
import gymnasium as gym
import gymnasium_robotics
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchReachDense-v2')
+env = gym.make('FetchReachDense-v3')
```
## Starting State
@@ -111,11 +111,12 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchReach-v2', max_episode_steps=100)
+env = gym.make('FetchReach-v3', max_episode_steps=100)
```
## Version History
+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
7 changes: 4 additions & 3 deletions gymnasium_robotics/envs/fetch/slide.py
@@ -116,15 +116,15 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):
- *sparse*: the returned reward can have two values: `-1` if the puck hasn't reached its final target position, and `0` if the puck is in the final target position (the puck is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchSlide-v2`. However, for `dense` reward the id must be modified to `FetchSlideDense-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchSlide-v3`. However, for `dense` reward the id must be modified to `FetchSlideDense-v3` and initialized as follows:
```python
import gymnasium as gym
import gymnasium_robotics
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchSlideDense-v2')
+env = gym.make('FetchSlideDense-v3')
```
## Starting State
@@ -152,11 +152,12 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):
gym.register_envs(gymnasium_robotics)
-env = gym.make('FetchSlide-v2', max_episode_steps=100)
+env = gym.make('FetchSlide-v3', max_episode_steps=100)
```
## Version History
+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
9 changes: 5 additions & 4 deletions gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
@@ -306,13 +306,13 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)):
the achieved goal vector and the desired goal vector is lower than 0.01).
- *dense*: the returned reward is the negative 2-norm distance between the achieved goal vector and the desired goal vector.
-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `HandReach-v1`.
-However, for `dense` reward the id must be modified to `HandReachDense-v1` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `HandReach-v2`.
+However, for `dense` reward the id must be modified to `HandReachDense-v2` and initialized as follows:
```
import gymnasium as gym
-env = gym.make('HandReachDense-v1')
+env = gym.make('HandReachDense-v2')
```
## Starting State
@@ -383,11 +383,12 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)):
```
import gymnasium as gym
-env = gym.make('HandReach-v1', max_episode_steps=100)
+env = gym.make('HandReach-v2', max_episode_steps=100)
```
## Version History
+* v2: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v1: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v0: the environment depends on `mujoco_py` which is no longer maintained.
85 changes: 4 additions & 81 deletions tests/test_envs.py
@@ -3,6 +3,7 @@

import gymnasium as gym
import pytest
+from gymnasium.envs.mujoco.utils import check_mujoco_reset_state
from gymnasium.envs.registration import EnvSpec
from gymnasium.error import Error
from gymnasium.utils.env_checker import check_env, data_equivalence
@@ -109,16 +110,8 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
@pytest.mark.parametrize(
"env_spec", non_mujoco_py_env_specs, ids=[env.id for env in non_mujoco_py_env_specs]
)
-def test_same_env_determinism_rollout(env_spec: EnvSpec):
-"""Run two rollouts with a single environment and assert equality.
-This test runs two rollouts of NUM_STEPS steps with one environment
-reset with the same seed and asserts that:
-- observations after the reset are the same
-- same actions are sampled by the environment
-- observations are contained in the observation space
-- obs, rew, terminated, truncated and info are equals between the two rollouts
+def test_mujoco_reset_state_seeding(env_spec: EnvSpec):
+"""Check if the reset method of mujoco environments is deterministic for the same seed.
Note:
We exclude mujoco_py environments because they are deprecated and their implementation is
@@ -132,77 +125,7 @@ def test_same_env_determinism_rollout(env_spec: EnvSpec):

env = env_spec.make(disable_env_checker=True)

-rollout_1 = {
-"observations": [],
-"actions": [],
-"rewards": [],
-"terminated": [],
-"truncated": [],
-"infos": [],
-}
-rollout_2 = {
-"observations": [],
-"actions": [],
-"rewards": [],
-"terminated": [],
-"truncated": [],
-"infos": [],
-}
-
-# Run two rollouts of the same environment instance
-for rollout in [rollout_1, rollout_2]:
-# Reset the environment with the same seed for both rollouts
-obs, info = env.reset(seed=SEED)
-env.action_space.seed(SEED)
-rollout["observations"].append(obs)
-rollout["infos"].append(info)
-
-for time_step in range(NUM_STEPS):
-action = env.action_space.sample()
-
-obs, rew, terminated, truncated, info = env.step(action)
-rollout["observations"].append(obs)
-rollout["actions"].append(action)
-rollout["rewards"].append(rew)
-rollout["terminated"].append(terminated)
-rollout["truncated"].append(truncated)
-rollout["infos"].append(info)
-if terminated or truncated:
-env.reset(seed=SEED)
-
-for time_step, (obs_1, obs_2) in enumerate(
-zip(rollout_1["observations"], rollout_2["observations"])
-):
-# -1 because of the initial observation stored on reset
-time_step = "initial" if time_step == 0 else time_step - 1
-assert_equals(obs_1, obs_2, f"[{time_step}] ")
-assert env.observation_space.contains(
-obs_1
-)  # obs_2 verified by previous assertion
-for time_step, (rew_1, rew_2) in enumerate(
-zip(rollout_1["rewards"], rollout_2["rewards"])
-):
-assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}"
-for time_step, (terminated_1, terminated_2) in enumerate(
-zip(rollout_1["terminated"], rollout_2["terminated"])
-):
-assert (
-terminated_1 == terminated_2
-), f"[{time_step}] terminated 1={terminated_1}, terminated 2={terminated_2}"
-for time_step, (truncated_1, truncated_2) in enumerate(
-zip(rollout_1["truncated"], rollout_2["truncated"])
-):
-assert (
-truncated_1 == truncated_2
-), f"[{time_step}] truncated 1={truncated_1}, truncated 2={truncated_2}"
-for time_step, (info_1, info_2) in enumerate(
-zip(rollout_1["infos"], rollout_2["infos"])
-):
-# -1 because of the initial info stored on reset
-time_step = "initial" if time_step == 0 else time_step - 1
-assert_equals(info_1, info_2, f"[{time_step}] ")
-
-env.close()
+check_mujoco_reset_state(env)


@pytest.mark.parametrize(
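The deleted rollout comparison is subsumed by `check_mujoco_reset_state`. For intuition, the helper presumably does something along these lines; a sketch, not gymnasium's actual implementation:

```python
import numpy as np

def sketch_reset_state_check(env, seed=42):
    """Reset twice with the same seed and compare the raw MuJoCo state."""
    env.reset(seed=seed)
    qpos = env.unwrapped.data.qpos.copy()
    qvel = env.unwrapped.data.qvel.copy()
    env.reset(seed=seed)
    assert np.allclose(qpos, env.unwrapped.data.qpos)
    assert np.allclose(qvel, env.unwrapped.data.qvel)
```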
