Farama-Foundation · Markus28 · Mar 29, 2024 · Mar 29, 2024 · pseudo-rnd-thoughts · Apr 17, 2024
diff --git a/gymnasium/wrappers/__init__.py b/gymnasium/wrappers/__init__.py
@@ -58,7 +58,7 @@
     TimeLimit,
 )
 from gymnasium.wrappers.rendering import HumanRendering, RecordVideo, RenderCollection
-from gymnasium.wrappers.stateful_action import StickyAction
+from gymnasium.wrappers.stateful_action import RepeatAction, StickyAction
 from gymnasium.wrappers.stateful_observation import (
     DelayObservation,
     FrameStackObservation,

diff --git a/gymnasium/wrappers/stateful_action.py b/gymnasium/wrappers/stateful_action.py
@@ -1,14 +1,18 @@
-"""``StickyAction`` wrapper - There is a probability that the action is taken again."""
+"""A collection of wrappers for modifying actions.
+
+* ``StickyAction`` wrapper - There is a probability that the action is taken again.
+* ``RepeatAction`` wrapper - Repeat a single action multiple times.
+"""
 from __future__ import annotations
 
-from typing import Any
+from typing import Any, SupportsFloat
 
 import gymnasium as gym
-from gymnasium.core import ActType, ObsType
+from gymnasium.core import ActType, ObsType, WrapperActType, WrapperObsType
 from gymnasium.error import InvalidProbability
 
 
-__all__ = ["StickyAction"]
+__all__ = ["StickyAction", "RepeatAction"]
 
 
 class StickyAction(
@@ -80,3 +84,71 @@ def action(self, action: ActType) -> ActType:
 
         self.last_action = action
         return action
+
+
+class RepeatAction(
+    gym.Wrapper[ObsType, ActType, ObsType, ActType], gym.utils.RecordConstructorArgs
+):
+    """Repeatedly executes a given action in the underlying environment.
+
+    Upon calling the `step` method of this wrapper, `num_repeats`-many steps will be taken
+    with the same action in the underlying environment.
+    The wrapper sums the rewards collected from the underlying environment and returns the last
+    environment state observed.
+    If a termination or truncation is encountered during these steps, the wrapper will stop prematurely.
+    The `info` will additionally contain a field `"num_action_repetitions"`, which specifies
+    how many steps were actually taken.
+
+    Example:
+        >>> import gymnasium as gym
+        >>> env = gym.make("CartPole-v1")
+        >>> wrapped = RepeatAction(env, num_repeats=2)
+        >>> env.reset(seed=123)
+        (array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32), {})
+        >>> env.step(0)
+        (array([ 0.01734283, -0.23932791, -0.02859527,  0.25216764], dtype=float32), 1.0, False, False, {})
+        >>> env.step(0)         # Perform the same action again
+        (array([ 0.01255627, -0.43403012, -0.02355192,  0.5356957 ], dtype=float32), 1.0, False, False, {})
+        >>> wrapped.reset(seed=123)         # Now we do the same thing with the `RepeatAction` wrapper
+        (array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32), {})
+        >>> wrapped.step(0)
+        (array([ 0.01255627, -0.43403012, -0.02355192,  0.5356957 ], dtype=float32), 2.0, False, False, {'num_action_repetitions': 2})
+    """
+
+    def __init__(self, env: gym.Env[ObsType, ActType], num_repeats: int):
+        """Initialize RepeatAction wrapper.
+
+        Args:
+            env (Env): the wrapped environment
+            num_repeats (int): the maximum number of times to repeat the action
+        """
+        if num_repeats <= 1:
+            raise ValueError(
+                f"Number of action repeats should be greater than 1, but got {num_repeats}"
+            )
+
+        gym.utils.RecordConstructorArgs.__init__(self, num_repeats=num_repeats)
+        gym.Wrapper.__init__(self, env)
+        self._num_repeats = num_repeats
+
+    def step(
+        self, action: WrapperActType
+    ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
+        """Repeat `action` several times.
+
+        This step method will execute `action` at most `num_repeats`-many times in `self.env`,
+        or until a termination or truncation is encountered. The reward returned
+        is the sum of rewards collected from `self.env`. The last observation from the
+        environment is returned.
+        """
+        num_steps = 0
+        total_reward = 0
+        assert self._num_repeats > 0
+        for _ in range(self._num_repeats):
+            observation, reward, terminated, truncated, info = self.env.step(action)
+            total_reward += reward
+            num_steps += 1
+            if terminated or truncated:
+                break
+        info["num_action_repetitions"] = num_steps
+        return observation, total_reward, terminated, truncated, info