Commit 06da16e

[RLlib] Add timers to env step, forward pass, and complete connector pipeline runs. (ray-project#51160)
1 parent 1ed623d commit 06da16e

9 files changed: +89 / -29 lines changed

rllib/connectors/connector_pipeline_v2.py

Lines changed: 17 additions & 2 deletions
@@ -7,8 +7,9 @@
 from ray.rllib.core.rl_module.rl_module import RLModule
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.checkpoints import Checkpointable
-from ray.rllib.utils.metrics import TIMERS, CONNECTOR_TIMERS
+from ray.rllib.utils.metrics import TIMERS, CONNECTOR_PIPELINE_TIMER, CONNECTOR_TIMERS
 from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
+from ray.rllib.utils.metrics.utils import to_snake_case
 from ray.rllib.utils.typing import EpisodeType, StateDict
 from ray.util.annotations import PublicAPI

@@ -95,6 +96,13 @@ def __call__(
                 piece in the pipeline.
         """
         shared_data = shared_data if shared_data is not None else {}
+        full_stats = None
+        if metrics:
+            full_stats = metrics.log_time(
+                kwargs.get("metrics_prefix_key", ()) + (CONNECTOR_PIPELINE_TIMER,)
+            )
+            full_stats.__enter__()
+
         # Loop through connector pieces and call each one with the output of the
         # previous one. Thereby, time each connector piece's call.
         for connector in self.connectors:
@@ -104,7 +112,11 @@ def __call__(
             if metrics:
                 stats = metrics.log_time(
                     kwargs.get("metrics_prefix_key", ())
-                    + (TIMERS, CONNECTOR_TIMERS, connector.__class__.__name__)
+                    + (
+                        TIMERS,
+                        CONNECTOR_TIMERS,
+                        to_snake_case(connector.__class__.__name__),
+                    )
                 )
                 stats.__enter__()

@@ -131,6 +143,9 @@ def __call__(
                     f"the `data` arg passed in (either altered or unchanged)."
                 )

+        if metrics:
+            full_stats.__exit__(None, None, None)
+
         return batch

     def remove(self, name_or_class: Union[str, Type]):
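
The pipeline timer wraps one full `__call__` run, while each piece keeps its own timer under a snake-cased key. The following is a minimal sketch, not part of the commit, of where those keys land, assuming a pipeline was invoked with `metrics=metrics` and `metrics_prefix_key=(ENV_TO_MODULE_CONNECTOR,)` and contains a piece such as `FlattenObservations` (example name only):

# Sketch only: key layout inferred from the diff above, not an official API guarantee.
from ray.rllib.utils.metrics import (
    CONNECTOR_PIPELINE_TIMER,
    CONNECTOR_TIMERS,
    ENV_TO_MODULE_CONNECTOR,
    TIMERS,
)
from ray.rllib.utils.metrics.metrics_logger import MetricsLogger

metrics = MetricsLogger()
# ... a pipeline call with `metrics=metrics` and
# `metrics_prefix_key=(ENV_TO_MODULE_CONNECTOR,)` would have run here ...

# Total wall-clock time of one full pipeline pass.
pipeline_time = metrics.peek(
    (ENV_TO_MODULE_CONNECTOR, CONNECTOR_PIPELINE_TIMER), default=None
)
# Per-piece time, keyed by the snake-cased class name.
piece_time = metrics.peek(
    (ENV_TO_MODULE_CONNECTOR, TIMERS, CONNECTOR_TIMERS, "flatten_observations"),
    default=None,
)
print(pipeline_time, piece_time)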

rllib/connectors/learner/learner_connector_pipeline.py

Lines changed: 5 additions & 1 deletion
@@ -4,6 +4,7 @@
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.metrics import (
     ALL_MODULES,
+    LEARNER_CONNECTOR,
     LEARNER_CONNECTOR_SUM_EPISODES_LENGTH_IN,
     LEARNER_CONNECTOR_SUM_EPISODES_LENGTH_OUT,
 )
@@ -42,7 +43,10 @@ def __call__(
             shared_data=shared_data if shared_data is not None else {},
             explore=explore,
             metrics=metrics,
-            metrics_prefix_key=(ALL_MODULES,),
+            metrics_prefix_key=(
+                ALL_MODULES,
+                LEARNER_CONNECTOR,
+            ),
             **kwargs,
         )

rllib/core/learner/tests/test_learner_group.py

Lines changed: 3 additions & 3 deletions
@@ -22,7 +22,7 @@
 from ray.rllib.env.multi_agent_episode import MultiAgentEpisode
 from ray.rllib.env.single_agent_episode import SingleAgentEpisode
 from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole
-from ray.rllib.utils.metrics import ALL_MODULES, TIMERS
+from ray.rllib.utils.metrics import ALL_MODULES, LEARNER_CONNECTOR
 from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
 from ray.rllib.utils.test_utils import check
 from ray.util.timer import _Timer
@@ -474,8 +474,8 @@ def test_save_to_path_and_restore_from_path(self):
             results_2nd_update_with_break,
             results_2nd_update_without_break,
         ):
-            r1[ALL_MODULES].pop(TIMERS)
-            r2[ALL_MODULES].pop(TIMERS)
+            r1[ALL_MODULES].pop(LEARNER_CONNECTOR)
+            r2[ALL_MODULES].pop(LEARNER_CONNECTOR)
             check(
                 MetricsLogger.peek_results(results_2nd_update_with_break),
                 MetricsLogger.peek_results(results_2nd_update_without_break),

rllib/env/env_runner.py

Lines changed: 12 additions & 5 deletions
@@ -9,6 +9,8 @@
 from ray.rllib.core import COMPONENT_RL_MODULE
 from ray.rllib.utils.actor_manager import FaultAwareApply
 from ray.rllib.utils.framework import try_import_tf
+from ray.rllib.utils.metrics import ENV_RESET_TIMER, ENV_STEP_TIMER
+from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
 from ray.rllib.utils.torch_utils import convert_to_torch_tensor
 from ray.rllib.utils.typing import StateDict, TensorType
 from ray.util.annotations import PublicAPI, DeveloperAPI
@@ -51,8 +53,10 @@ def __init__(self, *, config: "AlgorithmConfig", **kwargs):
             config: The AlgorithmConfig to use to setup this EnvRunner.
             **kwargs: Forward compatibility kwargs.
         """
-        self.config = config.copy(copy_frozen=False)
+        self.config: AlgorithmConfig = config.copy(copy_frozen=False)
         self.env = None
+        # Create a MetricsLogger object for logging custom stats.
+        self.metrics: MetricsLogger = MetricsLogger()

         super().__init__(**kwargs)

@@ -160,9 +164,11 @@ def _try_env_reset(self):
         """Tries resetting the env and - if an error orrurs - handles it gracefully."""
         # Try to reset.
         try:
-            obs, infos = self.env.reset(
-                seed=self.config.seed and self.config.seed + (self.worker_index or 0),
-            )
+            with self.metrics.log_time(ENV_RESET_TIMER):
+                obs, infos = self.env.reset(
+                    seed=self.config.seed
+                    and self.config.seed + (self.worker_index or 0),
+                )
             # Everything ok -> return.
             return obs, infos
         # Error.
@@ -183,7 +189,8 @@ def _try_env_reset(self):
     def _try_env_step(self, actions):
         """Tries stepping the env and - if an error orrurs - handles it gracefully."""
         try:
-            results = self.env.step(actions)
+            with self.metrics.log_time(ENV_STEP_TIMER):
+                results = self.env.step(actions)
             return results
         except Exception as e:
             if self.config.restart_failed_sub_environments:
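
Both env hunks use the same pattern: `MetricsLogger.log_time()` wraps the timed call as a context manager, and the value can later be read back with `peek()`. A minimal standalone sketch of that pattern (not part of the commit; the `time.sleep()` merely stands in for the env call):

import time

from ray.rllib.utils.metrics import ENV_STEP_TIMER
from ray.rllib.utils.metrics.metrics_logger import MetricsLogger

metrics = MetricsLogger()
# Time one (fake) env step, mirroring what `_try_env_step()` does above.
with metrics.log_time(ENV_STEP_TIMER):
    time.sleep(0.01)  # stands in for `self.env.step(actions)`
# Read the logged time (in seconds) back out.
print(metrics.peek(ENV_STEP_TIMER))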

rllib/env/multi_agent_env_runner.py

Lines changed: 16 additions & 10 deletions
@@ -30,13 +30,15 @@
 from ray.rllib.utils.deprecation import Deprecated
 from ray.rllib.utils.framework import get_device, try_import_torch
 from ray.rllib.utils.metrics import (
+    ENV_TO_MODULE_CONNECTOR,
     EPISODE_DURATION_SEC_MEAN,
     EPISODE_LEN_MAX,
     EPISODE_LEN_MEAN,
     EPISODE_LEN_MIN,
     EPISODE_RETURN_MAX,
     EPISODE_RETURN_MEAN,
     EPISODE_RETURN_MIN,
+    MODULE_TO_ENV_CONNECTOR,
     NUM_AGENT_STEPS_SAMPLED,
     NUM_AGENT_STEPS_SAMPLED_LIFETIME,
     NUM_ENV_STEPS_SAMPLED,
@@ -45,11 +47,11 @@
     NUM_EPISODES_LIFETIME,
     NUM_MODULE_STEPS_SAMPLED,
     NUM_MODULE_STEPS_SAMPLED_LIFETIME,
+    RLMODULE_INFERENCE_TIMER,
     SAMPLE_TIMER,
     TIME_BETWEEN_SAMPLING,
     WEIGHTS_SEQ_NO,
 )
-from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
 from ray.rllib.utils.pre_checks.env import check_multiagent_environments
 from ray.rllib.utils.typing import EpisodeID, ModelWeights, ResultDict, StateDict
 from ray.tune.registry import ENV_CREATOR, _global_registry
@@ -88,8 +90,6 @@ def __init__(self, config: AlgorithmConfig, **kwargs):
         self.worker_index: int = kwargs.get("worker_index")
         self.tune_trial_id: str = kwargs.get("tune_trial_id")

-        # Set up all metrics-related structures and counters.
-        self.metrics: Optional[MetricsLogger] = None
         self._setup_metrics()

         # Create our callbacks object.
@@ -310,11 +310,13 @@ def _sample(
                 self.metrics.peek(NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0)
                 + ts
             ) * (self.config.num_env_runners or 1)
-            to_env = self.module.forward_exploration(
-                to_module, t=global_env_steps_lifetime
-            )
+            with self.metrics.log_time(RLMODULE_INFERENCE_TIMER):
+                to_env = self.module.forward_exploration(
+                    to_module, t=global_env_steps_lifetime
+                )
         else:
-            to_env = self.module.forward_inference(to_module)
+            with self.metrics.log_time(RLMODULE_INFERENCE_TIMER):
+                to_env = self.module.forward_inference(to_module)

         # Module-to-env connector.
         to_env = self._module_to_env(
@@ -324,6 +326,7 @@ def _sample(
             explore=explore,
             shared_data=shared_data,
             metrics=self.metrics,
+            metrics_prefix_key=(MODULE_TO_ENV_CONNECTOR,),
         )
         # In case all environments had been terminated `to_module` will be
         # empty and no actions are needed b/c we reset all environemnts.
@@ -453,19 +456,23 @@ def _sample(
             # Run the env-to-module connector pipeline for all done episodes.
             # Note, this is needed to postprocess last-step data, e.g. if the
             # user uses a connector that one-hot encodes observations.
+            # Note, this pipeline run is not timed as the number of episodes
+            # can differ from `num_envs_per_env_runner` and would bias time
+            # measurements.
             self._env_to_module(
                 episodes=done_episodes_to_run_env_to_module,
                 explore=explore,
                 rl_module=self.module,
                 shared_data=shared_data,
-                metrics=self.metrics,
+                metrics=None,
             )
             self._cached_to_module = self._env_to_module(
                 episodes=episodes,
                 explore=explore,
                 rl_module=self.module,
                 shared_data=shared_data,
                 metrics=self.metrics,
+                metrics_prefix_key=(ENV_TO_MODULE_CONNECTOR,),
             )

             # Numpy'ize the done episodes after running the connector pipeline. Note,
@@ -544,6 +551,7 @@ def _reset_envs(self, episodes, shared_data, explore):
             explore=explore,
             shared_data=shared_data,
             metrics=self.metrics,
+            metrics_key_prefix=(ENV_TO_MODULE_CONNECTOR,),
         )

         # Call `on_episode_start()` callbacks (always after reset).
@@ -871,8 +879,6 @@ def stop(self):
         self.env.close()

     def _setup_metrics(self):
-        self.metrics = MetricsLogger()
-
         self._done_episodes_for_metrics: List[MultiAgentEpisode] = []
         self._ongoing_episodes_for_metrics: DefaultDict[
             EpisodeID, List[MultiAgentEpisode]

rllib/env/single_agent_env_runner.py

Lines changed: 12 additions & 8 deletions
@@ -32,13 +32,15 @@
 from ray.rllib.utils.deprecation import Deprecated
 from ray.rllib.utils.framework import get_device
 from ray.rllib.utils.metrics import (
+    ENV_TO_MODULE_CONNECTOR,
     EPISODE_DURATION_SEC_MEAN,
     EPISODE_LEN_MAX,
     EPISODE_LEN_MEAN,
     EPISODE_LEN_MIN,
     EPISODE_RETURN_MAX,
     EPISODE_RETURN_MEAN,
     EPISODE_RETURN_MIN,
+    MODULE_TO_ENV_CONNECTOR,
     NUM_AGENT_STEPS_SAMPLED,
     NUM_AGENT_STEPS_SAMPLED_LIFETIME,
     NUM_ENV_STEPS_SAMPLED,
@@ -47,11 +49,11 @@
     NUM_EPISODES_LIFETIME,
     NUM_MODULE_STEPS_SAMPLED,
     NUM_MODULE_STEPS_SAMPLED_LIFETIME,
+    RLMODULE_INFERENCE_TIMER,
     SAMPLE_TIMER,
     TIME_BETWEEN_SAMPLING,
     WEIGHTS_SEQ_NO,
 )
-from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
 from ray.rllib.utils.spaces.space_utils import unbatch
 from ray.rllib.utils.typing import EpisodeID, ResultDict, StateDict
 from ray.tune.registry import ENV_CREATOR, _global_registry
@@ -80,9 +82,6 @@ def __init__(self, *, config: AlgorithmConfig, **kwargs):
         self.num_workers: int = kwargs.get("num_workers", self.config.num_env_runners)
         self.tune_trial_id: str = kwargs.get("tune_trial_id")

-        # Create a MetricsLogger object for logging custom stats.
-        self.metrics = MetricsLogger()
-
         # Create our callbacks object.
         self._callbacks: List[RLlibCallback] = [
             cls() for cls in force_list(self.config.callbacks_class)
@@ -296,11 +295,13 @@ def _sample(
                 self.metrics.peek(NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0)
                 + ts
             ) * (self.config.num_env_runners or 1)
-            to_env = self.module.forward_exploration(
-                to_module, t=global_env_steps_lifetime
-            )
+            with self.metrics.log_time(RLMODULE_INFERENCE_TIMER):
+                to_env = self.module.forward_exploration(
+                    to_module, t=global_env_steps_lifetime
+                )
         else:
-            to_env = self.module.forward_inference(to_module)
+            with self.metrics.log_time(RLMODULE_INFERENCE_TIMER):
+                to_env = self.module.forward_inference(to_module)

         # Module-to-env connector.
         to_env = self._module_to_env(
@@ -310,6 +311,7 @@ def _sample(
             explore=explore,
             shared_data=shared_data,
             metrics=self.metrics,
+            metrics_prefix_key=(MODULE_TO_ENV_CONNECTOR,),
         )

         # Extract the (vectorized) actions (to be sent to the env) from the
@@ -370,6 +372,7 @@ def _sample(
             rl_module=self.module,
             shared_data=shared_data,
             metrics=self.metrics,
+            metrics_prefix_key=(ENV_TO_MODULE_CONNECTOR,),
         )

         for env_index in range(self.num_envs):
@@ -738,6 +741,7 @@ def _reset_envs(self, episodes, shared_data, explore):
             explore=explore,
             shared_data=shared_data,
             metrics=self.metrics,
+            metrics_prefix_key=(ENV_TO_MODULE_CONNECTOR,),
         )

         # Call `on_episode_start()` callbacks (always after reset).

rllib/utils/metrics/__init__.py

Lines changed: 7 additions & 0 deletions
@@ -160,14 +160,21 @@
 GRAD_WAIT_TIMER = "grad_wait"
 SAMPLE_TIMER = "sample"  # @OldAPIStack
 ENV_RUNNER_SAMPLING_TIMER = "env_runner_sampling_timer"
+ENV_RESET_TIMER = "env_reset_timer"
+ENV_STEP_TIMER = "env_step_timer"
+ENV_TO_MODULE_CONNECTOR = "env_to_module_connector"
+RLMODULE_INFERENCE_TIMER = "rlmodule_inference_timer"
+MODULE_TO_ENV_CONNECTOR = "module_to_env_connector"
 OFFLINE_SAMPLING_TIMER = "offline_sampling_timer"
 REPLAY_BUFFER_ADD_DATA_TIMER = "replay_buffer_add_data_timer"
 REPLAY_BUFFER_SAMPLE_TIMER = "replay_buffer_sampling_timer"
 REPLAY_BUFFER_UPDATE_PRIOS_TIMER = "replay_buffer_update_prios_timer"
+LEARNER_CONNECTOR = "learner_connector"
 LEARNER_UPDATE_TIMER = "learner_update_timer"
 LEARN_ON_BATCH_TIMER = "learn"  # @OldAPIStack
 LOAD_BATCH_TIMER = "load"
 TARGET_NET_UPDATE_TIMER = "target_net_update"
+CONNECTOR_PIPELINE_TIMER = "connector_pipeline_timer"
 CONNECTOR_TIMERS = "connectors"

 # Learner.
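
These new constants are key names rather than metrics themselves. As a hedged sketch (not established by this diff), assuming an EnvRunner's metrics are reported under the `ENV_RUNNER_RESULTS` section of an Algorithm's result dict, the env timers could be pulled out roughly like this:

# Sketch only: the nesting under ENV_RUNNER_RESULTS is an assumption, not part of the commit.
from typing import Any, Dict, Optional

from ray.rllib.utils.metrics import (
    ENV_RESET_TIMER,
    ENV_RUNNER_RESULTS,
    ENV_STEP_TIMER,
)


def env_timers(results: Dict[str, Any]) -> Dict[str, Optional[float]]:
    """Returns the (assumed) env reset/step timers from one training iteration."""
    env_runner_results = results.get(ENV_RUNNER_RESULTS, {})
    return {
        ENV_RESET_TIMER: env_runner_results.get(ENV_RESET_TIMER),
        ENV_STEP_TIMER: env_runner_results.get(ENV_STEP_TIMER),
    }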

rllib/utils/metrics/stats.py

Lines changed: 2 additions & 0 deletions
@@ -9,11 +9,13 @@
 from ray.rllib.utils import force_list
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.utils.numpy import convert_to_numpy
+from ray.util.annotations import DeveloperAPI

 _, tf, _ = try_import_tf()
 torch, _ = try_import_torch()


+@DeveloperAPI
 class Stats:
     """A container class holding a number of values and executing reductions over them.

rllib/utils/metrics/utils.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+import re
+
+
+def to_snake_case(class_name: str) -> str:
+    """Converts class name to snake case.
+
+    This is used to unify metrics names when using class names within.
+    Args:
+        class_name: A string defining a class name (usually in camel
+            case).
+
+    Returns:
+        The class name in snake case.
+    """
+    return re.sub(r"(?<!^)(?=[A-Z])", "_", class_name).lower()
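
A quick, self-contained sanity check (illustrative only) of what `to_snake_case()` produces for typical connector class names:

from ray.rllib.utils.metrics.utils import to_snake_case

assert to_snake_case("FlattenObservations") == "flatten_observations"
assert to_snake_case("AddObservationsFromEpisodesToBatch") == (
    "add_observations_from_episodes_to_batch"
)
# Runs of capitals are split letter by letter by the regex:
assert to_snake_case("RLModule") == "r_l_module"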
