from collections.abc import Iterable, Mapping, Sequence
from dataclasses import dataclass, field
- from math import sqrt
from typing import Any

import numpy as np
from ax.benchmark.benchmark_step_runtime_function import TBenchmarkStepRuntimeFunction
from ax.benchmark.benchmark_test_function import BenchmarkTestFunction
from ax.benchmark.benchmark_trial_metadata import BenchmarkTrialMetadata
+ from ax.benchmark.noise import GaussianNoise, Noise
from ax.core.base_trial import BaseTrial, TrialStatus
from ax.core.batch_trial import BatchTrial
from ax.core.runner import Runner
from ax.runners.simulated_backend import SimulatedBackendRunner
from ax.utils.common.serialization import TClassDecoderRegistry, TDecoderRegistry
from ax.utils.testing.backend_simulator import BackendSimulator, BackendSimulatorOptions
- from pyre_extensions import assert_is_instance


def _dict_of_arrays_to_df(
@@ -70,54 +69,6 @@ def _dict_of_arrays_to_df(
    return df


- def _add_noise(
-     df: pd.DataFrame,
-     noise_stds: Mapping[str, float],
-     arm_weights: Mapping[str, float] | None,
- ) -> pd.DataFrame:
-     """
-     For each ``Y_true`` in ``df``, with metric name ``metric_name`` and
-     arm name ``arm_name``, add noise with standard deviation
-     ``noise_stds[metric_name] / sqrt_nlzd_arm_weights[arm_name]``,
-     where ``sqrt_nlzd_arm_weights = sqrt(arm_weights[arm_name] /
-     sum(arm_weights.values()))``.
-
-     Args:
-         df: A DataFrame with columns including
-             ["metric_name", "arm_name", "Y_true"].
-         noise_stds: A mapping from metric name to what the standard
-             deviation would be if one arm received the entire
-             sample budget.
-         arm_weights: Either ``None`` if there is only one ``Arm``, or a
-             mapping from ``Arm`` name to the arm's allocation. Using arm
-             weights will increase noise levels, since each ``Arm`` is
-             assumed to receive a fraction of the total sample budget.
-
-     Returns:
-         The original ``df``, now with additional columns ["mean", "sem"].
-     """
-     noiseless = all(v == 0 for v in noise_stds.values())
-     if not noiseless:
-         noise_std_ser = df["metric_name"].map(noise_stds)
-         if arm_weights is not None:
-             nlzd_arm_weights_sqrt = {
-                 arm_name: sqrt(weight / sum(arm_weights.values()))
-                 for arm_name, weight in arm_weights.items()
-             }
-             arm_weights_ser = df["arm_name"].map(nlzd_arm_weights_sqrt)
-             df["sem"] = noise_std_ser / arm_weights_ser
-
-         else:
-             df["sem"] = noise_std_ser
-
-         df["mean"] = df["Y_true"] + np.random.normal(loc=0, scale=df["sem"])
-
-     else:
-         df["sem"] = 0.0
-         df["mean"] = df["Y_true"]
-     return df
-
-
def get_total_runtime(
    trial: BaseTrial,
    step_runtime_function: TBenchmarkStepRuntimeFunction | None,
@@ -140,7 +91,7 @@ class BenchmarkRunner(Runner):
    A Runner that produces both observed and ground-truth values.

    Observed values equal ground-truth values plus noise, with the noise added
-     according to the standard deviations returned by `get_noise_stds()`.
+     according to the provided `Noise` object.

    This runner does require that every benchmark has a ground truth, which
    won't necessarily be true for real-world problems. Such problems fall into
@@ -162,8 +113,9 @@ class BenchmarkRunner(Runner):
    Args:
        test_function: A ``BenchmarkTestFunction`` from which to generate
            deterministic data before adding noise.
-         noise_std: The standard deviation of the noise added to the data. Can be
-             a list or dict to be per-metric.
+         noise: A ``Noise`` object that determines how noise is added to the
+             ground-truth evaluations. Defaults to noiseless
+             (``GaussianNoise(noise_std=0.0)``).
        step_runtime_function: A function that takes in parameters
            (in ``TParameterization`` format) and returns the runtime of a step.
        max_concurrency: The maximum number of trials that can be running at a
@@ -176,25 +128,13 @@ class BenchmarkRunner(Runner):
176128 """
177129
178130 test_function : BenchmarkTestFunction
179- noise_std : float | Sequence [ float ] | Mapping [ str , float ] = 0.0
131+ noise : Noise = field ( default_factory = GaussianNoise )
180132 step_runtime_function : TBenchmarkStepRuntimeFunction | None = None
181133 max_concurrency : int = 1
182134 force_use_simulated_backend : bool = False
183135 simulated_backend_runner : SimulatedBackendRunner | None = field (init = False )
184136
185137 def __post_init__ (self ) -> None :
-         # Check for conflicting noise configuration.
-         has_custom_noise = self.test_function.add_custom_noise is not None
-
-         # This works for both lists and dicts; if the user specifies anything
-         # other than 0.0 as noise_std alongside a custom noise, we error out.
-         if has_custom_noise and (self.noise_std != 0.0):
-             raise ValueError(
-                 "Cannot specify both `add_custom_noise` on the test function and "
-                 "a `noise_std`. Either use `add_custom_noise` for custom "
-                 "noise behavior or `noise_std` for default noise behavior."
-             )
-
        use_simulated_backend = (
            (self.max_concurrency > 1)
            or (self.step_runtime_function is not None)
@@ -239,22 +179,6 @@ def get_Y_true(self, params: Mapping[str, TParamValue]) -> npt.NDArray:
            return result[:, None]
        return result

-     def get_noise_stds(self) -> dict[str, float]:
-         noise_std = self.noise_std
-         if isinstance(noise_std, float | int):
-             return {name: float(noise_std) for name in self.outcome_names}
-         elif isinstance(noise_std, dict):
-             if not set(noise_std.keys()) == set(self.outcome_names):
-                 raise ValueError(
-                     "Noise std must have keys equal to outcome names if given as "
-                     "a dict."
-                 )
-             return noise_std
-         # list of floats
-         return dict(
-             zip(self.outcome_names, assert_is_instance(noise_std, list), strict=True)
-         )
-
    def run(self, trial: BaseTrial) -> dict[str, BenchmarkTrialMetadata]:
        """Run the trial by evaluating its parameterization(s).

@@ -293,15 +217,13 @@ def run(self, trial: BaseTrial) -> dict[str, BenchmarkTrialMetadata]:
            if isinstance(trial, BatchTrial)
            else None
        )
-         # Check for custom noise function; otherwise use default noise behavior.
-         if self.test_function.add_custom_noise is not None:
-             df = self.test_function.add_custom_noise(
-                 df, trial, self.get_noise_stds(), arm_weights
-             )
-         else:
-             df = _add_noise(
-                 df=df, noise_stds=self.get_noise_stds(), arm_weights=arm_weights
-             )
+         # Use the Noise object to add noise to the ground-truth evaluations.
+         df = self.noise.add_noise(
+             df=df,
+             trial=trial,
+             outcome_names=self.outcome_names,
+             arm_weights=arm_weights,
+         )
        df["trial_index"] = trial.index
        df.drop(columns=["Y_true"], inplace=True)
        df["metric_signature"] = df["metric_name"]
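
For reference, the arm-weight scaling that the removed `_add_noise` implemented (SEM = noise_std / sqrt(arm_weight / total_weight)) can be checked with a small worked example; the arm names, weights, and noise level below are hypothetical, not taken from the diff:

from math import sqrt

noise_std = 0.1
arm_weights = {"0_0": 1.0, "0_1": 3.0}  # hypothetical arm names and weights
total = sum(arm_weights.values())

# SEM per arm: noise_std / sqrt(arm_weight / total_weight).
sems = {name: noise_std / sqrt(w / total) for name, w in arm_weights.items()}
# {'0_0': 0.2, '0_1': 0.1155}: the arm receiving 1/4 of the sample budget
# sees twice the base noise level; the arm receiving 3/4 sees about 1.15x.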
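
And a minimal sketch of how callers might construct a runner after this change, assuming only what the diff shows: `BenchmarkRunner` gains a `noise` field defaulting to `GaussianNoise()`, and `GaussianNoise` accepts a `noise_std` argument. `my_test_function` is a placeholder for any `BenchmarkTestFunction` instance:

from ax.benchmark.benchmark_runner import BenchmarkRunner
from ax.benchmark.noise import GaussianNoise

# Noiseless by default, per field(default_factory=GaussianNoise);
# my_test_function is a hypothetical BenchmarkTestFunction.
runner = BenchmarkRunner(test_function=my_test_function)

# Gaussian noise with standard deviation 0.1 added to every outcome.
noisy_runner = BenchmarkRunner(
    test_function=my_test_function,
    noise=GaussianNoise(noise_std=0.1),
)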