Skip to content

Commit c258292

Browse files
Carl Hvarfner authored and facebook-github-bot committed
Migration to Noise module (#4761)
Summary: Updates `benchmark.py` to use the new Noise module architecture:
- Passes `problem.noise` instead of `problem.noise_std` to `BenchmarkRunner`.
- Removes the obsolete `add_custom_noise` replacement, since noise is now handled entirely by the `Noise` object on the runner.
- Keeps the `noise_std` argument around to ensure backwards compatibility.

Reviewed By: saitcakmak
Differential Revision: D90597013
Privacy Context Container: L1307644
1 parent 3156389 commit c258292

File tree

11 files changed

+134
-285
lines changed

11 files changed

+134
-285
lines changed

ax/benchmark/benchmark.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ def get_benchmark_runner(
147147

148148
return BenchmarkRunner(
149149
test_function=problem.test_function,
150-
noise_std=problem.noise_std,
150+
noise=problem.noise,
151151
step_runtime_function=problem.step_runtime_function,
152152
max_concurrency=max_concurrency,
153153
force_use_simulated_backend=force_use_simulated_backend,
@@ -189,9 +189,9 @@ def get_oracle_experiment_from_params(
189189
optimization_config=problem.optimization_config,
190190
)
191191

192-
# Ensure noiseless evaluation by replacing any custom noise function with None
193-
noiseless_test_function = replace(problem.test_function, add_custom_noise=None)
194-
runner = BenchmarkRunner(test_function=noiseless_test_function, noise_std=0.0)
192+
# The test function produces ground-truth values; noise is handled by
193+
# BenchmarkRunner's Noise object (default is noiseless GaussianNoise).
194+
runner = BenchmarkRunner(test_function=problem.test_function)
195195

196196
# Silence INFO logs from ax.core.experiment that state "Attached custom
197197
# parameterizations"

ax/benchmark/benchmark_problem.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55

66
# pyre-strict
77

8+
import warnings
89
from collections.abc import Mapping, Sequence
910
from dataclasses import dataclass, field
1011

1112
from ax.benchmark.benchmark_metric import BenchmarkMapMetric, BenchmarkMetric
1213
from ax.benchmark.benchmark_step_runtime_function import TBenchmarkStepRuntimeFunction
1314
from ax.benchmark.benchmark_test_function import BenchmarkTestFunction
15+
from ax.benchmark.noise import GaussianNoise, Noise
1416
from ax.core.auxiliary import AuxiliaryExperiment, AuxiliaryExperimentPurpose
1517
from ax.core.metric import Metric
1618
from ax.core.objective import MultiObjective, Objective, ScalarizedObjective
@@ -43,12 +45,10 @@ class BenchmarkProblem(Base):
4345
as one trial.
4446
test_function: A `BenchmarkTestFunction`, which will generate noiseless
4547
data. This will be used by a `BenchmarkRunner`.
46-
noise_std: Describes how noise is added to the output of the
47-
`test_function`. If a float, IID random normal noise with that
48-
standard deviation is added. A dict whose keys match
49-
`test_functions.outcome_names` sets different noise standard
50-
deviations for the different outcomes produced by the
51-
`test_function`. This will be used by a `BenchmarkRunner`.
48+
noise: A `Noise` object that determines how noise is added to the
49+
ground-truth evaluations produced by the `test_function`. Defaults
50+
to noiseless (`GaussianNoise(noise_std=0.0)`).
51+
noise_std: Deprecated. Use `noise` instead.
5252
optimal_value: The best ground-truth objective value, used for scoring
5353
optimization results on a scale from 0 to 100, where achieving the
5454
`optimal_value` receives a score of 100. The `optimal_value` should
@@ -93,7 +93,8 @@ class BenchmarkProblem(Base):
9393
optimization_config: OptimizationConfig
9494
num_trials: int
9595
test_function: BenchmarkTestFunction
96-
noise_std: float | Mapping[str, float] = 0.0
96+
noise: Noise = field(default_factory=GaussianNoise)
97+
noise_std: float | Mapping[str, float] | None = None
9798
optimal_value: float
9899
baseline_value: float
99100
worst_feasible_value: float | None = None
@@ -108,6 +109,23 @@ class BenchmarkProblem(Base):
108109
tracking_metrics: list[Metric] | None = None
109110

110111
def __post_init__(self) -> None:
112+
# Handle backward compatibility for noise_std parameter
113+
if self.noise_std is not None:
114+
warnings.warn(
115+
"noise_std is deprecated. Use noise=GaussianNoise(noise_std=...) "
116+
"instead.",
117+
DeprecationWarning,
118+
stacklevel=2,
119+
)
120+
# Check if noise was also explicitly set (not default)
121+
if not isinstance(self.noise, GaussianNoise) or self.noise.noise_std != 0.0:
122+
raise ValueError(
123+
"Cannot specify both 'noise_std' and a non-default 'noise'. "
124+
"Use only 'noise=GaussianNoise(noise_std=...)' instead."
125+
)
126+
# Convert noise_std to GaussianNoise (a falsy value such as 0.0 becomes 0)
127+
self.noise = GaussianNoise(noise_std=self.noise_std or 0)
128+
111129
# Validate inputs
112130
if self.report_inference_value_as_trace and self.is_moo:
113131
raise NotImplementedError(

ax/benchmark/benchmark_runner.py

Lines changed: 31 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55

66
# pyre-strict
77

8+
import warnings
89
from collections.abc import Iterable, Mapping, Sequence
910
from dataclasses import dataclass, field
10-
from math import sqrt
1111
from typing import Any
1212

1313
import numpy as np
@@ -16,6 +16,7 @@
1616
from ax.benchmark.benchmark_step_runtime_function import TBenchmarkStepRuntimeFunction
1717
from ax.benchmark.benchmark_test_function import BenchmarkTestFunction
1818
from ax.benchmark.benchmark_trial_metadata import BenchmarkTrialMetadata
19+
from ax.benchmark.noise import GaussianNoise, Noise
1920
from ax.core.base_trial import BaseTrial, TrialStatus
2021
from ax.core.batch_trial import BatchTrial
2122
from ax.core.runner import Runner
@@ -69,54 +70,6 @@ def _dict_of_arrays_to_df(
6970
return df
7071

7172

72-
def _add_noise(
73-
df: pd.DataFrame,
74-
noise_stds: Mapping[str, float],
75-
arm_weights: Mapping[str, float] | None,
76-
) -> pd.DataFrame:
77-
"""
78-
For each ``Y_true`` in ``df``, with metric name ``metric_name`` and
79-
arm name ``arm_name``, add noise with standard deviation
80-
``noise_stds[metric_name] / sqrt_nlzd_arm_weights[arm_name]``,
81-
where ``sqrt_nlzd_arm_weights = sqrt(arm_weights[arm_name] /
82-
sum(arm_weights.values())])``.
83-
84-
Args:
85-
df: A DataFrame with columns including
86-
["metric_name", "arm_name", "Y_true"].
87-
noise_stds: A mapping from metric name to what the standard
88-
deviation would be if one arm received the entire
89-
sample budget.
90-
arm_weights: Either ``None`` if there is only one ``Arm``, or a
91-
mapping from ``Arm`` name to the arm's allocation. Using arm
92-
weights will increase noise levels, since each ``Arm`` is
93-
assumed to receive a fraction of the total sample budget.
94-
95-
Returns:
96-
The original ``df``, now with additional columns ["mean", "sem"].
97-
"""
98-
noiseless = all(v == 0 for v in noise_stds.values())
99-
if not noiseless:
100-
noise_std_ser = df["metric_name"].map(noise_stds)
101-
if arm_weights is not None:
102-
nlzd_arm_weights_sqrt = {
103-
arm_name: sqrt(weight / sum(arm_weights.values()))
104-
for arm_name, weight in arm_weights.items()
105-
}
106-
arm_weights_ser = df["arm_name"].map(nlzd_arm_weights_sqrt)
107-
df["sem"] = noise_std_ser / arm_weights_ser
108-
109-
else:
110-
df["sem"] = noise_std_ser
111-
112-
df["mean"] = df["Y_true"] + np.random.normal(loc=0, scale=df["sem"])
113-
114-
else:
115-
df["sem"] = 0.0
116-
df["mean"] = df["Y_true"]
117-
return df
118-
119-
12073
def get_total_runtime(
12174
trial: BaseTrial,
12275
step_runtime_function: TBenchmarkStepRuntimeFunction | None,
@@ -139,7 +92,7 @@ class BenchmarkRunner(Runner):
13992
A Runner that produces both observed and ground-truth values.
14093
14194
Observed values equal ground-truth values plus noise, with the noise added
142-
according to the standard deviations returned by `get_noise_stds()`.
95+
according to the `Noise` object provided.
14396
14497
This runner does require that every benchmark has a ground truth, which
14598
won't necessarily be true for real-world problems. Such problems fall into
@@ -161,8 +114,10 @@ class BenchmarkRunner(Runner):
161114
Args:
162115
test_function: A ``BenchmarkTestFunction`` from which to generate
163116
deterministic data before adding noise.
164-
noise_std: The standard deviation of the noise added to the data. Can be
165-
a dict to be per-metric.
117+
noise: A ``Noise`` object that determines how noise is added to the
118+
ground-truth evaluations. Defaults to noiseless
119+
(``GaussianNoise(noise_std=0.0)``).
120+
noise_std: Deprecated. Use ``noise`` instead.
166121
step_runtime_function: A function that takes in parameters
167122
(in ``TParameterization`` format) and returns the runtime of a step.
168123
max_concurrency: The maximum number of trials that can be running at a
@@ -175,24 +130,30 @@ class BenchmarkRunner(Runner):
175130
"""
176131

177132
test_function: BenchmarkTestFunction
178-
noise_std: float | Mapping[str, float] = 0.0
133+
noise: Noise = field(default_factory=GaussianNoise)
134+
noise_std: float | Mapping[str, float] | None = None
179135
step_runtime_function: TBenchmarkStepRuntimeFunction | None = None
180136
max_concurrency: int = 1
181137
force_use_simulated_backend: bool = False
182138
simulated_backend_runner: SimulatedBackendRunner | None = field(init=False)
183139

184140
def __post_init__(self) -> None:
185-
# Check for conflicting noise configuration
186-
has_custom_noise = self.test_function.add_custom_noise is not None
187-
188-
# This works for both lists and dicts, and the user specifies anything
189-
# other than 0.0 as noise_std alongside a custom noise, we error out.
190-
if has_custom_noise and (self.noise_std != 0.0):
191-
raise ValueError(
192-
"Cannot specify both `add_custom_noise` on the test function and "
193-
"a `noise_std`. Either use `add_custom_noise` for custom "
194-
"noise behavior or `noise_std` for default noise behavior."
141+
# Handle backward compatibility for noise_std parameter
142+
if self.noise_std is not None:
143+
warnings.warn(
144+
"noise_std is deprecated. Use noise=GaussianNoise(noise_std=...) "
145+
"instead.",
146+
DeprecationWarning,
147+
stacklevel=2,
195148
)
149+
# Check if noise was also explicitly set (not default)
150+
if not isinstance(self.noise, GaussianNoise) or self.noise.noise_std != 0.0:
151+
raise ValueError(
152+
"Cannot specify both 'noise_std' and a non-default 'noise'. "
153+
"Use only 'noise=GaussianNoise(noise_std=...)' instead."
154+
)
155+
# Convert noise_std to GaussianNoise (a falsy value such as 0.0 becomes 0)
156+
self.noise = GaussianNoise(noise_std=self.noise_std or 0)
196157

197158
use_simulated_backend = (
198159
(self.max_concurrency > 1)
@@ -238,16 +199,6 @@ def get_Y_true(self, params: Mapping[str, TParamValue]) -> npt.NDArray:
238199
return result[:, None]
239200
return result
240201

241-
def get_noise_stds(self) -> dict[str, float]:
242-
noise_std = self.noise_std
243-
if isinstance(noise_std, float | int):
244-
return {name: float(noise_std) for name in self.outcome_names}
245-
if not set(noise_std.keys()) == set(self.outcome_names):
246-
raise ValueError(
247-
"Noise std must have keys equal to outcome names if given as a dict."
248-
)
249-
return dict(noise_std)
250-
251202
def run(self, trial: BaseTrial) -> dict[str, BenchmarkTrialMetadata]:
252203
"""Run the trial by evaluating its parameterization(s).
253204
@@ -286,15 +237,13 @@ def run(self, trial: BaseTrial) -> dict[str, BenchmarkTrialMetadata]:
286237
if isinstance(trial, BatchTrial)
287238
else None
288239
)
289-
# Check for custom noise function, otherwise use default noise behavior
290-
if self.test_function.add_custom_noise is not None:
291-
df = self.test_function.add_custom_noise(
292-
df, trial, self.get_noise_stds(), arm_weights
293-
)
294-
else:
295-
df = _add_noise(
296-
df=df, noise_stds=self.get_noise_stds(), arm_weights=arm_weights
297-
)
240+
# Use the Noise object to add noise to the ground-truth evaluations
241+
df = self.noise.add_noise(
242+
df=df,
243+
trial=trial,
244+
outcome_names=self.outcome_names,
245+
arm_weights=arm_weights,
246+
)
298247
df["trial_index"] = trial.index
299248
df.drop(columns=["Y_true"], inplace=True)
300249
df["metric_signature"] = df["metric_name"]

ax/benchmark/benchmark_test_function.py

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,53 +6,30 @@
66
# pyre-strict
77

88
from abc import ABC, abstractmethod
9-
from collections.abc import Callable, Mapping, Sequence
9+
from collections.abc import Mapping, Sequence
1010
from dataclasses import dataclass
1111

12-
import pandas as pd
13-
from ax.core.base_trial import BaseTrial
1412
from ax.core.types import TParamValue
1513
from torch import Tensor
1614

17-
# Type alias for the custom noise function.
18-
# The callable takes all the arguments that are exposed in the benchmark runner:
19-
# - df: The lookup_data().df DataFrame. Mandatory
20-
# - trial: The trial being evaluated
21-
# - noise_stds: Mapping from metric name to noise std
22-
# - arm_weights: Mapping from arm name to weight, or None for single-arm trials
23-
# And returns a DataFrame with added "mean" and "sem" columns.
24-
TAddCustomNoise = Callable[
25-
[
26-
pd.DataFrame,
27-
BaseTrial | None,
28-
Mapping[str, float] | None,
29-
Mapping[str, float] | None,
30-
],
31-
pd.DataFrame,
32-
]
33-
3415

3516
@dataclass(kw_only=True)
3617
class BenchmarkTestFunction(ABC):
3718
"""
3819
The basic Ax class for generating deterministic data to benchmark against.
3920
40-
(Noise - if desired - is added by the runner.)
21+
(Noise - if desired - is added by the runner using a `Noise` object.)
4122
4223
Args:
4324
outcome_names: Names of the outcomes.
4425
n_steps: Number of data points produced per metric and per evaluation. 1
4526
if data is not time-series. If data is time-series, this will
4627
eventually become the number of values on a `MapMetric` for
4728
evaluations that run to completion.
48-
add_custom_noise: Optional callable to add custom noise to evaluation
49-
results. If provided, it will be called instead of the default noise
50-
behavior, overriding the noise_std argument.
5129
"""
5230

5331
outcome_names: Sequence[str]
5432
n_steps: int = 1
55-
add_custom_noise: TAddCustomNoise | None = None
5633

5734
@abstractmethod
5835
def evaluate_true(self, params: Mapping[str, TParamValue]) -> Tensor:

ax/benchmark/problems/synthetic/bandit.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import numpy as np
1111
from ax.benchmark.benchmark_problem import BenchmarkProblem, get_soo_opt_config
1212
from ax.benchmark.benchmark_test_functions.synthetic import IdentityTestFunction
13+
from ax.benchmark.noise import GaussianNoise
1314
from ax.core.parameter import ChoiceParameter, ParameterType
1415
from ax.core.search_space import SearchSpace
1516

@@ -71,6 +72,6 @@ def get_bandit_problem(num_choices: int = 30, num_trials: int = 3) -> BenchmarkP
7172
baseline_value=baseline_value,
7273
test_function=test_function,
7374
report_inference_value_as_trace=True,
74-
noise_std=1.0,
75+
noise=GaussianNoise(noise_std=1.0),
7576
status_quo_params={"x0": num_choices // 2},
7677
)

ax/benchmark/problems/synthetic/from_botorch.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
)
2121
from ax.benchmark.benchmark_step_runtime_function import TBenchmarkStepRuntimeFunction
2222
from ax.benchmark.benchmark_test_functions.botorch_test import BoTorchTestFunction
23+
from ax.benchmark.noise import GaussianNoise
2324
from ax.core.auxiliary import AuxiliaryExperiment, AuxiliaryExperimentPurpose
2425
from ax.core.parameter import ChoiceParameter, ParameterType, RangeParameter
2526
from ax.core.search_space import SearchSpace
@@ -127,8 +128,8 @@ def create_problem_from_botorch(
127128
`test_problem_class`. This should *not* include `noise_std` or
128129
`negate`, since these are handled through Ax benchmarking (as the
129130
`noise_std` and `lower_is_better` arguments to `BenchmarkProblem`).
130-
noise_std: Standard deviation of synthetic noise added to outcomes. If a
131-
float, the same noise level is used for all objectives.
131+
noise_std: Either a float or a dict of standard deviations of synthetic
132+
Gaussian noise added to outcomes. If a float, the same noise level is used for all objectives.
132133
lower_is_better: Whether this is a minimization problem. For MOO, this
133134
applies to all objectives.
134135
num_trials: Simply the `num_trials` of the `BenchmarkProblem` created.
@@ -271,7 +272,7 @@ def create_problem_from_botorch(
271272
search_space=search_space,
272273
optimization_config=optimization_config,
273274
test_function=test_function,
274-
noise_std=noise_std,
275+
noise=GaussianNoise(noise_std=noise_std),
275276
num_trials=num_trials,
276277
optimal_value=assert_is_instance(optimal_value, float),
277278
baseline_value=baseline_value,

ax/benchmark/problems/synthetic/hss/jenatton.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import torch
1212
from ax.benchmark.benchmark_problem import BenchmarkProblem, get_soo_opt_config
1313
from ax.benchmark.benchmark_test_function import BenchmarkTestFunction
14+
from ax.benchmark.noise import GaussianNoise
1415
from ax.core.parameter import ChoiceParameter, ParameterType, RangeParameter
1516
from ax.core.search_space import SearchSpace
1617
from pyre_extensions import none_throws
@@ -116,7 +117,7 @@ def get_jenatton_benchmark_problem(
116117
search_space=get_jenatton_search_space(),
117118
optimization_config=optimization_config,
118119
test_function=Jenatton(outcome_names=[name]),
119-
noise_std=noise_std,
120+
noise=GaussianNoise(noise_std=noise_std),
120121
num_trials=num_trials,
121122
optimal_value=JENATTON_OPTIMAL_VALUE,
122123
baseline_value=JENATTON_BASELINE_VALUE,

0 commit comments

Comments
 (0)