python-package/lightgbm/callback.py (3 additions, 2 deletions)
@@ -239,8 +239,9 @@ def __call__(self, env: CallbackEnv) -> None:
"Only list and callable values are supported "
"as a mapping from boosting round index to new parameter value."
)
if new_param != env.params.get(key, None):
new_parameters[key] = new_param
# Always update parameters to ensure compatibility with sklearn interface
# The booster's reset_parameter method handles whether the parameter actually changes
new_parameters[key] = new_param
if new_parameters:
if isinstance(env.model, Booster):
env.model.reset_parameter(new_parameters)
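For reference, a minimal sketch (not the library's exact code) of the scheduling contract the error message above describes: a list is indexed by the boosting round, a callable is called with it, and the resolved value is what now always reaches Booster.reset_parameter rather than being skipped by the old equality guard.

# Minimal sketch (not the library's exact code) of how reset_parameter
# resolves a scheduled value for the current boosting round: lists are
# indexed by round, callables are called with the round index.
def _resolve(value, round_idx):
    if isinstance(value, list):
        return value[round_idx]
    if callable(value):
        return value(round_idx)
    raise ValueError(
        "Only list and callable values are supported "
        "as a mapping from boosting round index to new parameter value."
    )

assert _resolve([0.3, 0.8, 0.3], 1) == 0.8
assert _resolve(lambda i: 0.5 ** i, 2) == 0.25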
python-package/lightgbm/sklearn.py (47 additions, 2 deletions)
@@ -975,6 +975,29 @@ def fit(
             else:
                 sample_weight = np.multiply(sample_weight, class_sample_weight)
 
+        # Filter out booster-specific parameters from the parameters passed to Dataset
+        from .basic import _ConfigAliases
+
+        # Booster parameter names to exclude from the Dataset, expanded through their aliases
+        booster_params = set()
+        # Common booster parameters that should not be stored on the Dataset
+        booster_param_names = [
+            'boosting', 'learning_rate', 'num_leaves', 'max_depth', 'min_split_gain',
+            'min_child_weight', 'min_child_samples', 'subsample', 'subsample_freq',
+            'colsample_bytree', 'colsample_bynode', 'colsample_bylevel', 'reg_alpha',
+            'reg_lambda', 'random_state', 'n_estimators', 'num_iterations',
+            'feature_fraction', 'bagging_fraction', 'feature_fraction_bynode',
+            'bagging_freq', 'lambda_l1', 'lambda_l2', 'min_gain_to_split',
+            'min_data_in_leaf', 'min_sum_hessian_in_leaf', 'max_delta_step',
+            'max_bin', 'min_data_in_bin', 'bin_construct_sample_cnt'
+        ]
+
+        for param_name in booster_param_names:
+            booster_params.update(_ConfigAliases.get_sorted(param_name))
+
+        # Keep only the remaining (data-level) parameters for the Dataset
+        dataset_params = {k: v for k, v in params.items() if k not in booster_params}
+
         train_set = Dataset(
             data=_X,
             label=_y,
@@ -983,7 +1006,7 @@
             init_score=init_score,
             categorical_feature=categorical_feature,
             feature_name=feature_name,
-            params=params,
+            params=dataset_params,
         )
 
         valid_sets: List[Dataset] = []
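The exclusion set built in the hunk above depends on alias expansion. A hedged illustration, using lightgbm.basic._ConfigAliases exactly as the diff does; the actual alias sets come from the installed LightGBM, so the expected result noted in the comments is an assumption:

# Illustration of the alias expansion used above: each name is expanded
# to the full set of registered spellings, so a parameter is excluded
# from the Dataset no matter which alias the caller used.
from lightgbm.basic import _ConfigAliases

exclude = set()
for name in ("learning_rate", "num_iterations"):
    exclude.update(_ConfigAliases.get_sorted(name))

params = {"eta": 0.05, "num_boost_round": 100, "max_bin": 255}
dataset_params = {k: v for k, v in params.items() if k not in exclude}
# "eta" and "num_boost_round" are registered aliases of the two expanded
# names, so only {"max_bin": 255} should remain in this toy example.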
@@ -1023,14 +1046,36 @@
name="eval_group",
i=i,
)
# Filter out booster-specific parameters from dataset parameters
from .basic import _ConfigAliases

# Get all booster parameter names to exclude from dataset
booster_params = set()
booster_param_names = [
'boosting', 'learning_rate', 'num_leaves', 'max_depth', 'min_split_gain',
'min_child_weight', 'min_child_samples', 'subsample', 'subsample_freq',
'colsample_bytree', 'colsample_bynode', 'colsample_bylevel', 'reg_alpha',
'reg_lambda', 'random_state', 'n_estimators', 'num_iterations',
'feature_fraction', 'bagging_fraction', 'feature_fraction_bynode',
'bagging_freq', 'lambda_l1', 'lambda_l2', 'min_gain_to_split',
'min_data_in_leaf', 'min_sum_hessian_in_leaf', 'max_delta_step',
'max_bin', 'max_depth', 'min_data_in_bin', 'bin_construct_sample_cnt'
]

for param_name in booster_param_names:
booster_params.update(_ConfigAliases.get_sorted(param_name))

# Filter dataset parameters
valid_dataset_params = {k: v for k, v in params.items() if k not in booster_params}

valid_set = Dataset(
data=valid_data[0],
label=valid_data[1],
weight=valid_weight,
group=valid_group,
init_score=valid_init_score,
categorical_feature="auto",
params=params,
params=valid_dataset_params,
)

valid_sets.append(valid_set)
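Both Dataset call sites now receive the same kind of filtered dict. A minimal sketch of the net effect, with a hand-written alias set standing in for the _ConfigAliases expansion (illustrative, not the diff's exact output); presumably this keeps later reset_parameter calls from colliding with values stored on the Dataset at construction:

# Sketch of the split: booster-level keys (after alias expansion) stay out
# of the Dataset, so only data-level settings travel with it.
params = {"colsample_bytree": 0.9, "n_estimators": 10, "verbose": -1}
booster_keys = {"colsample_bytree", "feature_fraction", "sub_feature",
                "n_estimators", "num_iterations"}  # hand-expanded aliases
dataset_params = {k: v for k, v in params.items() if k not in booster_keys}
assert dataset_params == {"verbose": -1}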
tests/python_package_test/test_callback.py (26 additions, 0 deletions)
@@ -63,3 +63,28 @@ def test_reset_parameter_callback_is_picklable(serializer):
     assert callback_from_disk.before_iteration is True
     assert callback.kwargs == callback_from_disk.kwargs
     assert callback.kwargs == params
+
+
+def test_reset_parameter_callback_with_sklearn():
+    """Test that the reset_parameter callback works through LGBMClassifier."""
+    import lightgbm as lgb
+    from lightgbm import LGBMClassifier
+    from sklearn.datasets import make_classification
+
+    X, y = make_classification(n_samples=1000, n_features=10, random_state=42)
+
+    model = LGBMClassifier(
+        n_estimators=10,
+        colsample_bytree=0.9,  # start high; the callback alternates it below
+        callbacks=[lgb.reset_parameter(colsample_bytree=[0.3, 0.8, 0.3, 0.8, 0.3, 0.8, 0.3, 0.8, 0.3, 0.8])],
+        verbose=-1
+    )
+    model.fit(X, y)
+
+    trees_df = model.booster_.trees_to_dataframe()
+    unique_feature_counts = trees_df.groupby('tree_index')['split_feature'].nunique()
+
+    assert unique_feature_counts.nunique() > 1, (
+        "reset_parameter callback did not work with LGBMClassifier. "
+        f"All trees used the same number of features. Counts: {unique_feature_counts.unique()}"
+    )
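A further usage sketch, not part of the PR's tests: a decaying learning-rate schedule through the sklearn interface, assuming the same constructor-level callbacks argument the test above exercises.

# Usage sketch (not part of the PR's tests): a per-round learning-rate
# decay driven through the sklearn interface via reset_parameter.
import lightgbm as lgb
from lightgbm import LGBMRegressor
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=500, n_features=10, random_state=0)

reg = LGBMRegressor(
    n_estimators=20,
    callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 * (0.99 ** i))],
    verbose=-1,
)
reg.fit(X, y)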