WIP: [python-package] preserve params when copying Booster (fixes #5539) #6101

Draft · wants to merge 4 commits into master

Changes from 1 commit:
merge master
jameslamb committed Aug 18, 2024
commit 5ec27acc8d0063100d68cf0481380b9705f51d71
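
For context, here is a minimal sketch of the behavior this PR's tests pin down (illustrative data and params, not taken from the diff; assumes only that lightgbm and numpy are installed): deepcopying a trained Booster should carry the training params over to the copy.

```python
# Minimal sketch of the behavior under test (illustrative, not part of this diff).
from copy import deepcopy

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(42)
X = rng.random((100, 2))
y = rng.random(100)

params = {"boosting": "gbdt", "num_leaves": 5, "verbosity": -1}
bst = lgb.train(params=params, num_boost_round=2, train_set=lgb.Dataset(X, label=y))

# Before the fix targeted by #5539, the copy could lose these params.
bst2 = deepcopy(bst)
assert bst2.params == bst.params
```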
11 changes: 3 additions & 8 deletions tests/c_api_test/test_.py
@@ -200,14 +200,9 @@ def test_booster(tmp_path):
     free_dataset(test)
     booster2 = ctypes.c_void_p()
     num_total_model = ctypes.c_int(0)
-    LIB.LGBM_BoosterCreateFromModelfile(
-        c_str('model.txt'),
-        ctypes.byref(num_total_model),
-        ctypes.byref(booster2))
-    LIB.LGBM_BoosterResetParameter(
-        booster2,
-        c_str("app=binary metric=auc num_leaves=29 verbose=0"))
-    data = np.loadtxt(str(binary_example_dir / 'binary.test'), dtype=np.float64)
+    LIB.LGBM_BoosterCreateFromModelfile(c_str(str(model_path)), ctypes.byref(num_total_model), ctypes.byref(booster2))
+    LIB.LGBM_BoosterResetParameter(booster2, c_str("app=binary metric=auc num_leaves=29 verbose=0"))
+    data = np.loadtxt(str(binary_example_dir / "binary.test"), dtype=np.float64)
     mat = data[:, 1:]
     preb = np.empty(mat.shape[0], dtype=np.float64)
     num_preb = ctypes.c_int64(0)
107 changes: 65 additions & 42 deletions tests/python_package_test/test_basic.py
@@ -15,7 +15,7 @@
 import lightgbm as lgb
 from lightgbm.compat import PANDAS_INSTALLED, pd_DataFrame, pd_Series
 
-from .utils import BOOSTING_TYPES, dummy_obj, load_breast_cancer, mse_obj
+from .utils import BOOSTING_TYPES, dummy_obj, load_breast_cancer, mse_obj, np_assert_array_equal
 
 
 def test_basic(tmp_path):
@@ -895,50 +895,78 @@ def test_feature_names_are_set_correctly_when_no_feature_names_passed_into_Datas
     assert ds.construct().feature_name == ["Column_0", "Column_1", "Column_2"]
 
 
-@pytest.mark.parametrize('boosting_type', BOOSTING_TYPES)
-def test_booster_deepcopy_preserves_parameters(boosting_type):
-    orig_params = {
-        'boosting': boosting_type,
-        'feature_fraction': 0.708,
-        'num_leaves': 5,
-        'verbosity': -1
-    }
-    bst = lgb.train(
-        params=orig_params,
-        num_boost_round=2,
-        train_set=lgb.Dataset(np.random.rand(100, 2))
-    )
+# NOTE: this intentionally contains values where num_leaves is <, ==, and > 2^max_depth
+@pytest.mark.parametrize(("max_depth", "num_leaves"), [(-1, 3), (-1, 50), (5, 3), (5, 31), (5, 32), (8, 3), (8, 31)])
+def test_max_depth_warning_is_not_raised_if_num_leaves_is_also_provided(capsys, num_leaves, max_depth):
+    X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
+    lgb.Booster(
+        params={
+            "objective": "binary",
+            "max_depth": max_depth,
+            "num_leaves": num_leaves,
+            "num_iterations": 1,
+            "verbose": 0,
+        },
+        train_set=lgb.Dataset(X, label=y),
+    )
+    assert "Provided parameters constrain tree depth" not in capsys.readouterr().out
+
+
+# NOTE: max_depth < 5 is significant here because the default is num_leaves=31. With max_depth=5,
+# a full depth-wise tree would have 2^5 = 32 leaves.
+@pytest.mark.parametrize("max_depth", [1, 2, 3, 4])
+def test_max_depth_warning_is_not_raised_if_max_depth_gt_1_and_lt_5_and_num_leaves_omitted(capsys, max_depth):
+    X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
+    lgb.Booster(
+        params={
+            "objective": "binary",
+            "max_depth": max_depth,
+            "num_iterations": 1,
+            "verbose": 0,
+        },
+        train_set=lgb.Dataset(X, label=y),
+    )
+    assert "Provided parameters constrain tree depth" not in capsys.readouterr().out
+
+
+@pytest.mark.parametrize("max_depth", [5, 6, 7, 8, 9])
+def test_max_depth_warning_is_raised_if_max_depth_gte_5_and_num_leaves_omitted(capsys, max_depth):
+    X, y = make_blobs(n_samples=1_000, n_features=1, centers=2)
+    lgb.Booster(
+        params={
+            "objective": "binary",
+            "max_depth": max_depth,
+            "num_iterations": 1,
+            "verbose": 0,
+        },
+        train_set=lgb.Dataset(X, label=y),
+    )
+    expected_warning = (
+        f"[LightGBM] [Warning] Provided parameters constrain tree depth (max_depth={max_depth}) without explicitly "
+        f"setting 'num_leaves'. This can lead to underfitting. To resolve this warning, pass 'num_leaves' (<={2**max_depth}) "
+        "in params. Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity."
+    )
+    assert expected_warning in capsys.readouterr().out
+
+
@pytest.mark.parametrize("boosting_type", BOOSTING_TYPES)
def test_booster_deepcopy_preserves_parameters(boosting_type, default_rng):
orig_params = {"boosting": boosting_type, "feature_fraction": 0.708, "num_leaves": 5, "verbosity": -1}
bst = lgb.train(params=orig_params, num_boost_round=2, train_set=lgb.Dataset(default_rng.random(100, 2)))
bst2 = deepcopy(bst)
assert bst2.params == bst.params
assert bst.params["num_leaves"] == 5
assert bst.params["verbosity"] == -1

# passed-in params shouldn't have been modified outside of lightgbm
assert orig_params == {
'boosting': boosting_type,
'feature_fraction': 0.708,
'num_leaves': 5,
'verbosity': -1
}
assert orig_params == {"boosting": boosting_type, "feature_fraction": 0.708, "num_leaves": 5, "verbosity": -1}


-@pytest.mark.parametrize('boosting_type', BOOSTING_TYPES)
-def test_booster_params_kwarg_overrides_params_from_model_string(boosting_type):
-    orig_params = {
-        'boosting': boosting_type,
-        'feature_fraction': 0.708,
-        'num_leaves': 5,
-        'verbosity': -1
-    }
-    bst = lgb.train(
-        params=orig_params,
-        num_boost_round=2,
-        train_set=lgb.Dataset(np.random.rand(100, 2))
-    )
-    bst2 = lgb.Booster(
-        params={'num_leaves': 7},
-        model_str=bst.model_to_string()
-    )
+@pytest.mark.parametrize("boosting_type", BOOSTING_TYPES)
+def test_booster_params_kwarg_overrides_params_from_model_string(boosting_type, default_rng):
+    orig_params = {"boosting": boosting_type, "feature_fraction": 0.708, "num_leaves": 5, "verbosity": -1}
+    bst = lgb.train(params=orig_params, num_boost_round=2, train_set=lgb.Dataset(default_rng.random((100, 2))))
+    bst2 = lgb.Booster(params={"num_leaves": 7}, model_str=bst.model_to_string())
 
     # params should have been updated on the Python object and the C++ side
     assert bst2.params["num_leaves"] == 7
@@ -949,9 +977,4 @@ def test_booster_params_kwarg_overrides_params_from_model_string(boosting_type):
         raise RuntimeError
 
     # passed-in params shouldn't have been modified outside of lightgbm
-    assert orig_params == {
-        'boosting': boosting_type,
-        'feature_fraction': 0.708,
-        'num_leaves': 5,
-        'verbosity': -1
-    }
+    assert orig_params == {"boosting": boosting_type, "feature_fraction": 0.708, "num_leaves": 5, "verbosity": -1}
2 changes: 1 addition & 1 deletion tests/python_package_test/utils.py
@@ -11,7 +11,7 @@

 import lightgbm as lgb
 
-BOOSTING_TYPES = ['gbdt', 'dart', 'goss', 'rf']
+BOOSTING_TYPES = ["gbdt", "dart", "goss", "rf"]
 SERIALIZERS = ["pickle", "joblib", "cloudpickle"]
 
