Skip to content

Commit a50b842

Browse files
authored
Cleanup configuration for constraints. (dmlc#7758)
1 parent 3c9b044 commit a50b842

File tree

5 files changed

+53
-42
lines changed

5 files changed

+53
-42
lines changed

doc/treemethod.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ Following table summarizes some differences in supported features between 4 tree
134134
+------------------+-----------+---------------------+---------------------+------------------------+
135135
| categorical data | F | T | T | T |
136136
+------------------+-----------+---------------------+---------------------+------------------------+
137-
| External memory | F | T | P | P |
137+
| External memory | F | T | T | P |
138138
+------------------+-----------+---------------------+---------------------+------------------------+
139139
| Distributed | F | T | T | T |
140140
+------------------+-----------+---------------------+---------------------+------------------------+

doc/tutorials/feature_interaction_constraint.rst

+8
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,14 @@ parameter:
174174
num_boost_round = 1000, evals = evallist,
175175
early_stopping_rounds = 10)
176176
177+
**************************
178+
Using feature name instead
179+
**************************
180+
181+
XGBoost's Python package supports using feature names instead of feature indices for
182+
specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
183+
feature interaction constraint can be specified as ``[["f0", "f2"]]``.
184+
177185
**************
178186
Advanced topic
179187
**************

doc/tutorials/monotonic.rst

+11-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ Then fitting with monotonicity constraints only requires adding a single paramet
6969
.. code-block:: python
7070
7171
params_constrained = params.copy()
72-
params_constrained['monotone_constraints'] = "(1,-1)"
72+
params_constrained['monotone_constraints'] = (1,-1)
7373
7474
model_with_constraints = xgb.train(params_constrained, dtrain,
7575
num_boost_round = 1000, evals = evallist,
@@ -90,3 +90,13 @@ monotonic constraints may produce unnecessarily shallow trees. This is because t
9090
split. Monotonic constraints may wipe out all available split candidates, in which case no
9191
split is made. To reduce the effect, you may want to increase the ``max_bin`` parameter to
9292
consider more split candidates.
93+
94+
95+
*******************
96+
Using feature names
97+
*******************
98+
99+
XGBoost's Python package supports using feature names instead of feature indices for
100+
specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
101+
monotonic constraint can be specified as ``{"f0": 1, "f2": -1}``, and ``"f1"`` will
102+
default to ``0`` (no constraint).

python-package/xgboost/core.py

+32-39
Original file line numberDiff line numberDiff line change
@@ -1392,50 +1392,46 @@ def __init__(
13921392
raise TypeError('Unknown type:', model_file)
13931393

13941394
params = params or {}
1395-
params = _configure_metrics(params.copy())
1396-
params = self._configure_constraints(params)
1397-
if isinstance(params, list):
1398-
params.append(('validate_parameters', True))
1395+
params_processed = _configure_metrics(params.copy())
1396+
params_processed = self._configure_constraints(params_processed)
1397+
if isinstance(params_processed, list):
1398+
params_processed.append(("validate_parameters", True))
13991399
else:
1400-
params['validate_parameters'] = True
1400+
params_processed["validate_parameters"] = True
14011401

1402-
self.set_param(params or {})
1403-
if (params is not None) and ('booster' in params):
1404-
self.booster = params['booster']
1405-
else:
1406-
self.booster = 'gbtree'
1402+
self.set_param(params_processed or {})
14071403

1408-
def _transform_monotone_constrains(self, value: Union[Dict[str, int], str]) -> str:
1404+
def _transform_monotone_constrains(
1405+
self, value: Union[Dict[str, int], str]
1406+
) -> Union[Tuple[int, ...], str]:
14091407
if isinstance(value, str):
14101408
return value
14111409

14121410
constrained_features = set(value.keys())
1413-
if not constrained_features.issubset(set(self.feature_names or [])):
1414-
raise ValueError('Constrained features are not a subset of '
1415-
'training data feature names')
1411+
feature_names = self.feature_names or []
1412+
if not constrained_features.issubset(set(feature_names)):
1413+
raise ValueError(
1414+
"Constrained features are not a subset of training data feature names"
1415+
)
14161416

1417-
return '(' + ','.join([str(value.get(feature_name, 0))
1418-
for feature_name in self.feature_names]) + ')'
1417+
return tuple(value.get(name, 0) for name in feature_names)
14191418

14201419
def _transform_interaction_constraints(
1421-
self, value: Union[List[Tuple[str]], str]
1422-
) -> str:
1420+
self, value: Union[Sequence[Sequence[str]], str]
1421+
) -> Union[str, List[List[int]]]:
14231422
if isinstance(value, str):
14241423
return value
1425-
1426-
feature_idx_mapping = {k: str(v) for v, k in enumerate(self.feature_names or [])}
1424+
feature_idx_mapping = {
1425+
name: idx for idx, name in enumerate(self.feature_names or [])
1426+
}
14271427

14281428
try:
1429-
s = "["
1429+
result = []
14301430
for constraint in value:
1431-
s += (
1432-
"["
1433-
+ ",".join(
1434-
[feature_idx_mapping[feature_name] for feature_name in constraint]
1435-
)
1436-
+ "],"
1431+
result.append(
1432+
[feature_idx_mapping[feature_name] for feature_name in constraint]
14371433
)
1438-
return s[:-1] + "]"
1434+
return result
14391435
except KeyError as e:
14401436
raise ValueError(
14411437
"Constrained features are not a subset of training data feature names"
@@ -1444,17 +1440,16 @@ def _transform_interaction_constraints(
14441440
def _configure_constraints(self, params: Union[List, Dict]) -> Union[List, Dict]:
14451441
if isinstance(params, dict):
14461442
value = params.get("monotone_constraints")
1447-
if value:
1448-
params[
1449-
"monotone_constraints"
1450-
] = self._transform_monotone_constrains(value)
1443+
if value is not None:
1444+
params["monotone_constraints"] = self._transform_monotone_constrains(
1445+
value
1446+
)
14511447

14521448
value = params.get("interaction_constraints")
1453-
if value:
1449+
if value is not None:
14541450
params[
14551451
"interaction_constraints"
14561452
] = self._transform_interaction_constraints(value)
1457-
14581453
elif isinstance(params, list):
14591454
for idx, param in enumerate(params):
14601455
name, value = param
@@ -2462,11 +2457,9 @@ def trees_to_dataframe(self, fmap: Union[str, os.PathLike] = '') -> DataFrame:
24622457
if not PANDAS_INSTALLED:
24632458
raise ImportError(('pandas must be available to use this method.'
24642459
'Install pandas before calling again.'))
2465-
2466-
if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}:
2467-
raise ValueError(
2468-
f"This method is not defined for Booster type {self.booster}"
2469-
)
2460+
booster = json.loads(self.save_config())["learner"]["gradient_booster"]["name"]
2461+
if booster not in {"gbtree", "dart"}:
2462+
raise ValueError(f"This method is not defined for Booster type {booster}")
24702463

24712464
tree_ids = []
24722465
node_ids = []

tests/python/test_monotone_constraints.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def test_monotone_constraints_feature_names(self, format):
9898

9999
# next check monotonicity when initializing monotone_constraints by feature names
100100
params = {
101-
'tree_method': 'hist', 'verbosity': 1,
101+
'tree_method': 'hist',
102102
'grow_policy': 'lossguide',
103103
'monotone_constraints': {'feature_0': 1, 'feature_1': -1}
104104
}

0 commit comments

Comments
 (0)