Skip to content

Commit 7f19d4b

Browse files
Fix for Logistic Regression loss scaling (#1908) (#1943)
* Fix for LogReg loss scaling.
* Correct deselected test name.
(cherry picked from commit aaad387) Co-authored-by: Alexander Andreev <alexander.andreev@intel.com>
1 parent 52f27a5 commit 7f19d4b

File tree

2 files changed

+23
-7
lines changed

2 files changed

+23
-7
lines changed

daal4py/sklearn/linear_model/logistic_path.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,10 @@ def __logistic_regression_path(
426426
(classes.size, n_features + int(fit_intercept)), order="F", dtype=X.dtype
427427
)
428428

429+
# Adoption of https://github.com/scikit-learn/scikit-learn/pull/26721
430+
if solver in ["lbfgs", "newton-cg", "newton-cholesky"]:
431+
sw_sum = len(X) if sample_weight is None else np.sum(sample_weight)
432+
429433
if coef is not None:
430434
# it must work both giving the bias term and not
431435
if multi_class == "ovr":
@@ -592,18 +596,18 @@ def grad(x, *args):
592596
X,
593597
target,
594598
0.0,
595-
1.0 / (2 * C * C_daal_multiplier),
599+
1.0 / (2 * C * C_daal_multiplier * sw_sum),
596600
fit_intercept,
597601
value=True,
598602
gradient=True,
599603
hessian=False,
600604
)
601605
else:
602606
if sklearn_check_version("1.1"):
603-
l2_reg_strength = 1.0 / C
607+
l2_reg_strength = 1.0 / (C * sw_sum)
604608
extra_args = (X, target, sample_weight, l2_reg_strength, n_threads)
605609
else:
606-
extra_args = (X, target, 1.0 / C, sample_weight)
610+
extra_args = (X, target, 1.0 / (C * sw_sum), sample_weight)
607611

608612
iprint = [-1, 50, 1, 100, 101][
609613
np.searchsorted(np.array([0, 1, 2, 3]), verbose)
@@ -614,7 +618,13 @@ def grad(x, *args):
614618
method="L-BFGS-B",
615619
jac=True,
616620
args=extra_args,
617-
options={"iprint": iprint, "gtol": tol, "maxiter": max_iter},
621+
options={
622+
"maxiter": max_iter,
623+
"maxls": 50,
624+
"iprint": iprint,
625+
"gtol": tol,
626+
"ftol": 64 * np.finfo(float).eps,
627+
},
618628
)
619629
n_iter_i = _check_optimize_result(
620630
solver,
@@ -629,7 +639,7 @@ def grad(x, *args):
629639
if _dal_ready:
630640

631641
def make_ncg_funcs(f, value=False, gradient=False, hessian=False):
632-
daal_penaltyL2 = 1.0 / (2 * C * C_daal_multiplier)
642+
daal_penaltyL2 = 1.0 / (2 * C * C_daal_multiplier * sw_sum)
633643
_obj_, X_, y_, n_samples = daal_extra_args_func(
634644
classes.size,
635645
w0,
@@ -662,10 +672,10 @@ def _func_(x, *args):
662672
)
663673
else:
664674
if sklearn_check_version("1.1"):
665-
l2_reg_strength = 1.0 / C
675+
l2_reg_strength = 1.0 / (C * sw_sum)
666676
args = (X, target, sample_weight, l2_reg_strength, n_threads)
667677
else:
668-
args = (X, target, 1.0 / C, sample_weight)
678+
args = (X, target, 1.0 / (C * sw_sum), sample_weight)
669679

670680
w0, n_iter_i = _newton_cg(
671681
hess, func, grad, w0, args=args, maxiter=max_iter, tol=tol

deselected_tests.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,12 @@ deselected_tests:
190190
# margin above the test threshold, see https://github.com/scikit-learn/scikit-learn/pull/13645
191191
- linear_model/tests/test_logistic.py::test_dtype_match
192192

193+
# Logistic Regression coeffs change due to fix for loss scaling
194+
# (https://github.com/scikit-learn/scikit-learn/pull/26721)
195+
- feature_selection/tests/test_from_model.py::test_importance_getter[estimator0-named_steps.logisticregression.coef_]
196+
- inspection/_plot/tests/test_boundary_decision_display.py::test_class_of_interest_binary[predict_proba]
197+
- linear_model/tests/test_sag.py::test_sag_pobj_matches_logistic_regression
198+
193199
# This fails on certain platforms. While weighted data does not go through DAAL,
194200
# unweighted does. Since convergence does not occur (comment in the test
195201
# suggests that) and because coefficients are slightly different,

0 commit comments

Comments (0)