@@ -426,6 +426,10 @@ def __logistic_regression_path(
             (classes.size, n_features + int(fit_intercept)), order="F", dtype=X.dtype
         )

+    # Adoption of https://github.com/scikit-learn/scikit-learn/pull/26721
+    if solver in ["lbfgs", "newton-cg", "newton-cholesky"]:
+        sw_sum = len(X) if sample_weight is None else np.sum(sample_weight)
+
     if coef is not None:
         # it must work both giving the bias term and not
         if multi_class == "ovr":
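Note on the added block above: `sw_sum` falls back to the number of samples when no `sample_weight` is supplied, otherwise it is the total weight mass. A minimal standalone sketch of just that line (the `compute_sw_sum` helper is hypothetical, introduced only for illustration):

```python
import numpy as np

# Hypothetical helper mirroring the added line: the weight total,
# with the sample count as the unweighted fallback.
def compute_sw_sum(X, sample_weight=None):
    return len(X) if sample_weight is None else np.sum(sample_weight)

X = np.zeros((5, 3))
assert compute_sw_sum(X) == 5
assert compute_sw_sum(X, np.array([0.5, 0.5, 1.0, 1.0, 2.0])) == 5.0
```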
@@ -592,18 +596,18 @@ def grad(x, *args):
                     X,
                     target,
                     0.0,
-                    1.0 / (2 * C * C_daal_multiplier),
+                    1.0 / (2 * C * C_daal_multiplier * sw_sum),
                     fit_intercept,
                     value=True,
                     gradient=True,
                     hessian=False,
                 )
             else:
                 if sklearn_check_version("1.1"):
-                    l2_reg_strength = 1.0 / C
+                    l2_reg_strength = 1.0 / (C * sw_sum)
                     extra_args = (X, target, sample_weight, l2_reg_strength, n_threads)
                 else:
-                    extra_args = (X, target, 1.0 / C, sample_weight)
+                    extra_args = (X, target, 1.0 / (C * sw_sum), sample_weight)

             iprint = [-1, 50, 1, 100, 101][
                 np.searchsorted(np.array([0, 1, 2, 3]), verbose)
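The `1.0 / C` to `1.0 / (C * sw_sum)` changes above follow the upstream rescaling from scikit-learn PR 26721: the objective is divided by the sample-weight sum, which leaves the minimizer unchanged while keeping gradient magnitudes roughly independent of dataset size. A toy check of that scaling argument (a quadratic stand-in, not the actual logistic loss):

```python
import numpy as np

# Toy objective: data term plus L2 penalty, all divided by sw_sum.
# Dividing by a positive constant does not move the argmin.
def objective(w, C, sw_sum):
    data_term = (w - 3.0) ** 2          # stand-in for the weighted loss sum
    penalty = w * w / (2.0 * C)         # L2 term
    return (data_term + penalty) / sw_sum

w_grid = np.linspace(-5.0, 5.0, 10001)
argmin_plain = w_grid[np.argmin(objective(w_grid, C=1.0, sw_sum=1.0))]
argmin_scaled = w_grid[np.argmin(objective(w_grid, C=1.0, sw_sum=250.0))]
assert argmin_plain == argmin_scaled    # same minimizer, only the scale differs
```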
@@ -614,7 +618,13 @@ def grad(x, *args):
                 method="L-BFGS-B",
                 jac=True,
                 args=extra_args,
-                options={"iprint": iprint, "gtol": tol, "maxiter": max_iter},
+                options={
+                    "maxiter": max_iter,
+                    "maxls": 50,
+                    "iprint": iprint,
+                    "gtol": tol,
+                    "ftol": 64 * np.finfo(float).eps,
+                },
             )
             n_iter_i = _check_optimize_result(
                 solver,
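The expanded `options` dict above matches the L-BFGS-B settings scikit-learn itself passes for its lbfgs path: `maxls` caps line-search steps per iteration, and `ftol` adds a stopping criterion on the relative decrease of the objective alongside the projected-gradient tolerance `gtol`. A self-contained sketch of the same configuration on a toy problem:

```python
import numpy as np
from scipy.optimize import minimize

# Value-and-gradient function, as expected with jac=True.
def fun(w):
    return float(np.sum((w - 1.0) ** 2)), 2.0 * (w - 1.0)

res = minimize(
    fun,
    np.zeros(3),
    method="L-BFGS-B",
    jac=True,
    options={
        "maxiter": 100,
        "maxls": 50,                        # max line-search steps per iteration
        "iprint": -1,                       # silent
        "gtol": 1e-4,                       # projected-gradient tolerance
        "ftol": 64 * np.finfo(float).eps,   # relative objective-decrease tolerance
    },
)
print(res.x)  # approximately [1. 1. 1.]
```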
@@ -629,7 +639,7 @@ def grad(x, *args):
             if _dal_ready:

                 def make_ncg_funcs(f, value=False, gradient=False, hessian=False):
-                    daal_penaltyL2 = 1.0 / (2 * C * C_daal_multiplier)
+                    daal_penaltyL2 = 1.0 / (2 * C * C_daal_multiplier * sw_sum)
                     _obj_, X_, y_, n_samples = daal_extra_args_func(
                         classes.size,
                         w0,
@@ -662,10 +672,10 @@ def _func_(x, *args):
                 )
             else:
                 if sklearn_check_version("1.1"):
-                    l2_reg_strength = 1.0 / C
+                    l2_reg_strength = 1.0 / (C * sw_sum)
                     args = (X, target, sample_weight, l2_reg_strength, n_threads)
                 else:
-                    args = (X, target, 1.0 / C, sample_weight)
+                    args = (X, target, 1.0 / (C * sw_sum), sample_weight)

             w0, n_iter_i = _newton_cg(
                 hess, func, grad, w0, args=args, maxiter=max_iter, tol=tol