From f3934770c09fdb4183822055964043771028aa7c Mon Sep 17 00:00:00 2001 From: Matt Haberland Date: Fri, 8 Dec 2023 10:48:26 -0800 Subject: [PATCH] MAINT: stats.boxcox_normmax: avoid negative overflow --- scipy/stats/_morestats.py | 13 +++++++------ scipy/stats/tests/test_morestats.py | 17 +++++++++++++++-- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/scipy/stats/_morestats.py b/scipy/stats/_morestats.py index cf5cd6341901..46e46ce1fe13 100644 --- a/scipy/stats/_morestats.py +++ b/scipy/stats/_morestats.py @@ -1131,7 +1131,7 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None): def _boxcox_inv_lmbda(x, y): # compute lmbda given x and y for Box-Cox transformation - num = special.lambertw(-(x ** (-1 / y)) * np.log(x) / y, k=1) + num = special.lambertw(-(x ** (-1 / y)) * np.log(x) / y, k=-1) return np.real(-num / np.log(x) - 1 / y) @@ -1314,8 +1314,8 @@ def _all(x): else: # Test if the optimal lambda causes overflow x = np.asarray(x) - max_x = np.max(x, axis=0) - istransinf = np.isinf(special.boxcox(max_x, res)) + x_treme = np.max(x, axis=0) if res > 0 else np.min(x, axis=0) + istransinf = np.isinf(special.boxcox(x_treme, res)) dtype = x.dtype if np.issubdtype(x.dtype, np.floating) else np.float64 if np.any(istransinf): warnings.warn( @@ -1326,9 +1326,10 @@ def _all(x): ) # Return the constrained lambda to ensure the transformation - # does not cause overflow - ymax = np.finfo(dtype).max / 100 # 100 is the safety factor - constrained_res = _boxcox_inv_lmbda(max_x, ymax) + # does not cause overflow. 10000 is a safety factor because + # `special.boxcox` overflows prematurely. + ymax = np.finfo(dtype).max / 10000 + constrained_res = _boxcox_inv_lmbda(x_treme, ymax * np.sign(res)) if isinstance(res, np.ndarray): res[istransinf] = constrained_res diff --git a/scipy/stats/tests/test_morestats.py b/scipy/stats/tests/test_morestats.py index 9444ee2d1819..fedb42137396 100644 --- a/scipy/stats/tests/test_morestats.py +++ b/scipy/stats/tests/test_morestats.py @@ -15,8 +15,7 @@ import pytest from pytest import raises as assert_raises import re -from scipy import optimize -from scipy import stats +from scipy import optimize, stats, special from scipy.stats._morestats import _abw_state, _get_As_weibull, _Avals_weibull from .common_tests import check_named_results from .._hypotests import _get_wilcoxon_distr, _get_wilcoxon_distr2 @@ -2076,6 +2075,20 @@ def test_user_defined_optimizer_and_brack_raises_error(self): stats.boxcox_normmax(self.x, brack=(-2.0, 2.0), optimizer=optimizer) + @pytest.mark.parametrize( + 'x', ([2003.0, 1950.0, 1997.0, 2000.0, 2009.0], + [0.50000471, 0.50004979, 0.50005902, 0.50009312, 0.50001632])) + def test_overflow(self, x): + message = "The optimal lambda is..." + with pytest.warns(UserWarning, match=message): + lmbda = stats.boxcox_normmax(x, method='mle') + assert np.isfinite(special.boxcox(x, lmbda)).all() + # 10000 is safety factor used in boxcox_normmax + ymax = np.finfo(np.float64).max / 10000 + x_treme = np.max(x) if lmbda > 0 else np.min(x) + y_extreme = special.boxcox(x_treme, lmbda) + assert_allclose(y_extreme, ymax * np.sign(lmbda)) + class TestBoxcoxNormplot: def setup_method(self):