Revert "FIX: make LinearRegression perfectly consistent across sparse…
Browse files Browse the repository at this point in the history
… or dense (scikit-learn#13279)"

This reverts commit 41f106c.
Xing committed Apr 28, 2019
1 parent 7cc75c8 commit 7691db5
Showing 3 changed files with 3 additions and 41 deletions.
5 changes: 0 additions & 5 deletions doc/whats_new/v0.21.rst
@@ -200,11 +200,6 @@ Support for Python 3.4 and below has been officially dropped.
   parameter value ``copy_X=True`` in ``fit``.
   :issue:`12972` by :user:`Lucio Fernandez-Arjona <luk-f-a>`
 
-- |Fix| Fixed a bug in :class:`linear_model.LinearRegression` that
-  was not returning the same coefficients and intercepts with
-  ``fit_intercept=True`` in sparse and dense case.
-  :issue:`13279` by `Alexandre Gramfort`_
-
 :mod:`sklearn.manifold`
 ...........................

19 changes: 3 additions & 16 deletions sklearn/linear_model/base.py
@@ -467,34 +467,21 @@ def fit(self, X, y, sample_weight=None):

         X, y, X_offset, y_offset, X_scale = self._preprocess_data(
             X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
-            copy=self.copy_X, sample_weight=sample_weight,
-            return_mean=True)
+            copy=self.copy_X, sample_weight=sample_weight)

         if sample_weight is not None:
             # Sample weight can be implemented via a simple rescaling.
             X, y = _rescale_data(X, y, sample_weight)

         if sp.issparse(X):
-            X_offset_scale = X_offset / X_scale
-
-            def matvec(b):
-                return X.dot(b) - b.dot(X_offset_scale)
-
-            def rmatvec(b):
-                return X.T.dot(b) - X_offset_scale * np.sum(b)
-
-            X_centered = sparse.linalg.LinearOperator(shape=X.shape,
-                                                      matvec=matvec,
-                                                      rmatvec=rmatvec)
-
             if y.ndim < 2:
-                out = sparse_lsqr(X_centered, y)
+                out = sparse_lsqr(X, y)
                 self.coef_ = out[0]
                 self._residues = out[3]
             else:
                 # sparse_lstsq cannot handle y with shape (M, K)
                 outs = Parallel(n_jobs=n_jobs_)(
-                    delayed(sparse_lsqr)(X_centered, y[:, j].ravel())
+                    delayed(sparse_lsqr)(X, y[:, j].ravel())
                     for j in range(y.shape[1]))
                 self.coef_ = np.vstack([out[0] for out in outs])
                 self._residues = np.vstack([out[3] for out in outs])
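For context, the deleted branch above implemented fit_intercept for sparse input by wrapping X in a scipy LinearOperator that subtracts the (scaled) column means on the fly, so the data never has to be densified. Below is a minimal, self-contained sketch of that technique reconstructed from the deleted lines; the helper name centered_operator and the toy data are illustrative assumptions, not scikit-learn API.

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import LinearOperator, lsqr


def centered_operator(X, X_offset, X_scale):
    # Mirrors the deleted code: present sparse X to lsqr as if every column
    # had been centered (and optionally scaled), without densifying X.
    X_offset_scale = X_offset / X_scale

    def matvec(b):
        return X.dot(b) - b.dot(X_offset_scale)

    def rmatvec(b):
        return X.T.dot(b) - X_offset_scale * np.sum(b)

    return LinearOperator(shape=X.shape, matvec=matvec, rmatvec=rmatvec)


# Illustrative check: lsqr on the implicit operator agrees with a dense
# least-squares solve on explicitly centered data.
rng = np.random.RandomState(0)
X_dense = rng.randn(200, 2)
X_dense[X_dense < 0.1] = 0.0          # make the matrix mostly zeros
X_sp = sparse.csr_matrix(X_dense)
y = rng.rand(200)

X_offset = X_dense.mean(axis=0)
X_scale = np.ones(X_dense.shape[1])   # no normalization in this sketch
y_offset = y.mean()

op = centered_operator(X_sp, X_offset, X_scale)
coef_sparse = lsqr(op, y - y_offset)[0]
coef_dense = np.linalg.lstsq(X_dense - X_offset, y - y_offset, rcond=None)[0]
print(np.allclose(coef_sparse, coef_dense))  # expected: True, up to solver tolerance

The point of the operator is that lsqr only ever needs matrix-vector products, so the centering can be folded into matvec/rmatvec instead of materializing a dense centered copy of X.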
20 changes: 0 additions & 20 deletions sklearn/linear_model/tests/test_base.py
@@ -154,26 +154,6 @@ def test_linear_regression_sparse(random_state=0):
     assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)


-@pytest.mark.parametrize('normalize', [True, False])
-@pytest.mark.parametrize('fit_intercept', [True, False])
-def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
-    # Test that linear regression agrees between sparse and dense
-    rng = check_random_state(0)
-    n_samples = 200
-    n_features = 2
-    X = rng.randn(n_samples, n_features)
-    X[X < 0.1] = 0.
-    Xcsr = sparse.csr_matrix(X)
-    y = rng.rand(n_samples)
-    params = dict(normalize=normalize, fit_intercept=fit_intercept)
-    clf_dense = LinearRegression(**params)
-    clf_sparse = LinearRegression(**params)
-    clf_dense.fit(X, y)
-    clf_sparse.fit(Xcsr, y)
-    assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
-    assert_allclose(clf_dense.coef_, clf_sparse.coef_)
-
-
 def test_linear_regression_multiple_outcome(random_state=0):
     # Test multiple-outcome linear regressions
     X, y = make_regression(random_state=random_state)
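For reference, the behaviour the deleted test guarded can be checked with a standalone sketch. With this revert applied, sparse and dense fits with fit_intercept=True are no longer guaranteed to match exactly, so the comparison below is printed rather than asserted; variable names mirror the removed test and are otherwise illustrative.

import numpy as np
from scipy import sparse
from sklearn.linear_model import LinearRegression

# Fit the same data as a dense array and as CSR, then compare the results,
# as the removed test did.
rng = np.random.RandomState(0)
X = rng.randn(200, 2)
X[X < 0.1] = 0.0
Xcsr = sparse.csr_matrix(X)
y = rng.rand(200)

clf_dense = LinearRegression(fit_intercept=True).fit(X, y)
clf_sparse = LinearRegression(fit_intercept=True).fit(Xcsr, y)

# Any non-negligible difference here is the sparse/dense inconsistency
# that the reverted fix addressed.
print(clf_dense.intercept_ - clf_sparse.intercept_)
print(np.abs(clf_dense.coef_ - clf_sparse.coef_).max())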
