
Commit 82aa115

Working on docstrings
1 parent 2979f3e commit 82aa115

File tree

4 files changed (+57, −70 lines)


src/skmatter/decomposition/_kernel_pcovc.py

Lines changed: 54 additions & 10 deletions
@@ -69,26 +69,25 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
         If randomized :
             run randomized SVD by the method of Halko et al.
 
-    classifier: {`LogisticRegression`, `LogisticRegressionCV`, `LinearSVC`, `LinearDiscriminantAnalysis`,
-        `RidgeClassifier`, `RidgeClassifierCV`, `SGDClassifier`, `Perceptron`, `precomputed`}, default=None
+    classifier : {instance of `sklearn.svm.SVC`, None}, default=None
         The classifier to use for computing
         the evidence :math:`{\mathbf{Z}}`.
         A pre-fitted classifier may be provided.
+        If the classifier is not `None`, its kernel parameters
+        (`kernel`, `gamma`, `degree`, and `coef0`)
+        must be identical to those passed directly to `KernelPCovC`.
 
-        If None, ``sklearn.linear_model.LogisticRegression()``
-        is used as the classifier.
-
-    kernel : {"linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"}, default="linear"
+    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf'
         Kernel.
 
-    gamma : {'scale', 'auto'} or float, default=None
+    gamma : {'scale', 'auto'} or float, default='scale'
         Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other
         kernels.
 
     degree : int, default=3
         Degree for poly kernels. Ignored by other kernels.
 
-    coef0 : float, default=1
+    coef0 : float, default=0.0
         Independent term in poly and sigmoid kernels.
         Ignored by other kernels.
 
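As an illustration of the matching-parameter rule the new docstring states, here is a minimal sketch; the data shapes, `mixing`, and `n_components` values are assumptions for illustration, not part of this commit:

import numpy as np
from sklearn.svm import SVC
from skmatter.decomposition import KernelPCovC

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 4))
y = np.repeat([0, 1], 50)

# Per the docstring above: the SVC's kernel parameters must match the
# ones given to KernelPCovC itself (kernel, gamma, degree, coef0).
kpcovc = KernelPCovC(
    mixing=0.5,          # hypothetical mixing value
    n_components=2,
    classifier=SVC(kernel="rbf", gamma="scale"),
    kernel="rbf",
    gamma="scale",
)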
@@ -223,6 +222,27 @@ def __init__(
         self.classifier = classifier
 
     def fit(self, X, Y):
+        r"""Fit the model with X and Y.
+
+        Parameters
+        ----------
+        X : numpy.ndarray, shape (n_samples, n_features)
+            Training data, where n_samples is the number of samples and
+            n_features is the number of features.
+
+            It is suggested that :math:`\mathbf{X}` be centered by its column-
+            means and scaled. If features are related, the matrix should be scaled
+            to have unit variance, otherwise :math:`\mathbf{X}` should be
+            scaled so that each feature has a variance of 1 / n_features.
+
+        Y : numpy.ndarray, shape (n_samples,)
+            Training data, where n_samples is the number of samples.
+
+        Returns
+        -------
+        self : object
+            Returns the instance itself.
+        """
         X, Y = validate_data(self, X, Y, y_numeric=False)
         check_classification_targets(Y)
         self.classes_ = np.unique(Y)
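A sketch of the preprocessing the `fit` docstring suggests, continuing the example above; `StandardScaler` is one way to get the column-mean centering and unit variance (the library's own scalers could be substituted):

from sklearn.preprocessing import StandardScaler

# Center each column by its mean and scale to unit variance, as the
# docstring recommends when the features are related.
X_scaled = StandardScaler().fit_transform(X)

kpcovc.fit(X_scaled, y)            # Y here is a 1-D array of class labels
T = kpcovc.transform(X_scaled)     # shape (n_samples, n_components)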
@@ -347,7 +367,7 @@ def transform(self, X):
         """Apply dimensionality reduction to X.
 
         ``X`` is projected on the first principal components as determined by the
-        modified Kernel PCovR distances.
+        modified Kernel PCovC distances.
 
         Parameters
         ----------
@@ -382,7 +402,31 @@ def inverse_transform(self, T):
         return super().inverse_transform(T)
 
     def decision_function(self, X=None, T=None):
-        """Predicts confidence scores from X or T."""
+        r"""Predicts confidence scores from X or T.
+
+        .. math::
+            \mathbf{Z} = \mathbf{T} \mathbf{P}_{TZ}
+                       = \mathbf{K} \mathbf{P}_{KT} \mathbf{P}_{TZ}
+                       = \mathbf{K} \mathbf{P}_{KZ}
+
+        Parameters
+        ----------
+        X : ndarray, shape (n_samples, n_features)
+            Original data for which we want to get confidence scores,
+            where n_samples is the number of samples and n_features is the
+            number of features.
+
+        T : ndarray, shape (n_samples, n_components)
+            Projected data for which we want to get confidence scores,
+            where n_samples is the number of samples and n_components is the
+            number of components.
+
+        Returns
+        -------
+        Z : numpy.ndarray, shape (n_samples,) or (n_samples, n_classes)
+            Confidence scores. For binary classification, has shape `(n_samples,)`,
+            for multiclass classification, has shape `(n_samples, n_classes)`.
+        """
         check_is_fitted(self, attributes=["pkz_", "ptz_"])
 
         if X is None and T is None:
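The identity in the new docstring means the same scores are reachable from either input; a sketch of the two call paths, continuing the example above:

# From the original features: the model builds the kernel K internally
# and applies the composed projector P_KZ (fitted attribute `pkz_`).
Z_from_X = kpcovc.decision_function(X=X_scaled)

# From an existing projection: only P_TZ (fitted attribute `ptz_`) is applied.
Z_from_T = kpcovc.decision_function(T=kpcovc.transform(X_scaled))

# Two classes here, so both score arrays have shape (n_samples,).
assert Z_from_X.shape == Z_from_T.shape == (X_scaled.shape[0],)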

src/skmatter/decomposition/_kernel_pcovr.py

Lines changed: 1 addition & 1 deletion
@@ -85,7 +85,7 @@ class KernelPCovR(_BaseKPCov):
         If `precomputed`, we assume that the `y` passed to the `fit` function
         is the regressed form of the targets :math:`{\mathbf{\hat{Y}}}`.
 
-    kernel : {"linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"}, default="linear"
+    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'} or callable, default='linear'
         Kernel.
 
     gamma : float, default=None
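The revised docstring now admits a callable kernel; a hedged sketch of that usage, where pairing with a matching `KernelRidge` regressor is an assumption about the expected API rather than something this commit shows:

from sklearn.kernel_ridge import KernelRidge
from skmatter.decomposition import KernelPCovR

def linear_kernel(A, B):
    # A callable kernel receives two sample matrices and returns
    # their Gram matrix.
    return A @ B.T

kpcovr = KernelPCovR(
    mixing=0.5,
    n_components=2,
    kernel=linear_kernel,
    regressor=KernelRidge(kernel=linear_kernel),  # same kernel as the model
)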

src/skmatter/decomposition/_pcovc.py

Lines changed: 1 addition & 0 deletions
@@ -399,6 +399,7 @@ def decision_function(self, X=None, T=None):
             Original data for which we want to get confidence scores,
             where n_samples is the number of samples and n_features is the
             number of features.
+
         T : ndarray, shape (n_samples, n_components)
             Projected data for which we want to get confidence scores,
             where n_samples is the number of samples and n_components is the

tests/test_kernel_pcovc.py

Lines changed: 1 addition & 59 deletions
@@ -114,6 +114,7 @@ def test_reconstruction_errors(self):
 
             prev_error = error
             prev_x_error = x_error
+
         print(x_errors)
         print(errors)
 
@@ -322,66 +323,7 @@ def _linear_kernel(X, Y):
         kpcovc.fit(self.X, self.Y)
 
 
-    def test_linear_matches_pcovc(self):
-        """Check that KernelPCovR returns the same results as PCovR when using a linear
-        kernel.
-        """
-        linear_svc = LinearSVC(loss="hinge", fit_intercept=False)
-        linear_svc.fit(self.X, self.Y)
-
-        # common instantiation parameters for the two models
-        hypers = dict(
-            mixing=1,
-            n_components=1,
-        )
-
-        # computing projection and predicton loss with linear KernelPCovR
-        # and use the alpha from RidgeCV for level regression comparisons
-        kpcovr = KernelPCovC(
-            classifier=SVC(kernel="linear"),
-            kernel="linear",
-            fit_inverse_transform=True,
-            center=True,
-            **hypers,
-        )
-        kpcovr.fit(self.X, self.Y)
-        ly = (
-            np.linalg.norm(self.Y - kpcovr.predict(self.X)) ** 2.0
-            / np.linalg.norm(self.Y) ** 2.0
-        )
-
-        # computing projection and predicton loss with PCovR
-        ref_pcovr = PCovC(**hypers, classifier=linear_svc, space="sample")
-        ref_pcovr.fit(self.X, self.Y)
-        ly_ref = (
-            np.linalg.norm(self.Y - ref_pcovr.predict(self.X)) ** 2.0
-            / np.linalg.norm(self.Y) ** 2.0
-        )
 
-        t_ref = ref_pcovr.transform(self.X)
-        t = kpcovr.transform(self.X)
-
-        print(np.linalg.norm(t_ref - t))
-        K = kpcovr._get_kernel(self.X)
-
-        k_ref = t_ref @ t_ref.T
-        k = t @ t.T
-
-        lk_ref = np.linalg.norm(K - k_ref) ** 2.0 / np.linalg.norm(K) ** 2.0
-        lk = np.linalg.norm(K - k) ** 2.0 / np.linalg.norm(K) ** 2.0
-
-        rounding = 3
-        # self.assertEqual(
-        #     round(ly, rounding),
-        #     round(ly_ref, rounding),
-        # )
-
-        print(lk, lk_ref)
-
-        self.assertEqual(
-            round(lk, rounding),
-            round(lk_ref, rounding),
-        )
 class KernelPCovCTestSVDSolvers(KernelPCovCBaseTest):
     def test_svd_solvers(self):
         """
