
Commit 82aa115

Working on docstrings
1 parent 2979f3e commit 82aa115

File tree

4 files changed (+57, −70 lines)


src/skmatter/decomposition/_kernel_pcovc.py

Lines changed: 54 additions & 10 deletions
@@ -69,26 +69,25 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
         If randomized :
             run randomized SVD by the method of Halko et al.
 
-    classifier: {`LogisticRegression`, `LogisticRegressionCV`, `LinearSVC`, `LinearDiscriminantAnalysis`,
-        `RidgeClassifier`, `RidgeClassifierCV`, `SGDClassifier`, `Perceptron`, `precomputed`}, default=None
+    classifier : {instance of `sklearn.svm.SVC`, None}, default=None
         The classifier to use for computing
         the evidence :math:`{\mathbf{Z}}`.
         A pre-fitted classifier may be provided.
+        If the classifier is not `None`, its kernel parameters
+        (`kernel`, `gamma`, `degree`, and `coef0`)
+        must be identical to those passed directly to `KernelPCovC`.
 
-        If None, ``sklearn.linear_model.LogisticRegression()``
-        is used as the classifier.
-
-    kernel : {"linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"}, default="linear"
+    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf'
         Kernel.
 
-    gamma : {'scale', 'auto'} or float, default=None
+    gamma : {'scale', 'auto'} or float, default='scale'
         Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other
         kernels.
 
     degree : int, default=3
         Degree for poly kernels. Ignored by other kernels.
 
-    coef0 : float, default=1
+    coef0 : float, default=0.0
         Independent term in poly and sigmoid kernels.
         Ignored by other kernels.
 
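As an illustration of the matching-parameter rule the new docstring states, here is a minimal sketch; the data shapes, `mixing`, and `n_components` values are assumptions for illustration, not part of this commit:

import numpy as np
from sklearn.svm import SVC
from skmatter.decomposition import KernelPCovC

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 4))
y = np.repeat([0, 1], 50)

# Per the docstring above: the SVC's kernel parameters must match the
# ones given to KernelPCovC itself (kernel, gamma, degree, coef0).
kpcovc = KernelPCovC(
    mixing=0.5,          # hypothetical mixing value
    n_components=2,
    classifier=SVC(kernel="rbf", gamma="scale"),
    kernel="rbf",
    gamma="scale",
)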
@@ -223,6 +222,27 @@ def __init__(
         self.classifier = classifier
 
     def fit(self, X, Y):
+        r"""Fit the model with X and Y.
+
+        Parameters
+        ----------
+        X : numpy.ndarray, shape (n_samples, n_features)
+            Training data, where n_samples is the number of samples and
+            n_features is the number of features.
+
+            It is suggested that :math:`\mathbf{X}` be centered by its column-
+            means and scaled. If features are related, the matrix should be scaled
+            to have unit variance, otherwise :math:`\mathbf{X}` should be
+            scaled so that each feature has a variance of 1 / n_features.
+
+        Y : numpy.ndarray, shape (n_samples,)
+            Training data, where n_samples is the number of samples.
+
+        Returns
+        -------
+        self : object
+            Returns the instance itself.
+        """
         X, Y = validate_data(self, X, Y, y_numeric=False)
         check_classification_targets(Y)
         self.classes_ = np.unique(Y)
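A sketch of the preprocessing the `fit` docstring suggests, continuing the example above; `StandardScaler` is one way to get the column-mean centering and unit variance (the library's own scalers could be substituted):

from sklearn.preprocessing import StandardScaler

# Center each column by its mean and scale to unit variance, as the
# docstring recommends when the features are related.
X_scaled = StandardScaler().fit_transform(X)

kpcovc.fit(X_scaled, y)            # Y here is a 1-D array of class labels
T = kpcovc.transform(X_scaled)     # shape (n_samples, n_components)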
@@ -347,7 +367,7 @@ def transform(self, X):
         """Apply dimensionality reduction to X.
 
         ``X`` is projected on the first principal components as determined by the
-        modified Kernel PCovR distances.
+        modified Kernel PCovC distances.
 
         Parameters
         ----------
@@ -382,7 +402,31 @@ def inverse_transform(self, T):
         return super().inverse_transform(T)
 
     def decision_function(self, X=None, T=None):
-        """Predicts confidence scores from X or T."""
+        r"""Predicts confidence scores from X or T.
+
+        .. math::
+            \mathbf{Z} = \mathbf{T} \mathbf{P}_{TZ}
+                       = \mathbf{K} \mathbf{P}_{KT} \mathbf{P}_{TZ}
+                       = \mathbf{K} \mathbf{P}_{KZ}
+
+        Parameters
+        ----------
+        X : ndarray, shape (n_samples, n_features)
+            Original data for which we want to get confidence scores,
+            where n_samples is the number of samples and n_features is the
+            number of features.
+
+        T : ndarray, shape (n_samples, n_components)
+            Projected data for which we want to get confidence scores,
+            where n_samples is the number of samples and n_components is the
+            number of components.
+
+        Returns
+        -------
+        Z : numpy.ndarray, shape (n_samples,) or (n_samples, n_classes)
+            Confidence scores. For binary classification, has shape `(n_samples,)`,
+            for multiclass classification, has shape `(n_samples, n_classes)`.
+        """
         check_is_fitted(self, attributes=["pkz_", "ptz_"])
 
         if X is None and T is None:
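The identity in the new docstring means the same scores are reachable from either input; a sketch of the two call paths, continuing the example above:

# From the original features: the model builds the kernel K internally
# and applies the composed projector P_KZ (fitted attribute `pkz_`).
Z_from_X = kpcovc.decision_function(X=X_scaled)

# From an existing projection: only P_TZ (fitted attribute `ptz_`) is applied.
Z_from_T = kpcovc.decision_function(T=kpcovc.transform(X_scaled))

# Two classes here, so both score arrays have shape (n_samples,).
assert Z_from_X.shape == Z_from_T.shape == (X_scaled.shape[0],)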

src/skmatter/decomposition/_kernel_pcovr.py

Lines changed: 1 addition & 1 deletion
@@ -85,7 +85,7 @@ class KernelPCovR(_BaseKPCov):
         If `precomputed`, we assume that the `y` passed to the `fit` function
         is the regressed form of the targets :math:`{\mathbf{\hat{Y}}}`.
 
-    kernel : {"linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"}, default="linear"
+    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'} or callable, default='linear'
         Kernel.
 
     gamma : float, default=None
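The revised docstring now admits a callable kernel; a hedged sketch of that usage, where pairing with a matching `KernelRidge` regressor is an assumption about the expected API rather than something this commit shows:

from sklearn.kernel_ridge import KernelRidge
from skmatter.decomposition import KernelPCovR

def linear_kernel(A, B):
    # A callable kernel receives two sample matrices and returns
    # their Gram matrix.
    return A @ B.T

kpcovr = KernelPCovR(
    mixing=0.5,
    n_components=2,
    kernel=linear_kernel,
    regressor=KernelRidge(kernel=linear_kernel),  # same kernel as the model
)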

src/skmatter/decomposition/_pcovc.py

Lines changed: 1 addition & 0 deletions
@@ -399,6 +399,7 @@ def decision_function(self, X=None, T=None):
             Original data for which we want to get confidence scores,
             where n_samples is the number of samples and n_features is the
             number of features.
+
         T : ndarray, shape (n_samples, n_components)
             Projected data for which we want to get confidence scores,
             where n_samples is the number of samples and n_components is the

tests/test_kernel_pcovc.py

Lines changed: 1 addition & 59 deletions
@@ -114,6 +114,7 @@ def test_reconstruction_errors(self):
 
             prev_error = error
             prev_x_error = x_error
+
         print(x_errors)
         print(errors)
 
@@ -322,66 +323,7 @@ def _linear_kernel(X, Y):
         kpcovc.fit(self.X, self.Y)
 
 
-    def test_linear_matches_pcovc(self):
-        """Check that KernelPCovR returns the same results as PCovR when using a linear
-        kernel.
-        """
-        linear_svc = LinearSVC(loss="hinge", fit_intercept=False)
-        linear_svc.fit(self.X, self.Y)
-
-        # common instantiation parameters for the two models
-        hypers = dict(
-            mixing=1,
-            n_components=1,
-        )
-
-        # computing projection and predicton loss with linear KernelPCovR
-        # and use the alpha from RidgeCV for level regression comparisons
-        kpcovr = KernelPCovC(
-            classifier=SVC(kernel="linear"),
-            kernel="linear",
-            fit_inverse_transform=True,
-            center=True,
-            **hypers,
-        )
-        kpcovr.fit(self.X, self.Y)
-        ly = (
-            np.linalg.norm(self.Y - kpcovr.predict(self.X)) ** 2.0
-            / np.linalg.norm(self.Y) ** 2.0
-        )
-
-        # computing projection and predicton loss with PCovR
-        ref_pcovr = PCovC(**hypers, classifier=linear_svc, space="sample")
-        ref_pcovr.fit(self.X, self.Y)
-        ly_ref = (
-            np.linalg.norm(self.Y - ref_pcovr.predict(self.X)) ** 2.0
-            / np.linalg.norm(self.Y) ** 2.0
-        )
 
-        t_ref = ref_pcovr.transform(self.X)
-        t = kpcovr.transform(self.X)
-
-        print(np.linalg.norm(t_ref - t))
-        K = kpcovr._get_kernel(self.X)
-
-        k_ref = t_ref @ t_ref.T
-        k = t @ t.T
-
-        lk_ref = np.linalg.norm(K - k_ref) ** 2.0 / np.linalg.norm(K) ** 2.0
-        lk = np.linalg.norm(K - k) ** 2.0 / np.linalg.norm(K) ** 2.0
-
-        rounding = 3
-        # self.assertEqual(
-        #     round(ly, rounding),
-        #     round(ly_ref, rounding),
-        # )
-
-        print(lk, lk_ref)
-
-        self.assertEqual(
-            round(lk, rounding),
-            round(lk_ref, rounding),
-        )
 class KernelPCovCTestSVDSolvers(KernelPCovCBaseTest):
     def test_svd_solvers(self):
         """
