Skip to content

Commit 65afadb

Browse files
DOC: Add info about incremental algorithms and BS (#2103) (#2112)
* DOC: Add info about incremental algorithms and BS (#2103) - Add 'Non-Scikit algorithms' part to the docs - Add info about IncrementalPCA - Add sphinx.napoleon extension to generate docs from docstrings. - Update docstrings for non-scikit algorithms * Remove info about 2025.1 changes from BS and IncBS docstrings --------- Co-authored-by: Samir Nasibli <samir.nasibli@intel.com>
1 parent e0535f6 commit 65afadb

File tree

8 files changed

+186
-41
lines changed

8 files changed

+186
-41
lines changed

doc/sources/algorithms.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ Dimensionality Reduction
159159

160160
- ``svd_solver`` not in [`'full'`, `'covariance_eigh'`]
161161
- Sparse data is not supported
162+
* - `IncrementalPCA`
163+
- All parameters are supported
164+
- Sparse data is not supported
162165
* - `TSNE`
163166
- All parameters are supported except:
164167

doc/sources/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
"notfound.extension",
6868
"sphinx_design",
6969
"sphinx_copybutton",
70+
"sphinx.ext.napoleon",
7071
]
7172

7273
# Add any paths that contain templates here, relative to this directory.

doc/sources/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ Enable Intel(R) GPU optimizations
105105
algorithms.rst
106106
oneAPI and GPU support <oneapi-gpu.rst>
107107
distributed-mode.rst
108+
non-scikit-algorithms.rst
108109
array_api.rst
109110
verbose.rst
110111
deprecation.rst

doc/sources/non-scikit-algorithms.rst

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
.. ******************************************************************************
2+
.. * Copyright 2024 Intel Corporation
3+
.. *
4+
.. * Licensed under the Apache License, Version 2.0 (the "License");
5+
.. * you may not use this file except in compliance with the License.
6+
.. * You may obtain a copy of the License at
7+
.. *
8+
.. * http://www.apache.org/licenses/LICENSE-2.0
9+
.. *
10+
.. * Unless required by applicable law or agreed to in writing, software
11+
.. * distributed under the License is distributed on an "AS IS" BASIS,
12+
.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
.. * See the License for the specific language governing permissions and
14+
.. * limitations under the License.
15+
.. *******************************************************************************/
16+
17+
Non-Scikit-Learn Algorithms
18+
===========================
19+
Algorithms not present in the original scikit-learn are described here. All algorithms are
20+
available for both CPU and GPU (including distributed mode).
21+
22+
BasicStatistics
23+
---------------
24+
.. autoclass:: sklearnex.basic_statistics.BasicStatistics
25+
.. automethod:: sklearnex.basic_statistics.BasicStatistics.fit
26+
27+
IncrementalBasicStatistics
28+
--------------------------
29+
.. autoclass:: sklearnex.basic_statistics.IncrementalBasicStatistics
30+
.. automethod:: sklearnex.basic_statistics.IncrementalBasicStatistics.fit
31+
.. automethod:: sklearnex.basic_statistics.IncrementalBasicStatistics.partial_fit
32+
33+
IncrementalEmpiricalCovariance
34+
------------------------------
35+
.. autoclass:: sklearnex.covariance.IncrementalEmpiricalCovariance
36+
.. automethod:: sklearnex.covariance.IncrementalEmpiricalCovariance.fit
37+
.. automethod:: sklearnex.covariance.IncrementalEmpiricalCovariance.partial_fit
38+
39+
IncrementalLinearRegression
40+
---------------------------
41+
.. autoclass:: sklearnex.linear_model.IncrementalLinearRegression
42+
.. automethod:: sklearnex.linear_model.IncrementalLinearRegression.fit
43+
.. automethod:: sklearnex.linear_model.IncrementalLinearRegression.partial_fit
44+
.. automethod:: sklearnex.linear_model.IncrementalLinearRegression.predict

sklearnex/basic_statistics/basic_statistics.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,16 @@ class BasicStatistics(BaseEstimator):
3232
"""
3333
Estimator for basic statistics.
3434
Allows computing basic statistics for the provided data.
35+
3536
Parameters
3637
----------
3738
result_options : string or list, default='all'
38-
List of statistics to compute
39+
Used to set statistics to calculate. Possible values are ``'min'``, ``'max'``, ``'sum'``, ``'mean'``, ``'variance'``,
40+
``'variation'``, ``'sum_squares'``, ``'sum_squares_centered'``, ``'standard_deviation'``, ``'second_order_raw_moment'``
41+
or a list containing any of these values. If set to ``'all'`` then all possible statistics will be
42+
calculated.
3943
40-
Attributes (are existing only if corresponding result option exists)
44+
Attributes
4145
----------
4246
min : ndarray of shape (n_features,)
4347
Minimum of each feature over all samples.
@@ -59,6 +63,27 @@ class BasicStatistics(BaseEstimator):
5963
Centered sum of squares for each feature over all samples.
6064
second_order_raw_moment : ndarray of shape (n_features,)
6165
Second order moment of each feature over all samples.
66+
67+
Note
68+
----
69+
Attribute exists only if corresponding result option has been provided.
70+
71+
Note
72+
----
73+
Some results can exhibit small variations due to
74+
floating point error accumulation and multithreading.
75+
76+
Examples
77+
--------
78+
>>> import numpy as np
79+
>>> from sklearnex.basic_statistics import BasicStatistics
80+
>>> bs = BasicStatistics(result_options=['sum', 'min', 'max'])
81+
>>> X = np.array([[1, 2], [3, 4]])
82+
>>> bs.fit(X)
83+
>>> bs.sum_
84+
np.array([4., 6.])
85+
>>> bs.min_
86+
np.array([1., 2.])
6287
"""
6388

6489
def __init__(self, result_options="all"):
@@ -113,14 +138,14 @@ def fit(self, X, y=None, *, sample_weight=None):
113138
Parameters
114139
----------
115140
X : array-like of shape (n_samples, n_features)
116-
Data for compute, where `n_samples` is the number of samples and
117-
`n_features` is the number of features.
141+
Data to compute statistics on, where ``n_samples`` is the number of samples and
142+
``n_features`` is the number of features.
118143
119144
y : Ignored
120145
Not used, present for API consistency by convention.
121146
122147
sample_weight : array-like of shape (n_samples,), default=None
123-
Weights for compute weighted statistics, where `n_samples` is the number of samples.
148+
Weights for computing weighted statistics, where ``n_samples`` is the number of samples.
124149
125150
Returns
126151
-------

sklearnex/basic_statistics/incremental_basic_statistics.py

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@
3737
@control_n_jobs(decorated_methods=["partial_fit", "_onedal_finalize_fit"])
3838
class IncrementalBasicStatistics(BaseEstimator):
3939
"""
40-
Incremental estimator for basic statistics.
41-
Allows to compute basic statistics if data are splitted into batches.
40+
Calculates basic statistics on the given data and allows for computation when the data are split into
41+
batches. The user can use the ``partial_fit`` method to provide a single batch of data or use the ``fit`` method to provide
42+
the entire dataset.
43+
4244
Parameters
4345
----------
4446
result_options : string or list, default='all'
@@ -47,10 +49,9 @@ class IncrementalBasicStatistics(BaseEstimator):
4749
batch_size : int, default=None
4850
The number of samples to use for each batch. Only used when calling
4951
``fit``. If ``batch_size`` is ``None``, then ``batch_size``
50-
is inferred from the data and set to ``5 * n_features``, to provide a
51-
balance between approximation accuracy and memory consumption.
52+
is inferred from the data and set to ``5 * n_features``.
5253
53-
Attributes (are existing only if corresponding result option exists)
54+
Attributes
5455
----------
5556
min : ndarray of shape (n_features,)
5657
Minimum of each feature over all samples.
@@ -81,6 +82,38 @@ class IncrementalBasicStatistics(BaseEstimator):
8182
8283
second_order_raw_moment : ndarray of shape (n_features,)
8384
Second order moment of each feature over all samples.
85+
86+
n_samples_seen_ : int
87+
The number of samples processed by the estimator. Will be reset on
88+
new calls to ``fit``, but increments across ``partial_fit`` calls.
89+
90+
batch_size_ : int
91+
Inferred batch size from ``batch_size``.
92+
93+
n_features_in_ : int
94+
Number of features seen during ``fit`` or ``partial_fit``.
95+
96+
Note
97+
----
98+
Attribute exists only if corresponding result option has been provided.
99+
100+
Examples
101+
--------
102+
>>> import numpy as np
103+
>>> from sklearnex.basic_statistics import IncrementalBasicStatistics
104+
>>> incbs = IncrementalBasicStatistics(batch_size=1)
105+
>>> X = np.array([[1, 2], [3, 4]])
106+
>>> incbs.partial_fit(X[:1])
107+
>>> incbs.partial_fit(X[1:])
108+
>>> incbs.sum_
109+
np.array([4., 6.])
110+
>>> incbs.min_
111+
np.array([1., 2.])
112+
>>> incbs.fit(X)
113+
>>> incbs.sum_
114+
np.array([4., 6.])
115+
>>> incbs.max_
116+
np.array([3., 4.])
84117
"""
85118

86119
_onedal_incremental_basic_statistics = staticmethod(onedal_IncrementalBasicStatistics)
@@ -229,14 +262,14 @@ def partial_fit(self, X, sample_weight=None):
229262
Parameters
230263
----------
231264
X : array-like of shape (n_samples, n_features)
232-
Data for compute, where `n_samples` is the number of samples and
233-
`n_features` is the number of features.
265+
Data to compute statistics on, where ``n_samples`` is the number of samples and
266+
``n_features`` is the number of features.
234267
235268
y : Ignored
236269
Not used, present for API consistency by convention.
237270
238271
sample_weight : array-like of shape (n_samples,), default=None
239-
Weights for compute weighted statistics, where `n_samples` is the number of samples.
272+
Weights for computing weighted statistics, where ``n_samples`` is the number of samples.
240273
241274
Returns
242275
-------
@@ -261,14 +294,14 @@ def fit(self, X, y=None, sample_weight=None):
261294
Parameters
262295
----------
263296
X : array-like of shape (n_samples, n_features)
264-
Data for compute, where `n_samples` is the number of samples and
265-
`n_features` is the number of features.
297+
Data to compute statistics on, where ``n_samples`` is the number of samples and
298+
``n_features`` is the number of features.
266299
267300
y : Ignored
268301
Not used, present for API consistency by convention.
269302
270303
sample_weight : array-like of shape (n_samples,), default=None
271-
Weights for compute weighted statistics, where `n_samples` is the number of samples.
304+
Weights for computing weighted statistics, where ``n_samples`` is the number of samples.
272305
273306
Returns
274307
-------

sklearnex/covariance/incremental_covariance.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@
4444
@control_n_jobs(decorated_methods=["partial_fit", "fit", "_onedal_finalize_fit"])
4545
class IncrementalEmpiricalCovariance(BaseEstimator):
4646
"""
47-
Incremental estimator for covariance.
48-
Allows to compute empirical covariance estimated by maximum
49-
likelihood method if data are splitted into batches.
47+
Maximum likelihood covariance estimator that allows for the estimation when the data are split into
48+
batches. The user can use the ``partial_fit`` method to provide a single batch of data or use the ``fit`` method to provide
49+
the entire dataset.
5050
5151
Parameters
5252
----------
@@ -79,13 +79,31 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
7979
8080
n_samples_seen_ : int
8181
The number of samples processed by the estimator. Will be reset on
82-
new calls to fit, but increments across ``partial_fit`` calls.
82+
new calls to ``fit``, but increments across ``partial_fit`` calls.
8383
8484
batch_size_ : int
8585
Inferred batch size from ``batch_size``.
8686
8787
n_features_in_ : int
88-
Number of features seen during :term:`fit` `partial_fit`.
88+
Number of features seen during ``fit`` or ``partial_fit``.
89+
90+
Examples
91+
--------
92+
>>> import numpy as np
93+
>>> from sklearnex.covariance import IncrementalEmpiricalCovariance
94+
>>> inccov = IncrementalEmpiricalCovariance(batch_size=1)
95+
>>> X = np.array([[1, 2], [3, 4]])
96+
>>> inccov.partial_fit(X[:1])
97+
>>> inccov.partial_fit(X[1:])
98+
>>> inccov.covariance_
99+
np.array([[1., 1.],[1., 1.]])
100+
>>> inccov.location_
101+
np.array([2., 3.])
102+
>>> inccov.fit(X)
103+
>>> inccov.covariance_
104+
np.array([[1., 1.],[1., 1.]])
105+
>>> inccov.location_
106+
np.array([2., 3.])
89107
"""
90108

91109
_onedal_incremental_covariance = staticmethod(onedal_IncrementalEmpiricalCovariance)

0 commit comments

Comments
 (0)