-
Notifications
You must be signed in to change notification settings - Fork 205
/
Copy pathvalidation.py
221 lines (166 loc) · 7.86 KB
/
validation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# MIT License
#
# Copyright (C) IBM Corporation 2020
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
Validation functions for the differential privacy library
"""
from numbers import Real, Integral
import numpy as np
from diffprivlib.utils import warn_unused_args
def check_epsilon_delta(epsilon, delta, allow_zero=False):
    """Checks that epsilon and delta are valid values for differential privacy.  Throws an error if checks fail,
    otherwise returns nothing.

    As well as the requirements of epsilon and delta separately, both cannot be simultaneously zero, unless
    ``allow_zero`` is set to ``True``.

    Parameters
    ----------
    epsilon : float
        Epsilon parameter for differential privacy.  Must be non-negative (NaN is rejected).

    delta : float
        Delta parameter for differential privacy.  Must be on the unit interval, [0, 1].

    allow_zero : bool, default: False
        Allow epsilon and delta to both be zero.

    Raises
    ------
    TypeError
        If ``epsilon`` or ``delta`` is not an instance of ``numbers.Real``.

    ValueError
        If ``epsilon`` is negative or NaN, if ``delta`` is outside [0, 1] (or NaN), or if both are zero while
        ``allow_zero`` is ``False``.

    """
    if not isinstance(epsilon, Real) or not isinstance(delta, Real):
        raise TypeError("Epsilon and delta must be numeric")

    # Phrased as `not epsilon >= 0` rather than `epsilon < 0`: every comparison with NaN is False, so the latter
    # would let a NaN epsilon through, whereas this form rejects it (mirroring the delta check below).
    if not epsilon >= 0:
        raise ValueError("Epsilon must be non-negative")

    if not 0 <= delta <= 1:
        raise ValueError("Delta must be in [0, 1]")

    # Both values are known to be non-negative here, so the sum is zero only when both are zero.
    if not allow_zero and epsilon + delta == 0:
        raise ValueError("Epsilon and Delta cannot both be zero")
def check_bounds(bounds, shape=0, min_separation=0.0, dtype=float):
    """Validate the ``bounds`` parameter and return it in a normalised form.

    ``bounds`` must be a tuple ``(lower, upper)`` whose entries are numeric scalars or 1-dimensional arrays of equal
    shape, with ``lower <= upper`` element-wise.  Where the gap between a pair is below ``min_separation``, the pair
    is widened symmetrically about its midpoint so that the gap equals ``min_separation``.

    Parameters
    ----------
    bounds : tuple
        Tuple of bounds of the form (min, max). `min` and `max` can either be scalars or 1-dimensional arrays.

    shape : int, default: 0
        Number of dimensions to be expected in ``bounds``.  With ``shape=0`` a pair of scalars is returned; otherwise
        a pair of arrays with ``shape`` elements is returned (a single bound is repeated ``shape`` times).

    min_separation : float, default: 0.0
        The minimum separation between `lower` and `upper` of each dimension.  This separation is enforced if not
        already satisfied.

    dtype : data-type, default: float
        Data type of the returned bounds.

    Returns
    -------
    bounds : tuple
        The validated ``(lower, upper)`` pair.

    Raises
    ------
    TypeError
        If ``bounds`` is not a tuple, ``shape`` is not integer-valued, or a bound is not a real number.

    ValueError
        If the two bounds differ in shape, have more than one dimension, have the wrong number of elements, or a
        lower bound exceeds its upper bound.

    """
    if not isinstance(bounds, tuple):
        raise TypeError(f"Bounds must be specified as a tuple of (min, max), got {type(bounds)}.")
    if not isinstance(shape, Integral):
        raise TypeError(f"shape parameter must be integer-valued, got {type(shape)}.")

    lower, upper = bounds

    # If either side holds a single value, flatten both sides to 1-d; otherwise convert both sides as given.
    if np.asarray(lower).size == 1 or np.asarray(upper).size == 1:
        lower, upper = np.ravel(lower).astype(dtype), np.ravel(upper).astype(dtype)
    else:
        lower, upper = np.asarray(lower, dtype=dtype), np.asarray(upper, dtype=dtype)

    if lower.shape != upper.shape:
        raise ValueError("lower and upper bounds must be the same shape array")
    if lower.ndim > 1:
        raise ValueError("lower and upper bounds must be scalar or a 1-dimensional array")
    if lower.size not in (1, shape):
        raise ValueError(f"lower and upper bounds must have {shape or 1} element(s), got {lower.size}.")

    n_bounds = lower.shape[0]

    # Each element is read before its slot is (possibly) overwritten, so updating the arrays in the loop is safe.
    for idx, (low, high) in enumerate(zip(lower, upper)):
        if not (isinstance(low, Real) and isinstance(high, Real)):
            raise TypeError(f"Each bound must be numeric, got {low} ({type(low)}) and {high} ({type(high)}).")

        if low > high:
            raise ValueError(f"For each bound, lower bound must be smaller than upper bound, got {lower}, {upper})")

        if high - low < min_separation:
            # Widen the interval symmetrically about its midpoint.
            centre = (high + low) / 2
            lower[idx] = centre - min_separation / 2
            upper[idx] = centre + min_separation / 2

    if shape == 0:
        return lower.item(), upper.item()

    if n_bounds == 1:
        # Repeat the single bound across all ``shape`` dimensions.
        lower = np.ones(shape, dtype=dtype) * lower.item()
        upper = np.ones(shape, dtype=dtype) * upper.item()

    return lower, upper
def clip_to_norm(array, clip):
    """Scales down the rows of a 2-dimensional array so that none exceeds a given 2-norm.

    Rows whose 2-norm is already at most `clip` are returned unchanged; longer rows are rescaled to have a 2-norm of
    `clip`.  The input array is not modified.

    Parameters
    ----------
    array : np.ndarray
        Array to be clipped.  After clipping, all examples have a 2-norm of at most `clip`.

    clip : float
        Norm at which to clip each example

    Returns
    -------
    array : np.ndarray
        The clipped array.

    Raises
    ------
    TypeError
        If ``array`` is not a numpy array, or ``clip`` is not a real number.

    ValueError
        If ``array`` is not 2-dimensional, or ``clip`` is not strictly positive.

    """
    if not isinstance(array, np.ndarray):
        raise TypeError(f"Input array must be a numpy array, got {type(array)}.")
    if array.ndim != 2:
        raise ValueError(f"input array must be 2-dimensional, got {array.ndim} dimensions.")
    if not isinstance(clip, Real):
        raise TypeError(f"Clip value must be numeric, got {type(clip)}.")
    if clip <= 0:
        raise ValueError(f"Clip value must be strictly positive, got {clip}.")

    # Per-row divisor: the ratio of the row norm to `clip`, floored at 1 so that rows within the limit are untouched.
    divisors = np.maximum(np.linalg.norm(array, axis=1) / clip, 1)

    return array / divisors[:, None]
def clip_to_bounds(array, bounds):
    """Clips the values of an array to given bounds.

    Parameters
    ----------
    array : np.ndarray
        Array to be clipped.  After clipping, every value lies within the corresponding ``[min, max]`` interval.
        The input array is not modified.

    bounds : tuple
        Tuple of bounds of the form (min, max) which the array is to be clipped to. `min` and `max` must be scalar,
        unless array is 2-dimensional, in which case they may be 1-dimensional with one entry per column of
        ``array``.

    Returns
    -------
    array : np.ndarray
        The clipped array.

    Raises
    ------
    TypeError
        If ``array`` is not a numpy array (or ``bounds`` fails the type checks of ``check_bounds``).

    ValueError
        If ``bounds`` is invalid, if per-feature bounds are given for an array that is not 2-dimensional, or if the
        number of per-feature bounds does not match the number of columns in ``array``.

    """
    if not isinstance(array, np.ndarray):
        raise TypeError(f"Input array must be a numpy array, got {type(array)}.")

    lower, upper = check_bounds(bounds, np.size(bounds[0]), min_separation=0)
    lower_min, upper_max = np.min(lower), np.max(upper)

    # When every dimension shares (numerically) the same bounds, a single scalar clip suffices for any array shape.
    # np.clip already returns a new array, so no explicit copy is needed here.
    if np.allclose(lower, lower_min) and np.allclose(upper, upper_max):
        return np.clip(array, lower_min, upper_max)

    if array.ndim != 2:
        raise ValueError(f"For non-scalar bounds, input array must be 2-dimensional. Got {array.ndim} dimensions.")

    # Guard against mismatched bounds: too few would fail with an opaque IndexError below, and too many would be
    # silently ignored.
    if lower.size != array.shape[1]:
        raise ValueError(f"Number of bounds must match the number of features in the array, got {lower.size} "
                         f"bound(s) for {array.shape[1]} feature(s).")

    clipped_array = array.copy()

    # Assigning column-by-column into the copy keeps the dtype of the input array.
    for feature in range(array.shape[1]):
        clipped_array[:, feature] = np.clip(array[:, feature], lower[feature], upper[feature])

    return clipped_array
class DiffprivlibMixin:  # pylint: disable=too-few-public-methods
    """Mixin for Diffprivlib models."""

    # Module-level validation helpers, re-exposed on the class as static methods.
    _check_bounds = staticmethod(check_bounds)
    _clip_to_norm = staticmethod(clip_to_norm)
    _clip_to_bounds = staticmethod(clip_to_bounds)
    _warn_unused_args = staticmethod(warn_unused_args)

    # todo: remove when scikit-learn v1.2 is a min requirement
    def _validate_params(self):
        """No-op: performs no validation and returns ``None``."""

    @staticmethod
    def _copy_parameter_constraints(cls, *args):
        """Copies the parameter constraints for `*args` from `cls`

        Note that this is a static method: ``cls`` is an ordinary first argument supplied by the caller.

        Parameters
        ----------
        cls : object
            Object (typically a class) whose ``_parameter_constraints`` mapping is read, if it has one.

        *args : str
            Names of the parameters whose constraints are to be copied.

        Returns
        -------
        constraints : dict
            Mapping of each requested name present in ``cls._parameter_constraints`` to its constraint.  Empty if
            ``cls`` has no ``_parameter_constraints`` attribute.

        """
        if hasattr(cls, "_parameter_constraints"):
            available = cls._parameter_constraints
            return {name: available[name] for name in args if name in available}

        return {}