-
Notifications
You must be signed in to change notification settings - Fork 205
/
Copy pathvector.py
190 lines (143 loc) · 7.04 KB
/
vector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# MIT License
#
# Copyright (C) IBM Corporation 2019
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
The vector mechanism in differential privacy, for producing perturbed objectives
"""
from numbers import Real
import numpy as np
from diffprivlib.mechanisms.base import DPMechanism
from diffprivlib.utils import copy_docstring
class Vector(DPMechanism):
    r"""
    The vector mechanism in differential privacy.

    The vector mechanism is used when perturbing convex objective functions.
    Full paper: http://www.jmlr.org/papers/volume12/chaudhuri11a/chaudhuri11a.pdf

    Parameters
    ----------
    epsilon : float
        Privacy parameter :math:`\epsilon` for the mechanism.  Must be in (0, ∞].

    function_sensitivity : float
        The function sensitivity of the mechanism.  Must be in [0, ∞).

    data_sensitivity : float, default: 1.0
        The data sensitivity of the mechanism.  Must be in [0, ∞).

    dimension : int
        Function input dimension.  This dimension relates to the size of the input vector of the function being
        considered by the mechanism.  This corresponds to the size of the random vector produced by the mechanism.
        Must be in [1, ∞).

    alpha : float, default: 0.01
        Regularisation parameter.  Must be in (0, ∞).

    n : int, default: 1
        Size of the training dataset, required to calibrate the influence of the random vector in the objective.

    random_state : int or RandomState, optional
        Controls the randomness of the mechanism.  To obtain a deterministic behaviour during randomisation,
        ``random_state`` has to be fixed to an integer.

    """
    def __init__(self, *, epsilon, function_sensitivity, data_sensitivity=1.0, dimension, alpha=0.01, n=1,
                 random_state=None):
        # The vector mechanism is a pure-epsilon mechanism, so delta is pinned to zero.
        super().__init__(epsilon=epsilon, delta=0.0, random_state=random_state)
        self.function_sensitivity, self.data_sensitivity = self._check_sensitivity(function_sensitivity,
                                                                                   data_sensitivity)
        self.dimension = self._check_dimension(dimension)
        self.alpha = self._check_alpha(alpha)
        # `n` is only coerced here; its range is validated in `_check_all` before randomising.
        self.n = int(n)

    @classmethod
    def _check_epsilon_delta(cls, epsilon, delta):
        """Reject any non-zero `delta`, then defer to the parent class' epsilon/delta validation."""
        if not delta == 0:
            raise ValueError("Delta must be zero")

        return super()._check_epsilon_delta(epsilon, delta)

    @classmethod
    def _check_alpha(cls, alpha):
        """Return `alpha` unchanged if it is a strictly positive real number.

        Raises
        ------
        TypeError
            If `alpha` is not an instance of `numbers.Real`.

        ValueError
            If `alpha` is not strictly positive.

        """
        if not isinstance(alpha, Real):
            raise TypeError("Alpha must be numeric")

        if alpha <= 0:
            raise ValueError("Alpha must be strictly positive")

        return alpha

    @classmethod
    def _check_dimension(cls, vector_dim):
        """Return `vector_dim` as an `int` if it is an integer-valued real number of at least 1.

        Raises
        ------
        TypeError
            If `vector_dim` is not a real number, or is not (numerically close to) an integer.

        ValueError
            If `vector_dim` is smaller than 1.

        """
        if not isinstance(vector_dim, Real) or not np.isclose(vector_dim, int(vector_dim)):
            raise TypeError("d must be integer-valued")

        if int(vector_dim) < 1:
            raise ValueError("d must be strictly positive")

        return int(vector_dim)

    @classmethod
    def _check_sensitivity(cls, function_sensitivity, data_sensitivity):
        """Return both sensitivities unchanged if they are non-negative real numbers.

        Raises
        ------
        TypeError
            If either sensitivity is not an instance of `numbers.Real`.

        ValueError
            If either sensitivity is negative.

        """
        if not isinstance(function_sensitivity, Real) or not isinstance(data_sensitivity, Real):
            raise TypeError("Sensitivities must be numeric")

        if function_sensitivity < 0 or data_sensitivity < 0:
            raise ValueError("Sensitivities must be non-negative")

        return function_sensitivity, data_sensitivity

    def _check_all(self, value):
        """Re-validate every parameter of the mechanism and check that `value` is callable.

        Attributes may have been reassigned since construction, so each one is checked again here.

        Raises
        ------
        TypeError
            If `value` is not callable (or a parameter has the wrong type).

        ValueError
            If a parameter is out of range, including ``n < 1``.

        """
        super()._check_all(value)
        self._check_alpha(self.alpha)
        self._check_sensitivity(self.function_sensitivity, self.data_sensitivity)
        self._check_dimension(self.dimension)

        if self.n < 1:
            raise ValueError(f"n must be strictly positive, got {self.n}")

        if not callable(value):
            raise TypeError("Value to be randomised must be a function")

        return True

    # Not implemented for this mechanism; the docstring is copied from the parent class.
    @copy_docstring(DPMechanism.bias)
    def bias(self, value):
        raise NotImplementedError

    # Not implemented for this mechanism; the docstring is copied from the parent class.
    @copy_docstring(DPMechanism.variance)
    def variance(self, value):
        raise NotImplementedError

    def randomise(self, value):
        """Randomise `value` with the mechanism.

        If `value` is a method of two outputs, they are taken as `f` and `fprime` (i.e., its gradient), and both are
        perturbed accordingly.

        The random vector is drawn once, when this method is called; the returned function applies that same vector
        on every evaluation.  The outputs of `value` are never modified in place: the perturbed objective and
        gradient are returned as new objects.

        Parameters
        ----------
        value : method
            The function to be randomised.  Its first positional argument is taken as the input vector.

        Returns
        -------
        method
            The randomised method.

        """
        self._check_all(value)

        epsilon_p = self.epsilon - 2 * np.log(1 + self.function_sensitivity * self.data_sensitivity / self.alpha)
        # Extra quadratic regularisation weight; only non-zero when the epsilon left after the log term is not
        # positive, in which case half of epsilon is used for the noise instead.
        delta = 0

        if epsilon_p <= 0:
            delta = (self.function_sensitivity * self.data_sensitivity / np.expm1(self.epsilon / 4)
                     - self.alpha) / self.n
            epsilon_p = self.epsilon / 2

        scale = self.data_sensitivity * 2 / epsilon_p

        # Direction: each coordinate is the sum of 4 standard normals, halved.
        # Length: the sum of 4 gamma draws with shape dimension/4 and the scale computed above.
        try:
            normed_noisy_vector = self._rng.standard_normal((self.dimension, 4)).sum(axis=1) / 2
            noisy_norm = self._rng.gamma(self.dimension / 4, scale, 4).sum()
        except AttributeError:  # rng is secrets.SystemRandom
            normed_noisy_vector = np.reshape([self._rng.normalvariate(0, 1) for _ in range(self.dimension * 4)],
                                             (-1, 4)).sum(axis=1) / 2
            # A zero scale is short-circuited to a zero norm rather than passed to gammavariate.
            noisy_norm = sum(self._rng.gammavariate(self.dimension / 4, scale) for _ in range(4)) if scale > 0 else 0.0

        # Rescale the direction vector so that its Euclidean length equals the sampled norm.
        norm = np.linalg.norm(normed_noisy_vector, 2)
        normed_noisy_vector = normed_noisy_vector / norm * noisy_norm

        def output_func(*args):
            # `value` still receives the caller's original arguments; only the local view used for the
            # perturbation is converted, so list/tuple inputs behave like arrays in the arithmetic below.
            input_vec = np.asarray(args[0])

            func = value(*args)

            if isinstance(func, tuple):
                func, grad = func
            else:
                grad = None

            # Out-of-place additions: never mutate objects handed back by the user's function.
            func = func + np.dot(normed_noisy_vector, input_vec) / self.n
            func = func + 0.5 * delta * np.dot(input_vec, input_vec)

            if grad is not None:
                grad_shift = normed_noisy_vector / self.n + delta * input_vec
                # `grad += ...` would modify a caller-owned array in place (and would *extend* a list);
                # build a new object instead.
                grad = grad + grad_shift

                return func, grad

            return func

        return output_func