## Import libraries

In [1]:
# Note: v0.5.0 was the latest version available as of 31 December 2021
! pip install diffprivlib==0.5.0

Collecting diffprivlib==0.5.0
  Downloading diffprivlib-0.5.0.tar.gz (87 kB)
[?25l[K     |███▊                            | 10 kB 21.5 MB/s eta 0:00:01[K     |███████▌                        | 20 kB 12.7 MB/s eta 0:00:01[K     |███████████▎                    | 30 kB 7.6 MB/s eta 0:00:01[K     |███████████████                 | 40 kB 6.6 MB/s eta 0:00:01[K     |██████████████████▊             | 51 kB 4.2 MB/s eta 0:00:01[K     |██████████████████████▌         | 61 kB 4.9 MB/s eta 0:00:01[K     |██████████████████████████▎     | 71 kB 5.3 MB/s eta 0:00:01[K     |██████████████████████████████  | 81 kB 5.6 MB/s eta 0:00:01[K     |████████████████████████████████| 87 kB 2.6 MB/s 
Collecting scipy>=1.5.0
  Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
[K     |████████████████████████████████| 38.1 MB 1.4 MB/s 
Building wheels for collected packages: diffprivlib
  Building wheel for diffprivlib (setup.py) ... [?25l[?25hdone

In [2]:
from diffprivlib.tools.utils import sum
import matplotlib.pyplot as plt
import numpy as np

## Create datasets, and get the sums of these datasets

In [3]:
# Two neighbouring datasets: four shared zero rows plus one final row that is
# 1 in x_new and 0 in y_new.
start_dataset = np.zeros(4)
x_new = np.concatenate([start_dataset, [1]])
y_new = np.concatenate([start_dataset, [0]])

In [4]:
# Show both neighbouring datasets, one per line.
print(x_new, y_new, sep="\n")

[0. 0. 0. 0. 1.]
[0. 0. 0. 0. 0.]


In [5]:
# Exact (non-private) sums of the two datasets, followed by the gap between
# them. np.sum / ndarray.sum is NumPy's sum, not the diffprivlib `sum`
# imported at the top of the notebook.
sum_thing = x_new.sum()
sum_thing_2 = y_new.sum()

print(sum_thing, sum_thing_2, sum_thing - sum_thing_2, sep="\n")


1.0
0.0
1.0


## Run bounded sum 1,000 times per dataset to see how readily we can tell the difference between these datasets

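For a 17-row dataset (i.e. `num_vals_power = 4`), we can tell which dataset is which with probability $\approx \frac{9999}{10{,}000}$. Crazy!!! (The cell below is currently set to `num_vals_power = 6`, which gives a 65-row dataset.)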

In [6]:
# Exponent controlling the dataset size: the all-zero start array has
# 2**num_vals_power entries (64 for the value 6) and one extra row is appended,
# so each dataset has 2**num_vals_power + 1 rows. Works for vals \in [2,6].
num_vals_power = 6

# Number of noisy sums drawn for each dataset.
num_trials = 1000

# Neighbouring datasets: identical zero rows, differing only in the last row.
start_dataset = np.zeros(2**num_vals_power)
x_new = np.append(start_dataset, 1)
y_new = np.append(start_dataset, 0)

# Clipping bounds passed to the differentially private sum. Both sit just
# above 0.5 and, algebraically, upper - lower = 2**-53.
# NOTE(review): presumably chosen so that floating-point rounding in the
# clipped sum differs between the two datasets -- confirm against the write-up.
lower = (1+((2**(-53+num_vals_power)-2**(-52))))*0.5
upper = (1+(2**(-53+num_vals_power)))*0.5
eps_val = 1000

# `sum` here is diffprivlib.tools.utils.sum (it shadows the builtin); every
# call draws fresh noise, so each list holds num_trials independent outputs.
arr_1_ans = [
    sum(x_new, epsilon=eps_val, bounds=(lower, upper)) for _ in range(num_trials)
]
arr_2_ans = [
    sum(y_new, epsilon=eps_val, bounds=(lower, upper)) for _ in range(num_trials)
]

In [7]:
# Print the smallest distinct noisy sum observed for each dataset.
# np.unique returns (sorted unique values, counts), so [0][0] is the minimum
# value; the counts are computed but not displayed by this cell.
for noisy_sums in (arr_1_ans, arr_2_ans):
    the_ans = np.unique(noisy_sums, return_counts=True)
    print(the_ans[0][0])

32.50000000000023
32.50000000000022


In [8]:
# Sanity check with degenerate bounds (lower == upper == 1) and a tiny epsilon.
# The recorded result is 65.0, which equals the number of rows in y_new.
# NOTE(review): presumably every value is clipped to 1 and zero-width bounds
# add no noise -- confirm against the diffprivlib documentation.
sum(y_new, epsilon=1e-8, bounds=(1, 1))

65.0

In [9]:
# Display the dataset that was passed to the bounded sum in the previous cell.
y_new

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [10]:
# Gap between one noisy sum of x_new and one noisy sum of y_new.
# NOTE(review): the indices differ ([0] vs [1]); the draws look independent,
# so any pair should serve -- confirm this is not a typo.
arr_1_ans[0]-arr_2_ans[1]

7.105427357601002e-15

In [11]:
# Reference power of two for scale: the gap recorded in the previous cell
# (7.105427357601002e-15) is exactly twice this value, i.e. 2**-47.
2**-48

3.552713678800501e-15