# Module 3 Exercises

This notebook illustrates some of the concepts from Module 3.  Workloads of predicate counting queries and mechanisms for answering them are described below. 

Imports

In [0]:
import numpy as np
import math

Build workload matrix -- for the CDF workload

In [0]:
def cdf_workload(dom):
  # workload matrix representing CDF queries
  # dom: domain size
  return np.tril(np.ones(dom))


In [0]:
W = cdf_workload(4)
print(W)

[[1. 0. 0. 0.]
 [1. 1. 0. 0.]
 [1. 1. 1. 0.]
 [1. 1. 1. 1.]]


Sample data vector

In [0]:
x = np.array([5,10,2,16])

Evaluate queries in CDF workload on sample data

In [204]:
W @ x   # @ is a matrix multiply

array([ 5., 15., 17., 33.])

Laplace mechanism for workloads of queries

In [0]:
def L1_sensitivity(A):  
	"""Return the L1 sensitivity of strategy matrix A: maximum L1 norm of the columns."""
	return float(np.linalg.norm(A, 1))   # implemented in numpy as 1-norm of matrix

def laplace_mechanism(W, x, epsilon=1.0):
  # laplace mechanism on the input workload W
  true_answer = W @ x
  sens = L1_sensitivity(W)
  noise = np.random.laplace(sens/epsilon)
  return true_answer + noise

  

Run the Laplace Mechanism on the workload

In [203]:
laplace_mechanism(W, x)

array([ 8.95410041, 18.95410041, 20.95410041, 36.95410041])

Build strategy matrices

In [0]:
def identity_strategy(dom):
  # Identity strategy (noisy frequency counts)
	return np.eye(dom, dtype=int)

def buildHierarchical(start, end, n, factors):
	"""Builds a hierarchical strategy matrix with branching
	factor determined by the ordered list 'factoring'
	domain size will be the product of the factors
	(for efficiency, we build list of lists, to be converted to matrix later)"""

	m = [ [0]*n ]
	m[0][start:end+1] = [1]*(end+1 - start) 
	if len(factors) >= 1:
		b = factors.pop(0)
		inc = (end - start + 1) // b
	else:
		return m
	for i in range(start, end+1, inc):
		m = m + buildHierarchical(i, i+inc-1, n, factors[:] )  
	return m

#
# Use this function
#
def hier_strategy(dom):
  # dom should be a power of 2
  factors = [2]*(int(math.log(dom))+1)
  return np.array(buildHierarchical(0,dom-1,dom,factors))

In [0]:
identity_strategy(4)

array([[1, 0, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 0, 1]])

In [0]:
hier_strategy(4)

array([[1, 1, 1, 1],
       [1, 1, 0, 0],
       [1, 0, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 1],
       [0, 0, 1, 0],
       [0, 0, 0, 1]])

Select-Measure-Reconstruct tools -- error analysis

In [0]:
def total_error(W, A, epsilon=1.0):
  # Total squared error of workload queries in W, using strategy in A
  # Matrix-mechanism error calculation
  Aplus = np.linalg.pinv(A)
  frob_term = np.linalg.norm(W @ Aplus, ord='fro')**2
  sens = L1_sensitivity(A)
  return 2 * (sens/epsilon)**2 * frob_term
  

In [0]:
def laplace_total_error(W, epsilon=1.0):
    # total error of answering the workload using the Laplace mechanism
    return 2.0 * (L1_sensitivity(W)/epsilon)**2 * W.shape[0]

For various domain sizes, compare error on CDF workload using A=Identity and A=Hierarchical

In [0]:
print('dom   Identity', '\t', 'Hier', '\t\t', 'Laplace')  
for dom in [2**i for i in [2,3,4,5,6,7,8]]:
  W_ = cdf_workload(dom)
  A_i = identity_strategy(dom)
  A_h = hier_strategy(dom)
  print(f'{dom:3}', 
        f'{total_error(W_, A_i, 1.0):8.3f}', 
        '\t', 
        f'{total_error(W_, A_h, 1.0):8.3f}',
        '\t', 
        f'{laplace_total_error(W_, 1.0):8.3f}'
       )  

dom   Identity 	 Hier 		 Laplace
  4   20.000 	   46.286 	  128.000
  8   72.000 	  175.543 	 1024.000
 16  272.000 	  303.543 	 8192.000
 32 1056.000 	 1067.711 	 65536.000
 64 4160.000 	 3439.838 	 524288.000
128 16512.000 	 6686.614 	 4194304.000
256 65792.000 	 20267.737 	 33554432.000


Exercise 1: Implement the Matrix Mechanism


In [0]:
def matrix_mechanism(W, A, epsilon=1.0):
  
# measure strategy queries in A using laplace mechanism to get y vector
# reconstruct from y to get x_hat
# return noisy workload answers: W * x_hat


Exercise 2: verify error calculation

For fixed epsilon and input data, run many (500 or more) trials of the matrix mechanism, compute mean squared error.  Compare this number with output of total_error(..) 