# Tutorial DPBag

## Differentially Private Bagging: Improved utility and cheaper privacy than subsample-and-aggregate

This tutorial shows how to use DPBag to train a differentially private classification model. We use the well-known UCI Adult dataset as an example.

Experiment Settings (Import necessary packages and functions)

In [None]:
#%% Necessary Packages
import numpy as np  # averages the metrics across runs
from tqdm import tqdm  # progress bar over the experiment iterations
import pandas as pd  # builds the final results table

#%% Functions
# DPBag algorithm entry point (project-local module)
from DPBag_Final import DPBag

# Loader for the Adult dataset (project-local module)
from data_loading import Data_Loading_Adult

print('Finish importing necessary packages and functions')

Set parameters

In [None]:
#%% Parameters

# Dataset label (only used in the status messages printed by this notebook)
data_name = 'adult'

# How many times the whole experiment is repeated
Iterations = 2

# Settings handed to DPBag as a single dictionary.
# Keys are inserted in a fixed order so the printed summary stays stable.
parameters = {
    'epsilon': 5,
    'delta': 1e-3,
    'teacher_no': 50,
}
# lamda is derived from the number of teachers: 2 / teacher_no
parameters['lamda'] = 2.0 / parameters['teacher_no']
parameters['part_no'] = 10

print(f'{data_name} dataset is selected')
print(f'parameters are {parameters}')

Output Initialization

In [None]:
# One independent, initially empty list per quantity collected from each run
Output_AUC, Output_APR, Output_ACC, Output_Budget, Output_Model = (
    [] for _ in range(5)
)

print('Finish Output Initialization')

Run DPBag algorithm

In [None]:

# Repeat the experiment `Iterations` times and keep every run's outputs
for itr in tqdm(range(Iterations)):

    # Load the train / validation / test splits for this run
    # (y_valid is returned by the loader but not passed to DPBag below)
    x_train, y_train, x_valid, y_valid, x_test, y_test = Data_Loading_Adult()
    print(f'{data_name} Data Loaded')

    # Run DPBag; it returns five results that are unpacked in this order
    Temp_ACC, Temp_AUC, Temp_APR, Temp_Budget, Temp_Model = DPBag(
        x_train, y_train, x_valid, x_test, y_test, parameters
    )
    print('Finish DPBag Algorithm')

    # File each result into its matching accumulator list
    for collector, value in (
        (Output_ACC, Temp_ACC),
        (Output_AUC, Temp_AUC),
        (Output_APR, Temp_APR),
        (Output_Budget, Temp_Budget),
        (Output_Model, Temp_Model),
    ):
        collector.append(value)
        

Performance Table

In [None]:
# Number of entries in a single run's accuracy result; the 'Epsilon' column
# simply numbers these entries 1..num_levels.
num_levels = len(Output_ACC[0])

# Average each metric over the runs (axis 0 is the run index, since one
# element was appended per iteration).
dict_metrics = {
    'Epsilon': list(range(1, num_levels + 1)),
    'Accuracy': np.mean(Output_ACC, axis=0),
    'AUROC': np.mean(Output_AUC, axis=0),
    'AUPRC': np.mean(Output_APR, axis=0),
    'Budget': np.mean(Output_Budget, axis=0),
}

Output_Metric = pd.DataFrame(dict_metrics)

# Show the averaged metrics table
print(Output_Metric)

Differentially Private Classification Models

In [None]:
# Pick one entry from the models produced by the first run (run index 0).
# NOTE(review): the performance table labels its rows 1..N, so if the model
# container is a zero-based list, index `epsilon` selects the entry labelled
# epsilon + 1 in that table — confirm against DPBag's return value.
epsilon = 1
selected_model = Output_Model[0][epsilon]
print(selected_model)