Setup: install dependencies and import libraries

In [None]:
!pip install tensorflow-privacy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

from torch import Tensor
import numpy as np
from util import task_sampling, average_vars, meta_update, gradient_clipping, Add_noise, average_vars_batch
import copy
from numpy.linalg import norm
from torch.utils.data import DataLoader
from tensorflow_privacy.privacy.analysis.compute_noise_from_budget_lib import compute_noise


Collecting tensorflow-privacy
  Downloading tensorflow_privacy-0.8.0-py3-none-any.whl (287 kB)
[?25l[K     |█▏                              | 10 kB 31.9 MB/s eta 0:00:01[K     |██▎                             | 20 kB 39.4 MB/s eta 0:00:01[K     |███▍                            | 30 kB 46.8 MB/s eta 0:00:01[K     |████▋                           | 40 kB 34.8 MB/s eta 0:00:01[K     |█████▊                          | 51 kB 38.6 MB/s eta 0:00:01[K     |██████▉                         | 61 kB 43.8 MB/s eta 0:00:01[K     |████████                        | 71 kB 28.9 MB/s eta 0:00:01[K     |█████████▏                      | 81 kB 30.5 MB/s eta 0:00:01[K     |██████████▎                     | 92 kB 33.1 MB/s eta 0:00:01[K     |███████████▍                    | 102 kB 32.5 MB/s eta 0:00:01[K     |████████████▌                   | 112 kB 32.5 MB/s eta 0:00:01[K     |█████████████▊                  | 122 kB 32.5 MB/s eta 0:00:01[K     |██████████████▉                 | 

In [None]:
# Notebook-level experiment settings.
# NOTE(review): data_generation() below defines its own local copies of these
# values (and uses N_test_task = 500 there, not 400), so editing the numbers
# here does not change what gets written to disk.
N_train_task = 1000
N_sample_per_task = 10
N_test_task = 400

# Directory prefixes; per-task files are addressed as <prefix><task id>.pt.
train_path = './data/train/'
test_path = './data/test/'

In [None]:
!mkdir data
!mkdir data/test
!mkdir data/train

Data Generation:

In [None]:

def _sample_task_dataset(prototypes, d, n_samples, sigma, noise_sigma):
    """Draw one synthetic linear-regression task and return it as a TensorDataset.

    :param prototypes: sequence of three length-``d`` mean weight vectors
    :param d: input dimension
    :param n_samples: number of (x, y) pairs to draw for the task
    :param sigma: std of the Gaussian perturbation applied to the chosen prototype
    :param noise_sigma: std of the Gaussian label noise
    :return: ``TensorDataset`` with X of shape (n_samples, d) and y of shape (n_samples, 1)
    """
    # Choose a prototype with probabilities 0.33 / 0.34 / 0.33.
    choice = np.random.rand()
    if choice < 0.33:
        w_bar = prototypes[0]
    elif choice < 0.67:
        w_bar = prototypes[1]
    else:
        w_bar = prototypes[2]

    # Task-specific weights are a Gaussian perturbation of the prototype.
    w = np.random.normal(w_bar, sigma).reshape(d, 1)

    # Features are uniform in [0, 1) and then scaled to unit L2 norm per row.
    X = np.random.random((n_samples, d))
    X = X / norm(X, axis=1, ord=2)[:, None]

    noise = np.random.normal(0, noise_sigma, n_samples).reshape(n_samples, 1)
    y = np.matmul(X, w) + noise

    return TensorDataset(torch.Tensor(X), torch.Tensor(y))


def data_generation(n_train_task=1000, n_sample_per_task=10, n_test_task=500,
                    sigma=0.5, noise_sigma=0.5,
                    train_path='./data/train/', test_path='./data/test/'):
    """Generate the synthetic train/test tasks and save one ``.pt`` file per task.

    Called with no arguments this writes exactly what the original version wrote:
    1000 training tasks with 10 samples each and 500 test tasks with 50 samples each,
    named ``1.pt``, ``2.pt``, ... inside ``train_path`` / ``test_path``.

    :param n_train_task: number of training tasks to write
    :param n_sample_per_task: samples per training task (test tasks get 5x as many)
    :param n_test_task: number of test tasks to write
    :param sigma: std of the per-task weight perturbation
    :param noise_sigma: std of the label noise
    :param train_path: output directory for training tasks (created if missing)
    :param test_path: output directory for test tasks (created if missing)
    :return: None
    """
    import os  # local import: keeps this cell self-contained

    d = 30    # dimension

    # Three prototypes, each active on a different block of 10 coordinates.
    prototypes = (
        np.concatenate(([2]*10, [0]*10, [0]*10)),
        np.concatenate(([0]*10, [-4]*10, [0]*10)),
        np.concatenate(([0]*10, [0]*10, [6]*10)),
    )

    # torch.save does not create directories, so make sure they exist first.
    os.makedirs(train_path, exist_ok=True)
    os.makedirs(test_path, exist_ok=True)

    for i in range(n_train_task):
        dataset = _sample_task_dataset(prototypes, d, n_sample_per_task, sigma, noise_sigma)
        torch.save(dataset, os.path.join(train_path, str(i + 1) + '.pt'))

    # Test tasks carry 5x the samples of a training task.
    for i in range(n_test_task):
        dataset = _sample_task_dataset(prototypes, d, n_sample_per_task * 5, sigma, noise_sigma)
        torch.save(dataset, os.path.join(test_path, str(i + 1) + '.pt'))

Model definition

In [None]:
class Model(nn.Module):
    """Bias-free linear regressor consisting of a single ``nn.Linear(in_features, 1)`` layer.

    :param in_features: input dimension; defaults to 30, the value that was previously hard-coded
    """

    def __init__(self, in_features=30):
        super().__init__()
        self.fc1 = nn.Linear(in_features, 1, bias=False)

    def forward(self, x):
        """Return the linear prediction ``fc1(x)``."""
        return self.fc1(x)

def model_train(model, dataloader, old_parameters, ntier, optim, loss_fn, lam_reg):
    """Run ``ntier`` optimizer steps on ``model`` with a proximal penalty towards ``old_parameters``.

    Each step minimises ``loss_fn(model(x), y) + lam_reg * sum_k ||w_k - w_meta_k||^2``
    on one mini-batch.

    :param model: module to train in place
    :param dataloader: loader yielding ``(inputs, labels)`` batches
    :param old_parameters: anchor tensors, paired in order with ``model.parameters()``
    :param ntier: number of optimizer steps to take
    :param optim: optimizer bound to ``model``'s parameters
    :param loss_fn: data-fit loss applied to ``(prediction, labels)``
    :param lam_reg: weight of the proximal penalty
    :return: None
    """
    # Built once rather than on every iteration.
    sq_dist = nn.MSELoss(reduction='sum')

    # The anchors are constants of the objective. Detaching them stops backward()
    # from accumulating unused gradients into the caller's copies.
    anchors = [w_meta.detach() for w_meta in old_parameters]

    for _ in range(ntier):
        # A fresh iterator is created every step, so each step uses the first
        # batch the loader yields (a new random batch when the loader shuffles).
        inputs, labels = next(iter(dataloader))
        optim.zero_grad()

        loss = loss_fn(model(inputs), labels)
        penalty = torch.stack(
            [sq_dist(w, anchor) for w, anchor in zip(model.parameters(), anchors)]
        ).sum()
        loss = loss + lam_reg * penalty

        loss.backward()
        optim.step()


def train_loss(model, dataloader, old_parameters, loss_fn, lam_reg):
    """Evaluate the regularised objective of ``model`` over ``dataloader``.

    The data term is the mean of the per-batch losses (not a per-sample mean), and the
    penalty is ``lam_reg * sum_k ||w_k - w_meta_k||^2``.

    :param model: module to evaluate
    :param dataloader: loader yielding ``(inputs, labels)`` batches
    :param old_parameters: anchor tensors, paired in order with ``model.parameters()``
    :param loss_fn: data-fit loss applied to ``(prediction, labels)``
    :param lam_reg: weight of the proximal penalty
    :return: the objective value as a Python float
    :raises ValueError: if ``dataloader`` yields no batches
    """
    sq_dist = nn.MSELoss(reduction='sum')
    total = 0.0
    n_batches = 0

    # Pure evaluation: no autograd graph is needed.
    with torch.no_grad():
        for ipt, label in dataloader:
            total += loss_fn(model(ipt), label).item()
            n_batches += 1

        if n_batches == 0:
            raise ValueError("train_loss received an empty dataloader")

        penalty = torch.stack(
            [sq_dist(w, w_meta) for w, w_meta in zip(model.parameters(), old_parameters)]
        ).sum().item()

    return total / n_batches + lam_reg * penalty

DP-SGD

In [None]:
from torch.autograd import grad
from torch.nn import parameter
class MetaDPSGD():
    """Meta-trainer that keeps ``q`` meta-learners and updates them from per-task adaptations.

    Relies on names defined outside the class: the directory prefixes ``train_path`` /
    ``test_path``, the functions ``model_train`` / ``train_loss``, and the ``util`` helpers
    ``task_sampling``, ``gradient_clipping``, ``average_vars_batch``, ``meta_update`` and
    ``Add_noise``.
    """

    def __init__(self, models, q):
        """
        :param models: list of meta-learner modules
        :param q: number of meta-learners in ``models`` that are used
        """
        self.models = models
        # State-dict keys of the first model; used to rebuild state dicts after an update.
        self.keys = list(models[0].state_dict().keys())
        self.q = q

    def _device(self):
        """Return the device of the first meta-learner (CPU if it has no parameters)."""
        first = next(self.models[0].parameters(), None)
        return first.device if first is not None else torch.device('cpu')

    def train_step(self, N_train_task,  inner_iters,
                   meta_step_size, meta_batch_size,
                lam_reg, optimizer_list, loss, maximum_norm, noise_multiplier, flag):

        """
        Perform one training step of meta DP-SGD.

        For every sampled task each meta-learner is adapted with ``model_train``; the one
        with the lowest ``train_loss`` contributes ``-lam_reg * (adapted - meta)`` as that
        task's update, after which the meta-learner is restored. The per-learner updates
        are then combined with ``average_vars_batch`` and applied with ``meta_update``.

        :param N_train_task: forwarded to ``task_sampling`` together with ``meta_batch_size``
        :param inner_iters: number of inner-loop iterations
        :param meta_step_size: step size for the meta update
        :param meta_batch_size: number of tasks sampled in each iteration
        :param lam_reg: regularization parameter
        :param optimizer_list: one optimizer per meta-learner, in the same order as ``self.models``
        :param loss: data-fit loss passed to ``model_train`` / ``train_loss``
        :param maximum_norm: bound passed to ``gradient_clipping`` and ``Add_noise``
        :param noise_multiplier: noise scale passed to ``Add_noise``
        :param flag: when truthy, per-task updates are clipped and noise is added
        :return: None
        """

        # Load task data onto the same device as the models instead of a fixed 'cuda:0'.
        device = self._device()

        # One bucket of per-task updates for each meta-learner.
        new_vars = [[] for _ in range(self.q)]

        task_index = task_sampling(N_train_task, meta_batch_size)

        # Snapshots used both as proximal anchors and to restore the models after adaptation.
        old_state_dict_list = [copy.deepcopy(self.models[i].state_dict()) for i in range(self.q)]
        old_parameters_list = [[copy.deepcopy(para) for para in self.models[i].parameters()] for i in range(self.q)]
        for task in task_index:
            best_model_index = -1
            best_parameters = []
            best_training_loss = np.inf
            train_data = torch.load(train_path+str(task)+'.pt', map_location=device)
            train_dataloader = DataLoader(train_data, batch_size=4, shuffle=True)
            for i in range(self.q):
                model = self.models[i]
                optim = optimizer_list[i]
                model_train(model, train_dataloader, old_parameters_list[i], inner_iters, optim, loss, lam_reg)
                training_loss = train_loss(model, train_dataloader, old_parameters_list[i], loss, lam_reg)
                if training_loss < best_training_loss:
                    best_model_index = i
                    best_training_loss = training_loss
                    best_parameters = [copy.deepcopy(para) for para in model.parameters()]
                # Undo the adaptation so the next task starts from the meta parameters.
                model.load_state_dict(old_state_dict_list[i])

            # NOTE(review): if no loss compares below infinity (e.g. all NaN),
            # best_model_index stays -1 and an empty update lands in the last bucket.
            gradient = [-lam_reg*(v - val) for v, val in zip(best_parameters, old_parameters_list[best_model_index])]
            if flag:
                gradient = gradient_clipping(gradient, maximum_norm)
            new_vars[best_model_index].append(gradient)

        for i in range(self.q):
            if new_vars[i]:
                gradient = average_vars_batch(new_vars[i], meta_batch_size)
                new_states = meta_update(old_parameters_list[i], gradient, meta_step_size)
            else:
                # No task selected this learner: keep its parameters.
                new_states = copy.deepcopy(old_parameters_list[i])

            if flag:
                new_states = Add_noise(new_states, noise_multiplier, maximum_norm, meta_batch_size, meta_step_size)

            # Pair the updated tensors with the recorded state-dict keys, in order.
            state_dict = {}
            for key, val in zip(self.keys, new_states):
                state_dict[key] = val
            self.models[i].load_state_dict(state_dict)

    def evaluate(self,  inner_iters, optimizer_list, loss, lam_reg, n_test_task=400, n_support=10):
        """
        Estimate the transfer risk over the saved test tasks.

        Each test task is split at random into ``n_support`` adaptation samples and the
        remaining query samples. Every meta-learner is adapted on the support split, the
        one with the lowest ``train_loss`` is scored on the query split, and all
        meta-learners are then restored.

        :param inner_iters: number of inner-loop iterations
        :param optimizer_list: one optimizer per meta-learner, in the same order as ``self.models``
        :param loss: data-fit loss
        :param lam_reg: regularization parameter
        :param n_test_task: number of test tasks to read (files ``1.pt`` .. ``n_test_task.pt``)
        :param n_support: number of samples per task used for adaptation
        :return: average query loss across the evaluated tasks (also printed)
        """
        device = self._device()
        transfer_risk = []
        old_state_dict_list = [copy.deepcopy(self.models[i].state_dict()) for i in range(self.q)]
        old_parameters_list = [[copy.deepcopy(para) for para in self.models[i].parameters()] for i in range(self.q)]
        for j in range(n_test_task):
            idx = j+1
            test_data = torch.load(test_path+str(idx)+'.pt', map_location=device)
            # Everything that is not used for adaptation becomes the query set.
            n_query = len(test_data) - n_support
            train_set, test_set = torch.utils.data.random_split(test_data, [n_support, n_query])
            train_loader = DataLoader(train_set, batch_size=4, shuffle=True)

            best_model_index = -1
            best_training_loss = np.inf

            for i in range(self.q):
                model = self.models[i]
                optim = optimizer_list[i]
                model_train(model, train_loader, old_parameters_list[i], inner_iters, optim, loss, lam_reg)
                training_loss = train_loss(model, train_loader, old_parameters_list[i], loss, lam_reg)
                if training_loss < best_training_loss:
                    best_model_index = i
                    best_training_loss = training_loss

            # The models are still in their adapted state here; they are restored below.
            model = self.models[best_model_index]
            running_loss = 0
            test_loader = DataLoader(test_set, batch_size=1, shuffle=True)
            with torch.no_grad():
                for ipt, label in test_loader:
                    running_loss += loss(model(ipt), label).item()
            transfer_risk.append(running_loss/n_query)
            for i in range(self.q):
                self.models[i].load_state_dict(old_state_dict_list[i])

        average_risk = np.average(transfer_risk)
        print(average_risk)
        return average_risk

Running

In [None]:

# Fall back to CPU so the cell does not fail on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Seed the NumPy and PyTorch generators so a Restart & Run All is repeatable.
# NOTE(review): helpers in util may draw from other generators; confirm.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

meta_learners = []
optimizers = []
q = 2

for i in range(q):
    model = Model().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.07)
    meta_learners.append(model)
    optimizers.append(optimizer)

lam_reg = 0.4
inner_iteration = 20
meta_step_size = 0.7

loss_fn = nn.MSELoss()
#-----------------hyperparameters-------------
# N_train_task, maximum_norm and noise_multiplier are reassigned inside the sweep;
# sampling_rate and epoch are not read in this cell.
N_train_task = 50
sampling_rate = 0.05
epoch = 3
maximum_norm = 1
noise_multiplier = 1
meta_batch_size = 10
n_repeats = 5          # independent repetitions averaged per configuration
#--------------------------------------------------
result = {}
for epsilon in [3,10,10000]:
    result[epsilon] = []
    for N_train_task in range(100, 701, 100):
        print("epsilon:" ,epsilon)
        print("N_train_task", N_train_task)
        running_loss = 0
        for i in range(n_repeats):
            # Fresh data and freshly initialised learners for every repetition.
            data_generation()
            for j in range(q):
                meta_learner = meta_learners[j]
                for layer in meta_learner.children():
                    if hasattr(layer, 'reset_parameters'):
                        layer.reset_parameters()
            meta_iteration = int(N_train_task)
            if epsilon > 100:
                # Very large epsilon is treated as the non-private baseline.
                noise_multiplier = 0
                maximum_norm = 1
                flag = False
            else:
                # Positional arguments are (n, batch_size, target_epsilon, epochs, delta, noise_lbd).
                # Passing meta_batch_size as `epochs` makes the accounted step count
                # equal N_train_task, which matches meta_iteration.
                noise_multiplier = compute_noise(N_train_task, meta_batch_size, epsilon, meta_batch_size, 1e-5, 1e-6)
                maximum_norm = 1
                flag = True
            meta_SGD = MetaDPSGD(meta_learners, q)
            for step in range(1, meta_iteration + 1):
                meta_SGD.train_step(N_train_task, inner_iteration, meta_step_size, meta_batch_size, lam_reg, optimizers, loss_fn, maximum_norm, noise_multiplier, flag)
                # Report the number of completed steps.
                if step % 100 == 0:
                    print(step)
            print("evaluation start:..............")
            running_loss += meta_SGD.evaluate(inner_iteration, optimizers, loss_fn, lam_reg)

        result[epsilon].append(running_loss/n_repeats)
    print(result)
print(result)

epsilon: 3
N_train_task 100
DP-SGD with sampling rate = 10% and noise_multiplier = 1.7982448384227756 iterated over 100 steps satisfies differential privacy with eps = 3 and delta = 1e-05.
100
evaluation start:..............
2.5587754706359145
DP-SGD with sampling rate = 10% and noise_multiplier = 1.7982448384227756 iterated over 100 steps satisfies differential privacy with eps = 3 and delta = 1e-05.
100
evaluation start:..............
2.3271878277438383
DP-SGD with sampling rate = 10% and noise_multiplier = 1.7982448384227756 iterated over 100 steps satisfies differential privacy with eps = 3 and delta = 1e-05.
100
evaluation start:..............
3.012020631756666
DP-SGD with sampling rate = 10% and noise_multiplier = 1.7982448384227756 iterated over 100 steps satisfies differential privacy with eps = 3 and delta = 1e-05.
100
evaluation start:..............
2.724055683488677
DP-SGD with sampling rate = 10% and noise_multiplier = 1.7982448384227756 iterated over 100 steps satisfies di

In [None]:
# Print the current weights of every meta-learner.
for model in meta_learners:
    print(model.state_dict())

In [None]:
# Evaluate a single, freshly initialised learner on the test tasks.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
meta_learner = Model().to(device)
lam_reg = 0.5
inner_iteration = 20

optimizer = optim.SGD(meta_learner.parameters(), lr=0.07)
loss_fn = nn.MSELoss()

# MetaDPSGD takes a list of models plus their count, and evaluate() indexes
# optimizer_list per model, so the single learner and optimizer are wrapped in lists.
meta_SGD = MetaDPSGD([meta_learner], 1)
meta_SGD.evaluate(inner_iteration, [optimizer], loss_fn, lam_reg)