In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import syft as sy
import copy
import numpy as np
import time
from opacus import PrivacyEngine
import time
from datetime import timedelta
from datetime import datetime
from torchsummary import summary
import math
import os
import shutil
import csv

import importlib
importlib.import_module('FLDataset')
from FLDataset import load_dataset, getActualImgs, CovidDataset, Rescale, ToTensor
from utils import averageModels, averageGradients
from torch.utils.tensorboard import SummaryWriter

In [2]:
class Arguments():
    def __init__(self):
        self.images = 3012
        self.clients = 3
        self.rounds = 1001
        self.epochs = 1
        self.local_batches = 20
        self.lr = 0.01
        self.dropout1 = 0.25
        self.dropout2 = 0.5
        self.C = 0.66
        self.drop_rate = 0.1
        self.torch_seed = 0
        self.log_interval = 10
        self.iid = 'iid'
        self.split_size = int(self.images / self.clients)
        self.samples = self.split_size / self.images 
        self.use_cuda = True
        # save model 
        self.save_model = False
        self.save_model_interval = 500
        # clip grad norm
        self.clip = 1
        # delete tensorboard record
        self.del_runs = False
        # accuracy csv file 
        self.acc_csv = False
        self.acc_file = '0514_3clients_withDP.csv'
        # number of classes per client on non iid case 
        self.noniid_classnum = 2
        # data transform
        self.transform = transforms.Compose([Rescale(32), ToTensor()])
        # number of classes
        self.c_num = 3
        # Opacus get privacy spent DELTA
        self.delta = 0.0001

args = Arguments()

use_cuda = args.use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}


cuda


In [3]:
# delete files in runs (Tensorboard)
if args.del_runs==True:
    folder = 'runs'
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))

In [4]:
# create accuracy csv file
def acc_csv(args, rnd, acc):
    if args.acc_csv==True:
        with open("acc_csv_files/"+args.acc_file, 'a') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([rnd, acc])

In [5]:
hook = sy.TorchHook(torch)
clients = []

for i in range(args.clients):
    clients.append({'hook': sy.VirtualWorker(hook, id="client{}".format(i+1))})

print(clients)
print("number of clients : ", len(clients))

[{'hook': <VirtualWorker id:client1 #objects:0>}, {'hook': <VirtualWorker id:client2 #objects:0>}, {'hook': <VirtualWorker id:client3 #objects:0>}]
number of clients :  3


In [6]:
global_train, global_test, train_group, test_group = load_dataset(args.clients, args.iid, \
                                                                  args.transform, args.c_num, \
                                                                  args.noniid_classnum)

In [7]:
print(len(global_train))
print(type(global_train))
print(len(global_test))
print(type(global_test))
print(len(train_group))
print(type(train_group))
print(len(test_group))
print(type(test_group))

3012
<class 'FLDataset.CovidDataset'>
753
<class 'FLDataset.CovidDataset'>
3
<class 'dict'>
3
<class 'dict'>


In [8]:
for inx, client in enumerate(clients):
    trainset_ind_list = list(train_group[inx])
    print("len(client", str(inx), "train set) = ", len(trainset_ind_list))
    client['trainset'] = getActualImgs(global_train, trainset_ind_list, args.local_batches)
    client['testset'] = getActualImgs(global_test, list(test_group[inx]), args.local_batches)
    client['samples'] = len(trainset_ind_list) / args.images

len(client 0 train set) =  1004
len(client 1 train set) =  1004
len(client 2 train set) =  1004


In [9]:
# transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
global_test_dataset = CovidDataset('./test.csv', transform=transforms.Compose([Rescale(32), ToTensor()]))
global_test_loader = DataLoader(global_test_dataset, batch_size=args.local_batches, shuffle=True, drop_last=True)
print(len(global_test_loader))

37


In [10]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels = 1,
                               out_channels = 32,
                               kernel_size = 3,
                               stride = 1)
#         self.conv1_bn = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(in_channels = 32,
                               out_channels = 64,
                               kernel_size = 3,
                               stride = 1)
        self.fc1 = nn.Linear(14*14*64, 128)
#         self.fc1_bn = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 3)

    def forward(self, x):
        x = F.relu(self.conv1(x))
#         x = self.conv1_bn(x)
        x = F.relu(self.conv2(x))
        
        x = F.max_pool2d(x, 2, 2)
        x = F.dropout(x, p=args.dropout1)
        x = x.view(-1, 14*14*64)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=args.dropout2)
#         x = self.fc1_bn(x)
        x = self.fc2(x)
        return F.softmax(x)

In [11]:
def ClientUpdate(args, device, client):
    client['model'].train()
#     client['model'].send(client['hook'])
    
    for epoch in range(1, args.epochs + 1):
        for batch_idx, (data, target) in enumerate(client['trainset']):
            data, target = data.to(device), target.to(device)
            client['optim'].zero_grad()
#             output = client['model'](data.float())
#             loss = F.nll_loss(output, target.squeeze(1))
            output = client['model'](data)
            loss = client['criterion'](output, target.squeeze(1))
            loss.backward()
            
#             print weight 
#             for name, param in client['model'].named_parameters():
#                 if name=='conv1_bn.weight':
#                     print(name, param.grad)
            
#             torch.nn.utils.clip_grad_norm_(client['model'].parameters(), args.clip)
            client['optim'].step()
            
            
            if batch_idx % args.log_interval == 0 or batch_idx==len(client['trainset'])-1:
#                 loss = loss.get() 
                print('Model [{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    client['hook'].id,
                    epoch, (batch_idx+1) * args.local_batches, len(client['trainset']) * args.local_batches, 
                    100. * (batch_idx+1) / len(client['trainset']), loss.item()/args.log_interval))
    
        eps, alpha = client['optim'].privacy_engine.get_privacy_spent(args.delta)
        print(f"(ε = {eps:.2f}, δ = {args.delta}) for α = {alpha}")
                
#     client['model'].get() 

In [12]:
def test(args, model, device, test_loader, name):
    model.eval()   
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for d in test_loader:
            data = d['image']
            target = d['label']
            data, target = data.to(device), target.to(device)
            if(str(device)=='cuda'):
                model.cuda()
            output = model(data.float())
#             test_loss += F.nll_loss(output, target.squeeze(1), reduction='sum').item() # sum up batch loss
            loss_fn = nn.CrossEntropyLoss(reduction='sum')
            test_loss += loss_fn(output, target.squeeze(1)).item() # sum up batch loss
            pred = output.argmax(1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    
    print('\nTest set: Average loss for {} model: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        name, test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)

In [13]:
writer = SummaryWriter()

In [14]:
torch.manual_seed(args.torch_seed)
global_model = Net().to(device)
summary(global_model, (1, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 30, 30]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
            Linear-3                  [-1, 128]       1,605,760
            Linear-4                    [-1, 3]             387
Total params: 1,624,963
Trainable params: 1,624,963
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.60
Params size (MB): 6.20
Estimated Total Size (MB): 6.81
----------------------------------------------------------------


  response = command_method(*args_, **kwargs_)


In [None]:
# training
for client in clients:
    torch.manual_seed(args.torch_seed)
    client['model'] = Net().to(device)
    client['optim'] = optim.SGD(client['model'].parameters(), lr=args.lr, momentum = 0.8)
    client['criterion'] = nn.CrossEntropyLoss(reduction='mean')
    client['pengine'] = PrivacyEngine(
                                       client['model'],
                                       batch_size=args.local_batches,
#                                        sample_rate=
                                       sample_size=len(client['trainset']),
                                       alphas=range(2,32),
                                       noise_multiplier=0.5,
                                       max_grad_norm=1.0
                                    )
    client['pengine'].attach(client['optim']) 
    
# start training model
training_start_time = time.time()
for fed_round in range(args.rounds):
    print("")
    print("===================================================================")
    print("[round] = ", fed_round+1, "/", args.rounds)
    print("===================================================================")
    
    round_train_start_time = time.time()
    
#     uncomment if you want a randome fraction for C every round
#     args.C = float(format(np.random.random(), '.1f'))    
    
    # number of selected clients
    m = int(max(math.ceil(args.C * args.clients), 1))

    # Selected devices
    np.random.seed(fed_round)
    selected_clients_inds = np.random.choice(range(len(clients)), m, replace=False)
    selected_clients = [clients[i] for i in selected_clients_inds]
    
    # Active devices
#     np.random.seed(fed_round)
#     active_clients_inds = np.random.choice(selected_clients_inds, int((1-args.drop_rate) * m), replace=False)
#     active_clients = [clients[i] for i in active_clients_inds]
    active_clients = selected_clients
    
    # Training 
    client_cnt = 0
    for client in active_clients:
        print("* [client count] = ", client_cnt+1 , "/", len(active_clients))
        client_train_start_time = time.time()
        ClientUpdate(args, device, client)
        client_cnt += 1
        client_train_time = round(time.time()-client_train_start_time)
        print("* [client_train_time] = ", str(timedelta(seconds=(client_train_time))))
        print("---------------------------------------------------------------")
    
#         # Testing 
#         for client in active_clients:
#             test(args, client['model'], device, client['testset'], client['hook'].id)
    
    # Averaging 
#     print("active clients: ", active_clients)
    global_model = averageModels(global_model, active_clients)
    
    # Testing the average model
    acc = test(args, global_model, device, global_test_loader, 'Global')
    writer.add_scalar("Accuracy/train", acc, fed_round)
    writer.flush()
    acc_csv(args, fed_round, acc)
            
    # Share the global model with the clients
    for client in clients:
        client['model'].load_state_dict(global_model.state_dict())
        
    # training time per round
    total_train_time = round(time.time()-training_start_time)
    round_train_time = round(time.time()-round_train_start_time)
    print("** [total train time]: ", str(timedelta(seconds=total_train_time)))
    print("** [round train time]: ", str(timedelta(seconds=round_train_time)))
    
    if (args.save_model and fed_round%args.save_model_interval==0 and fed_round!=0):
        now = datetime.now() 
        date = now.strftime("%Y_%m_%d_%H%M")
        torch.save(global_model.state_dict(), date + "_FedAvg_with_DP_round_" + str(fed_round) + ".pth")
        print("model saved : "+ date +"_FedAvg_with_DP_round_" + str(fed_round) + "10clients.pth")


  "The sample rate will be defined from ``batch_size`` and ``sample_size``."
  "Secure RNG turned off. This is perfectly fine for experimentation as it allows "
  "The sample rate will be defined from ``batch_size`` and ``sample_size``."
  "Secure RNG turned off. This is perfectly fine for experimentation as it allows "
  "The sample rate will be defined from ``batch_size`` and ``sample_size``."
  "Secure RNG turned off. This is perfectly fine for experimentation as it allows "
  current_tensor = hook_self.torch.native_tensor(*args, **kwargs)



[round] =  1 / 1001
* [client count] =  1 / 2


  response = command_method(*args_, **kwargs_)


(ε = 122.63, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------
* [client count] =  2 / 2
(ε = 122.63, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------

Test set: Average loss for Global model: 1.1222, Accuracy: 296/753 (39%)

** [total train time]:  0:00:29
** [round train time]:  0:00:29

[round] =  2 / 1001
* [client count] =  1 / 2
(ε = 122.63, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------
* [client count] =  2 / 2
(ε = 236.04, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------

Test set: Average loss for Global model: 1.1102, Accuracy: 304/753 (40%)

** [total train time]:  0:00:58
** [round train time]:  0:00:29

[round] =  3 / 1001
* [client count] =  1 / 2
(ε = 349.46, δ = 0.0001) for α = 2.0
* [

(ε = 576.29, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------
* [client count] =  2 / 2
(ε = 689.70, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------

Test set: Average loss for Global model: 1.0647, Accuracy: 343/753 (46%)

** [total train time]:  0:03:23
** [round train time]:  0:00:29

[round] =  8 / 1001
* [client count] =  1 / 2
(ε = 462.87, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------
* [client count] =  2 / 2
(ε = 803.12, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------

Test set: Average loss for Global model: 1.1069, Accuracy: 313/753 (42%)

** [total train time]:  0:03:51
** [round train time]:  0:00:29

[round] =  9 / 1001
* [client count] =  1 / 2
(ε = 576.29, δ = 0.0001) for α = 2.0
* [

(ε = 1029.95, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------
* [client count] =  2 / 2
(ε = 1143.37, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------

Test set: Average loss for Global model: 1.1037, Accuracy: 315/753 (42%)

** [total train time]:  0:06:16
** [round train time]:  0:00:29

[round] =  14 / 1001
* [client count] =  1 / 2
(ε = 1256.78, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------
* [client count] =  2 / 2
(ε = 1143.37, δ = 0.0001) for α = 2.0
* [client_train_time] =  0:00:12
---------------------------------------------------------------

Test set: Average loss for Global model: 1.1044, Accuracy: 314/753 (42%)

** [total train time]:  0:06:45
** [round train time]:  0:00:29

[round] =  15 / 1001
* [client count] =  1 / 2
(ε = 1370.20, δ = 0.0001) for α = 

In [None]:
# tensorboard open 
# tensorboard --logdir=/home/citi302/Desktop/Codefolder/FL_DP_covid/runs