In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import syft as sy
import copy
import numpy as np
import time
from opacus import PrivacyEngine
import time
from datetime import timedelta
from datetime import datetime
from torchsummary import summary
import math
import os
import shutil
import csv
import matplotlib.pyplot as plt

import importlib
importlib.import_module('FLDataset_DatasetFolder')
from FLDataset_DatasetFolder import load_dataset, getActualImgs, npy_loader
from utils import averageModels
from torch.utils.tensorboard import SummaryWriter

In [None]:
class Arguments:
    """Hyper-parameters and run options shared by every cell of the experiment."""

    def __init__(self):
        # ---- dataset and federation layout ----
        self.images = 3012
        self.clients = 3
        # images per client under an even split, and that split as a fraction of all images
        self.split_size = int(self.images / self.clients)
        self.samples = self.split_size / self.images 
        self.iid = 'iid'
        # number of classes per client on non iid case
        self.noniid_classnum = 1
        # number of classes
        self.c_num = 3

        # ---- training schedule ----
        self.rounds = 61
        self.epochs = 1
        self.local_batches = 20  # batch size used by the local and test loaders
        self.lr = 0.01
        self.dropout1 = 0.25
        self.dropout2 = 0.5
        self.C = 0.66            # fraction of clients selected in each round
        self.drop_rate = 0.1
        self.torch_seed = 0
        self.log_interval = 10   # batches between training log lines
        self.use_cuda = True

        # ---- checkpoints, TensorBoard cleanup and CSV logging ----
        self.save_model = False
        self.save_model_interval = 500
        self.del_runs = True
        self.acc_csv = True
        self.acc_file = '0609_3clients_DP_1.1_clip_60_npy.csv'
        self.loss_csv = False
        self.loss_file = '0608_3clients_DP_0.5_npy.csv'

        # ---- input pipeline: array -> PIL image -> 32x32 -> tensor ----
        preprocessing_steps = [
            transforms.ToPILImage(),
            # transforms.Grayscale(num_output_channels=1),
            transforms.Resize((32,32)),
            transforms.ToTensor(),
        ]
        self.transform = transforms.Compose(preprocessing_steps)

        # ---- differential privacy bookkeeping ----
        # DELTA passed to Opacus when querying the privacy spent
        self.delta = 0.0001
        # epsilon values of the current round / mean epsilon of every round
        self.eps = []
        self.eps_record = []

# Build the run configuration and pick the compute device once for the whole notebook.
args = Arguments()

use_cuda = args.use_cuda and torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    # extra DataLoader options for GPU runs
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    device = torch.device("cpu")
    kwargs = {}
print(device)


In [None]:
# Delete everything inside the TensorBoard `runs` folder so old runs do not
# mix with the new one.
if args.del_runs:
    folder = 'runs'
    # On a fresh checkout the folder does not exist yet; os.listdir would raise
    # FileNotFoundError, so there is simply nothing to clean in that case.
    if os.path.isdir(folder):
        for filename in os.listdir(folder):
            file_path = os.path.join(folder, filename)
            try:
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except Exception as e:
                # Best-effort cleanup: report the failure and keep going.
                print('Failed to delete %s. Reason: %s' % (file_path, e))

In [None]:
def acc_csv(args, rnd, acc):
    """Append one ``[round, accuracy]`` row to the accuracy CSV file.

    Does nothing unless ``args.acc_csv`` is truthy. The row is appended to
    ``acc_csv_files/<args.acc_file>``; the directory is created on first use.

    Args:
        args: configuration object providing ``acc_csv`` and ``acc_file``.
        rnd: federated round index written in the first column.
        acc: accuracy value written in the second column.
    """
    if not args.acc_csv:
        return
    out_dir = "acc_csv_files"
    # Without this, the first call on a fresh checkout fails with FileNotFoundError.
    os.makedirs(out_dir, exist_ok=True)
    # newline='' is what the csv module requires; otherwise Windows gets blank rows.
    with open(os.path.join(out_dir, args.acc_file), 'a', newline='') as csvfile:
        csv.writer(csvfile).writerow([rnd, acc])

In [None]:
def loss_csv(args, rnd, loss):
    """Append one ``[round, loss]`` row to the loss CSV file.

    Does nothing unless ``args.loss_csv`` is truthy. The row is appended to
    ``loss_csv_files/<args.loss_file>``; the directory is created on first use.

    Args:
        args: configuration object providing ``loss_csv`` and ``loss_file``.
        rnd: federated round index written in the first column.
        loss: loss value written in the second column.
    """
    if not args.loss_csv:
        return
    out_dir = "loss_csv_files"
    # Without this, the first call on a fresh checkout fails with FileNotFoundError.
    os.makedirs(out_dir, exist_ok=True)
    # newline='' is what the csv module requires; otherwise Windows gets blank rows.
    with open(os.path.join(out_dir, args.loss_file), 'a', newline='') as csvfile:
        csv.writer(csvfile).writerow([rnd, loss])

In [None]:
# Create one PySyft virtual worker per federated client; each client is a dict
# that later cells extend with its data, model, optimizer and privacy engine.
hook = sy.TorchHook(torch)
clients = [
    {'hook': sy.VirtualWorker(hook, id="client{}".format(worker_no))}
    for worker_no in range(1, args.clients + 1)
]

print(clients)
print("number of clients : ", len(clients))

In [None]:
# Load the training data and its per-client split.
# NOTE(review): train_group is indexed by client position further down — confirm
# against FLDataset_DatasetFolder.load_dataset.
global_train, train_group = load_dataset(
    args.clients,
    args.iid,
    args.transform,
    args.c_num,
    args.noniid_classnum,
)

In [None]:
# Quick sanity check: size and type of the training set and of the client split.
for loaded_obj in (global_train, train_group):
    print(len(loaded_obj))
    print(type(loaded_obj))

In [None]:
# Give every client its own training loader and record its share of the data.
for client_pos, client in enumerate(clients):
    client_indices = list(train_group[client_pos])
    n_client_samples = len(client_indices)
    print("len(client", str(client_pos), "train set) = ", n_client_samples)
    client['trainset'] = getActualImgs(global_train, client_indices, args.local_batches)
    # fraction of all images held by this client
    client['samples'] = n_client_samples / args.images

In [None]:
# Global test set: .npy files read with npy_loader from the test folder,
# pre-processed with the same transform as the training data.
data_path = "./FinalCovid19Dataset_npy/test"
global_test_dataset = datasets.DatasetFolder(
    root=data_path,
    loader=npy_loader,
    extensions=('.npy',),
    transform=args.transform,
)
# Evaluation must see every test sample exactly once. The previous
# shuffle=True + drop_last=True combination skipped a different incomplete
# last batch every round, while test() still divided by the full dataset size,
# so the reported accuracy was both too low and noisy.
global_test_loader = DataLoader(
    global_test_dataset,
    batch_size=args.local_batches,
    shuffle=False,
    drop_last=False,
)
print(len(global_test_loader))

In [None]:
class Net(nn.Module):
    """Small CNN for 3-channel 32x32 images with 3 output classes.

    Layout: conv(3->32, 3x3) -> ReLU -> conv(32->64, 3x3) -> ReLU ->
    2x2 max-pool -> dropout -> fc(14*14*64 -> 128) -> ReLU -> dropout -> fc(128 -> 3).

    Args:
        dropout1: drop probability after the pooling layer. ``None`` (default)
            reads ``args.dropout1`` from the notebook configuration at call time.
        dropout2: drop probability after the first fully connected layer.
            ``None`` (default) reads ``args.dropout2`` at call time.
    """

    def __init__(self, dropout1=None, dropout2=None):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels = 3,
                               out_channels = 32,
                               kernel_size = 3,
                               stride = 1)
        self.conv2 = nn.Conv2d(in_channels = 32,
                               out_channels = 64,
                               kernel_size = 3,
                               stride = 1)
        # 32x32 input -> 30x30 -> 28x28 after the two 3x3 convs -> 14x14 after pooling
        self.fc1 = nn.Linear(14*14*64, 128)
        self.fc2 = nn.Linear(128, 3)
        self.dropout1 = dropout1
        self.dropout2 = dropout2

    def forward(self, x):
        """Return raw class scores (logits) of shape ``(batch, 3)``.

        The scores are deliberately NOT passed through softmax:
        ``nn.CrossEntropyLoss`` (used for both training and testing in this
        notebook) applies log-softmax itself, so a softmax here would be
        applied twice and flatten the gradients. ``argmax`` over the logits
        gives the same prediction as over probabilities.
        """
        p1 = args.dropout1 if self.dropout1 is None else self.dropout1
        p2 = args.dropout2 if self.dropout2 is None else self.dropout2

        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        # F.dropout defaults to training=True; tie it to the module mode so
        # model.eval() really disables dropout during evaluation.
        x = F.dropout(x, p=p1, training=self.training)
        # Flatten everything but the batch dimension; unlike view(-1, ...) this
        # can never silently fold a wrong spatial size into the batch dimension.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=p2, training=self.training)
        return self.fc2(x)

In [None]:
def ClientUpdate(args, device, client):
    """Train one client's model in place for ``args.epochs`` local epochs.

    Args:
        args: configuration providing ``epochs``, ``log_interval``,
            ``local_batches``, ``delta`` and the list ``eps``.
        device: torch device the batches are moved to.
        client: dict with the keys ``'model'``, ``'optim'``, ``'criterion'``,
            ``'trainset'`` (iterable of ``(data, target)`` batches that supports
            ``len``) and ``'hook'`` (object with an ``id`` used in log lines).
            ``client['optim']`` must expose ``privacy_engine`` (set when an
            Opacus engine is attached to the optimizer).

    Side effects:
        Updates the model parameters, prints progress, and after every epoch
        appends the privacy budget spent so far (rounded to 2 decimals) to
        ``args.eps``.
    """
    model = client['model']
    optimizer = client['optim']
    criterion = client['criterion']
    train_loader = client['trainset']
    num_batches = len(train_loader)

    model.train()

    for epoch in range(1, args.epochs + 1):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # Log every `log_interval` batches and always on the last batch.
            if batch_idx % args.log_interval == 0 or batch_idx == num_batches - 1:
                # Report the loss of this batch as-is. It used to be divided by
                # log_interval although it is a single-batch value, not a sum
                # over the interval, which under-reported it by that factor.
                print('Model [{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    client['hook'].id,
                    epoch, (batch_idx+1) * args.local_batches, num_batches * args.local_batches, 
                    100. * (batch_idx+1) / num_batches, loss.item()))

        # Privacy budget spent by this client so far, for the configured delta.
        eps, alpha = optimizer.privacy_engine.get_privacy_spent(args.delta)
        print(f"(ε = {eps:.2f}, δ = {args.delta}) for α = {alpha}")
        args.eps.append(round(eps,2))
                

In [None]:
def test(args, model, device, test_loader, name):
    model.eval()   
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
#             data = d['image']
#             target = d['label']
            data, target = data.to(device), target.to(device)
            if(str(device)=='cuda'):
                model.cuda()
            output = model(data.float())
#             test_loss += F.nll_loss(output, target.squeeze(1), reduction='sum').item() # sum up batch loss
            loss_fn = nn.CrossEntropyLoss(reduction='sum')
            test_loss += loss_fn(output, target).item() # sum up batch loss
            pred = output.argmax(1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss for {} model: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        name, test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset), test_loss

In [None]:
# TensorBoard writer; the training loop logs the global model's accuracy of every round through it.
writer = SummaryWriter()

In [None]:
# Seed PyTorch before building the global model so its initial weights are reproducible.
torch.manual_seed(args.torch_seed)
global_model = Net().to(device)
# Print a layer summary for a 3-channel 32x32 input.
summary(global_model, (3, 32, 32))

In [None]:
# ---------------------------------------------------------------------------
# Per-client setup: model, optimizer, loss and an Opacus privacy engine.
# ---------------------------------------------------------------------------
for client in clients:
    # Re-seed before every model so all clients start from identical weights.
    torch.manual_seed(args.torch_seed)
    client['model'] = Net().to(device)
    client['optim'] = optim.SGD(client['model'].parameters(), lr=args.lr)
    client['criterion'] = nn.CrossEntropyLoss(reduction='mean')

    # Privacy accounting needs the *batch sampling rate* of this client's data.
    # len(client['trainset']) is the number of batches (ClientUpdate uses
    # len(trainset) * local_batches as the sample total), so the sample count is
    # batches * batch size. Previously sample_size received the batch count and
    # sample_rate received args.C, which is the fraction of clients picked per
    # round and unrelated to batch sampling; the reported epsilon was therefore
    # computed for the wrong rate. Noise and clipping are unchanged.
    client_sample_size = len(client['trainset']) * args.local_batches
    client['pengine'] = PrivacyEngine(
                                       client['model'],
                                       batch_size=args.local_batches,
                                       sample_size=client_sample_size,
                                       sample_rate=args.local_batches / client_sample_size,
                                       alphas=[2,3,4,5,6,7,8,10,11,12,14,16,20,24,28,32,64,256],
                                       noise_multiplier=1.1,
                                       max_grad_norm=0.05
                                    )
    client['pengine'].attach(client['optim'])

# ---------------------------------------------------------------------------
# Federated rounds
# ---------------------------------------------------------------------------
# Start from an empty record so re-running this cell does not append to the
# values of a previous run.
args.eps_record = []
training_start_time = time.time()
for fed_round in range(args.rounds):
    print("")
    print("===================================================================")
    print("[round] = ", fed_round+1, "/", args.rounds)
    print("===================================================================")

    round_train_start_time = time.time()

    # Uncomment for a random client fraction C in every round:
    # args.C = float(format(np.random.random(), '.1f'))

    # Number of selected clients: at least one, never more than exist.
    m = min(len(clients), int(max(math.ceil(args.C * args.clients), 1)))

    # Seeding with the round index makes the client selection reproducible.
    np.random.seed(fed_round)
    selected_clients_inds = np.random.choice(range(len(clients)), m, replace=False)
    selected_clients = [clients[i] for i in selected_clients_inds]

    # Client drop-out (args.drop_rate) is not simulated: every selected client trains.
    active_clients = selected_clients

    # Local training on every active client; args.eps collects this round's epsilons.
    client_cnt = 0
    args.eps = []
    for client in active_clients:
        print("* [client count] = ", client_cnt+1 , "/", len(active_clients))
        client_train_start_time = time.time()
        ClientUpdate(args, device, client)
        client_cnt += 1
        client_train_time = round(time.time()-client_train_start_time)
        print("* [client_train_time] = ", str(timedelta(seconds=(client_train_time))))
        print("---------------------------------------------------------------")

    # Mean epsilon over the clients that trained in this round.
    round_eps_mean = sum(args.eps) / len(args.eps)
    print("epsilon mean: ", round_eps_mean)
    args.eps_record.append(round_eps_mean)

    # Aggregate the active clients' models into the global model.
    global_model = averageModels(global_model, active_clients)

    # Evaluate the aggregated model on the global test loader.
    client_test_start_time = time.time()
    acc, loss = test(args, global_model, device, global_test_loader, 'Global')
    client_test_time = round(time.time()-client_test_start_time)
    print("* [global_test_time] = ", str(timedelta(seconds=(client_test_time))))

    # NOTE: the tag says "train", but the value is the accuracy measured on the
    # global test loader; the tag is kept so existing TensorBoard runs stay comparable.
    writer.add_scalar("Accuracy/train", acc, fed_round)
    writer.flush()
    acc_csv(args, fed_round, acc)
    loss_csv(args, fed_round, loss)

    # Share the global model with all clients (not only the active ones).
    for client in clients:
        client['model'].load_state_dict(global_model.state_dict())

    # Wall-clock time: whole training so far and this round.
    total_train_time = round(time.time()-training_start_time)
    round_train_time = round(time.time()-round_train_start_time)
    print("** [total train time]: ", str(timedelta(seconds=total_train_time)))
    print("** [round train time]: ", str(timedelta(seconds=round_train_time)))

    if (args.save_model and fed_round%args.save_model_interval==0 and fed_round!=0):
        date = datetime.now().strftime("%Y_%m_%d_%H%M")
        # Build the path once so the log line names the file that was really
        # written (it used to print a different "...10clients.pth" name).
        model_path = date + "_FedAvg_with_DP_round_" + str(fed_round) + ".pth"
        torch.save(global_model.state_dict(), model_path)
        print("model saved : " + model_path)


In [None]:
# Mean epsilon of every federated round, in round order (filled by the training loop).
print(args.eps_record)

In [None]:
# Plot the mean privacy budget (epsilon) recorded for every federated round.
# The x axis follows the number of recorded rounds instead of a hard-coded 61,
# so the cell also works when args.rounds is changed.
x1 = list(range(len(args.eps_record)))

# The curve is args.eps_record (epsilon), so the title and y label name epsilon;
# they previously said "Accuracy" although no accuracy is plotted here.
plt.title("FL with Differential Privacy: Privacy Budget per Round")
plt.ylabel("Epsilon (mean over active clients)")
plt.xlabel("Rounds")

# The legend is titled "Noise Multiplier"; the privacy engines are created with
# noise_multiplier=1.1, so that is the label of this curve (it used to say '0.0').
plt.plot(x1, args.eps_record, label='1.1')
plt.grid(color = 'gray', linestyle = '--')
plt.legend(bbox_to_anchor=(1.03, 1.03), loc='upper left',
           title="Noise Multiplier")
# No fixed y-limits: the former ylim(20, 8000) hid every value outside that range.
plt.xticks(np.arange(0, len(x1), 10))
plt.show()

In [None]:
# Append the per-round mean epsilon to a CSV file as `round,epsilon` rows.
# - The file is opened once instead of once per row.
# - All recorded rounds are written instead of a hard-coded 61, which raised
#   IndexError for shorter runs.
# - The csv writer gets its own name: rebinding the global `writer` replaced the
#   TensorBoard SummaryWriter and broke a later re-run of the training cell.
with open("1.1_clip0.05.csv", 'a', newline='') as csvfile:
    eps_csv_writer = csv.writer(csvfile)
    for round_idx, round_eps in enumerate(args.eps_record):
        eps_csv_writer.writerow([round_idx, round_eps])