# Federated Learning Model Poisoning Attack Simulation

In [2]:
!pip install -q flwr[simulation] torch torchvision matplotlib opacus

from collections import OrderedDict
from typing import List, Tuple, Optional, Callable, Dict
from matplotlib import pyplot as plt
import certifi
import ssl
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split, Dataset, Subset
from torchvision.datasets import CIFAR10
import flwr as fl
from flwr.common import Parameters, Scalar, FitRes, Metrics, ReconnectIns
import os
from opacus import PrivacyEngine
from opacus.utils.uniform_sampler import UniformWithReplacementSampler
from tqdm import tqdm

# Workaround kept from the original author: setting this stopped matplotlib from killing the kernel.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
# Workaround for an SSL certificate error while downloading the dataset: the default HTTPS
# context factory is replaced by the *unverified* one, so certificate checks are skipped.
# NOTE(review): `certifi` is imported above but never used; presumably a CA-bundle based
# context was the intended fix -- confirm before running this outside a sandbox.
ssl._create_default_https_context = ssl._create_unverified_context
DEVICE = torch.device("cpu")  # "cpu" -> train on the CPU | "cuda" -> train on the GPU

In [3]:
from logging import WARNING # we need those imports to implement the strategy class
from typing import Callable, Dict, List, Optional, Tuple, Union

from flwr.common import (
    EvaluateIns,
    EvaluateRes,
    FitIns,
    FitRes,
    MetricsAggregationFn,
    NDArrays,
    Parameters,
    Scalar,
    ndarrays_to_parameters,
    parameters_to_ndarrays,
)
from flwr.common.logger import log
from flwr.server.client_manager import ClientManager
from flwr.server.client_proxy import ClientProxy

from flwr.server.strategy.aggregate import aggregate, weighted_loss_avg, aggregate_krum
from flwr.server.strategy.strategy import Strategy

In [4]:
# class names of the example dataset (CIFAR-10, loaded by load_datasets)
CLASSES = ("plane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")

# number of partitions the training set is split into (one per potential federated client);
# also the base of the malicious-client threshold in client_fn.
# Note that the simulation below is started with num_clients=4, i.e. only the first partitions are used.
NUM_CLIENTS = 10

# mini-batch size used by every DataLoader in this notebook
BATCH_SIZE = 32

In [5]:
# Per-client privacy-accountant histories: read by client_fn (histories[int(cid)]) and
# overwritten by AttackSimulationStrategy.aggregate_fit after every round.
# Four slots, one per simulated client (start_simulation is called with num_clients=4).
histories = [None] * 4

In [6]:
def load_datasets():
    """Download CIFAR-10 and build the per-client train/validation loaders.

    The training set is cut into ``NUM_CLIENTS`` equally sized partitions
    (seeded, so the split is reproducible); each partition is split again
    into 90% training / 10% validation data.

    Returns:
        A 4-tuple ``(trainloaders, valloaders, testloader, shadowloaders)``:
        ``trainloaders`` / ``valloaders`` hold one ``DataLoader`` per
        partition, ``testloader`` wraps the official CIFAR-10 test split, and
        ``shadowloaders`` is an empty list (it is never populated here and is
        only returned so the tuple shape callers unpack stays the same).
    """
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )
    trainset = CIFAR10("./dataset", train=True, download=True, transform=transform)
    testset = CIFAR10("./dataset", train=False, download=True, transform=transform)

    # split the training set into NUM_CLIENTS partitions to simulate the individual datasets
    partition_size = len(trainset) // NUM_CLIENTS
    lengths = [partition_size] * NUM_CLIENTS
    # random_split requires the lengths to add up to len(trainset); when the size is not
    # divisible by NUM_CLIENTS the leftover samples go into an extra split that is dropped.
    leftover = len(trainset) - partition_size * NUM_CLIENTS
    if leftover:
        lengths.append(leftover)
    datasets = random_split(trainset, lengths, torch.Generator().manual_seed(42))[:NUM_CLIENTS]

    # split each partition into train/val and create the DataLoaders
    trainloaders = []
    valloaders = []
    shadowloaders = []  # intentionally left empty (see docstring)
    for ds in datasets:
        len_val = len(ds) // 10  # 10% validation set
        len_train = len(ds) - len_val
        ds_train, ds_val = random_split(ds, [len_train, len_val], torch.Generator().manual_seed(42))
        trainloaders.append(DataLoader(ds_train, batch_size=BATCH_SIZE, shuffle=True))
        valloaders.append(DataLoader(ds_val, batch_size=BATCH_SIZE))
    testloader = DataLoader(testset, batch_size=BATCH_SIZE)
    return trainloaders, valloaders, testloader, shadowloaders

# Build the loaders once for the whole notebook: trainloaders/valloaders are indexed by client id,
# testloader is the shared held-out set; shadowloaders comes back empty from load_datasets.
trainloaders, valloaders, testloader, shadowloaders = load_datasets()

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./dataset/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 105412006.03it/s]


Extracting ./dataset/cifar-10-python.tar.gz to ./dataset
Files already downloaded and verified


In [14]:
class AttackMLP(nn.Module):
    """Two-layer perceptron used as the membership-inference attack model.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the single hidden layer.
        out_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size=64,out_classes=2):
        super().__init__()
        layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size, out_classes),
        ]
        self.classifier = nn.Sequential(*layers)

    def forward(self, x):
        """Return ``(raw scores, softmax over dim 1 of those scores)``."""
        scores = self.classifier(x)
        return scores, F.softmax(scores, dim=1)

In [15]:
class LeNet5(nn.Module):
    """LeNet-5 style CNN whose layer widths scale with the number of input channels.

    Args:
        num_classes: size of the output layer.
        grayscale: ``True`` for 1-channel input, ``False`` for 3-channel input.
    """

    def __init__(self, num_classes, grayscale=False):
        super().__init__()

        self.grayscale = grayscale
        self.num_classes = num_classes

        in_channels = 1 if self.grayscale else 3
        # every layer width is the classic LeNet-5 width multiplied by the channel count
        conv1_out = 6 * in_channels
        conv2_out = 16 * in_channels
        fc1_out = 120 * in_channels
        fc2_out = 84 * in_channels

        self.features = nn.Sequential(
            nn.Conv2d(in_channels, conv1_out, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(conv1_out, conv2_out, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2),
        )

        # the flattened feature map is conv2_out channels of 5x5 (for 32x32 inputs)
        self.classifier = nn.Sequential(
            nn.Linear(conv2_out * 5 * 5, fc1_out),
            nn.Tanh(),
            nn.Linear(fc1_out, fc2_out),
            nn.Tanh(),
            nn.Linear(fc2_out, num_classes),
        )

    def forward(self, x):
        """Return ``(logits, softmax probabilities over dim 1)`` for a batch ``x``."""
        flat_features = torch.flatten(self.features(x), 1)
        logits = self.classifier(flat_features)
        return logits, F.softmax(logits, dim=1)

In [16]:
def get_target_delta(data_size: int) -> float:
    """Return the delta of the (epsilon, delta) differential-privacy guarantee.

    The result is ``1 / 10**k`` where ``10**k`` is the smallest power of ten
    strictly greater than ``data_size`` (so a size below 1 yields ``1.0``).
    """
    power_of_ten = 1
    while power_of_ten <= data_size:
        power_of_ten *= 10
    return 1 / power_of_ten

In [17]:
def train(net, trainloader, epochs: int, nm, mgn, history, verbose=False, repeat=True):
    """Train ``net`` with Adam, by default under differential privacy (Opacus).

    Args:
        net: model whose forward pass returns ``(logits, probabilities)``.
        trainloader: DataLoader providing ``(images, labels)`` batches.
        epochs: number of passes over ``trainloader``.
        nm: noise multiplier handed to ``PrivacyEngine.make_private``.
        mgn: maximum per-sample gradient norm handed to ``make_private``.
        history: privacy-accountant history from earlier rounds; when not
            ``None`` it is installed on the fresh accountant so epsilon keeps
            accumulating across rounds.
        verbose: print loss/accuracy after every epoch.
        repeat: when true (the default) the model/optimizer/loader are wrapped
            by a new ``PrivacyEngine``. When false, training runs without any
            privacy engine. NOTE(review): the original intent of this flag is
            not evident from this notebook; previously ``repeat=False`` crashed
            with a ``NameError``.

    Returns:
        ``(epsilon, history)``: the privacy budget spent so far for the delta
        from ``get_target_delta`` and the updated accountant history. Without
        a privacy engine there is no accountant, so ``float("inf")`` (no
        privacy guarantee) and the unchanged ``history`` are returned.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters())
    net.train()
    delta = get_target_delta(len(trainloader.dataset)) # dp δ

    privacy_engine = None
    if repeat:
        privacy_engine = PrivacyEngine()
        # attach the optimizer to the DP engine so that every optimisation step
        # clips the per-sample gradients and adds noise
        net, optimizer, trainloader = privacy_engine.make_private(
            module=net,
            optimizer=optimizer,
            data_loader=trainloader,
            noise_multiplier=nm,
            max_grad_norm=mgn,
        )
        if history is not None:
            # continue the privacy accounting of the previous rounds
            privacy_engine.accountant.history = history

    for epoch in range(epochs):
        correct, total, epoch_loss = 0, 0, 0.0
        for images, labels in trainloader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs, probas = net(images) # do the forward pass
            loss = criterion(outputs, labels) # calculate the loss function
            loss.backward() # calculate the gradients of the loss function

            optimizer.step()  # with the privacy engine attached this is a DP optimizer step

            # .item() keeps the running total a plain float instead of chaining
            # autograd-tracked tensors across the whole epoch
            epoch_loss += loss.item()
            total += labels.size(0)
            correct += (torch.max(outputs.data, 1)[1] == labels).sum().item()
        epoch_loss /= len(trainloader.dataset)
        epoch_acc = correct / total if total else 0.0  # no samples seen -> avoid 0/0

        if verbose: # log information
            print(f"Epoch {epoch+1}: train loss {epoch_loss}, accuracy {epoch_acc}")

    if privacy_engine is None:
        return float("inf"), history

    epsilon = privacy_engine.accountant.get_epsilon(delta=delta)
    history = privacy_engine.accountant.history
    return epsilon, history

def trainV2(net, trainloader, epochs: int, verbose=False):
    """Train ``net`` with Adam and cross-entropy, without differential privacy.

    A fresh Adam optimizer is created on every call. Used for the shadow
    models and the attack model.

    Args:
        net: model whose forward pass returns ``(logits, probabilities)``.
        trainloader: DataLoader providing ``(inputs, labels)`` batches.
        epochs: number of passes over ``trainloader``.
        verbose: print loss/accuracy after every epoch.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters())
    net.train()
    for epoch in range(epochs):
        correct, total, epoch_loss = 0, 0, 0.0
        for images, labels in trainloader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad() # reset the gradients
            outputs, _ = net(images) # do the forward pass
            loss = criterion(outputs, labels) # calculate the loss function
            loss.backward() # calculate the gradients of the loss function
            optimizer.step() # do one optimisation step
            # .item() keeps the running total a plain float instead of chaining
            # autograd-tracked tensors across the whole epoch
            epoch_loss += loss.item()
            total += labels.size(0)
            correct += (torch.max(outputs.data, 1)[1] == labels).sum().item()
        epoch_loss /= len(trainloader.dataset)
        epoch_acc = correct / total if total else 0.0  # empty loader -> avoid 0/0

        if verbose: # log information
            print(f"Epoch {epoch+1}: train loss {epoch_loss}, accuracy {epoch_acc}")

def test(net, testloader):
    """Evaluate ``net`` on ``testloader`` and return ``(loss, accuracy)``.

    ``loss`` is the sum of the per-batch mean cross-entropy values divided by
    the number of samples in the dataset; ``accuracy`` is the fraction of
    samples whose highest-probability class equals the label.
    """
    loss_fn = torch.nn.CrossEntropyLoss()
    net.eval()
    summed_loss = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for batch_images, batch_labels in testloader:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)
            logits, probabilities = net(batch_images)
            summed_loss += loss_fn(logits, batch_labels).item()
            predicted = torch.max(probabilities.data, 1).indices
            n_seen += batch_labels.size(0)
            n_correct += (predicted == batch_labels).sum().item()
    mean_loss = summed_loss / len(testloader.dataset)
    return mean_loss, n_correct / n_seen

In [18]:
def get_parameters(net) -> List[np.ndarray]:
    """Return the entries of ``net.state_dict()`` as NumPy arrays, in state-dict order.

    Together with ``set_parameters`` this is how model weights are handed to
    and received from Flower.
    """
    return [tensor.cpu().numpy() for tensor in net.state_dict().values()]

def set_parameters(net, parameters: List[np.ndarray]):
    """Load ``parameters`` into ``net``.

    The arrays are matched to the state-dict keys by position and loaded with
    ``strict=True``.
    """
    state_dict = OrderedDict()
    for key, array in zip(net.state_dict().keys(), parameters):
        state_dict[key] = torch.Tensor(array)
    net.load_state_dict(state_dict, strict=True)

In [19]:
class FlowerClient(fl.client.NumPyClient):
    """Flower client that trains its local model under differential privacy.

    Args:
        net: local model.
        trainloader: DataLoader with this client's training data.
        valloader: DataLoader with this client's validation data.
        eps: target privacy budget; an update is rejected once the spent
            epsilon exceeds ``eps + 0.3``.
        nm: noise multiplier forwarded to ``train``.
        mgn: maximum gradient norm forwarded to ``train``.
        history: privacy-accountant history of this client's earlier rounds
            (or ``None``), forwarded to ``train``.
        cid: numeric client id.
    """

    def __init__(self, net, trainloader, valloader, eps, nm, mgn, history, cid):
        self.net = net
        self.trainloader = trainloader
        self.valloader = valloader
        self.eps = eps
        self.nm = nm
        self.mgn = mgn
        self.history = history
        self.parameters = None  # snapshot of the weights received at the start of fit()
        self.cid = cid

    def get_parameters(self, config):
        """Return the current local weights as a list of NumPy arrays."""
        return get_parameters(self.net)

    def fit(self, parameters, config):
        """Train one epoch starting from ``parameters``.

        Returns:
            ``(weights, len(self.trainloader), metrics)`` where ``metrics``
            holds ``"epsilon"``, ``"accept"`` and ``"history"``. If the
            privacy budget is exceeded, ``accept`` is ``False`` and the
            returned weights are the ones received, not the trained ones.
        """
        set_parameters(self.net, parameters)
        # Keep the pre-training weights in case the new ones must be discarded.
        # The arrays from get_parameters() come from Tensor.numpy(), which shares
        # memory with the (CPU) model weights, so they have to be copied --
        # otherwise training would silently overwrite the snapshot.
        self.parameters = [layer.copy() for layer in get_parameters(self.net)]
        epsilon, history = train(self.net, self.trainloader, epochs=1, mgn=self.mgn, nm=self.nm, history= self.history)

        accept = True # check if target epsilon value is respected
        if epsilon > self.eps + 0.3:
            accept = False # refuse client's new parameters
            print(f"Epsilon over target value ({self.eps}), disconnecting client.")
            set_parameters(self.net, self.parameters)
        metrics = {
            "epsilon": epsilon,
            "accept": accept,
            "history": history
        }
        return get_parameters(self.net), len(self.trainloader), metrics

    def evaluate(self, parameters, config):
        """Evaluate ``parameters`` on the validation loader.

        Returns:
            ``(loss, len(self.valloader), {"accuracy": accuracy})``.
        """
        set_parameters(self.net, parameters)
        loss, accuracy = test(self.net, self.valloader)
        return float(loss), len(self.valloader), {"accuracy": float(accuracy)}

In [20]:
class BenignFlowerClient(FlowerClient):
    """FlowerClient whose fit metrics are tagged with ``intention = "BENIGN"``.

    The constructor is inherited unchanged from ``FlowerClient``.
    """

    def fit(self, parameters, config):
        """Run the parent ``fit`` and label the returned metrics as benign."""
        fit_result = super().fit(parameters, config)
        fit_result[2]["intention"] = "BENIGN"
        return fit_result

class MaliciousFlowerClient(FlowerClient):
    """FlowerClient whose fit metrics are tagged with ``intention = "MALICIOUS"``.

    The constructor is inherited unchanged from ``FlowerClient``; training
    itself is the same as in the parent class.
    """

    def fit(self, parameters, config):
        """Run the parent ``fit`` and label the returned metrics as malicious."""
        fit_result = super().fit(parameters, config)
        fit_result[2]["intention"] = "MALICIOUS"
        return fit_result

In [21]:
def client_fn(cid: str) -> FlowerClient:
    """Factory that Flower calls to create a client on demand.

    Args:
        cid: client id as a numeric string; it selects the client's data
            partition and its slot in ``histories``.

    Returns:
        A ``MaliciousFlowerClient`` for the first 20% of ``NUM_CLIENTS`` ids
        and a ``BenignFlowerClient`` for the rest.
    """
    client_idx = int(cid)
    net = LeNet5(10).to(DEVICE)
    trainloader = trainloaders[client_idx]
    valloader = valloaders[client_idx]

    # 20 - 80 --> malicious - benign. The comparison has to be strict: with
    # NUM_CLIENTS = 10 the threshold is 2, and `<=` would mark ids 0, 1 and 2
    # (three clients, i.e. 30%) as malicious.
    if client_idx < NUM_CLIENTS * 0.2:
        client_cls = MaliciousFlowerClient
    else:
        client_cls = BenignFlowerClient
    return client_cls(net, trainloader, valloader, eps=200.0, nm=0.3, mgn=1.0, history=histories[client_idx], cid=client_idx)

In [22]:
def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
    """Aggregate client accuracies, weighting each by its number of examples.

    Passed to the strategy as ``evaluate_metrics_aggregation_fn``.

    Args:
        metrics: ``(num_examples, {"accuracy": value, ...})`` pairs, one per client.

    Returns:
        ``{"accuracy": weighted mean}``, or an empty dict when there is nothing
        to average (no entries, or a total example count of zero) instead of
        raising ``ZeroDivisionError``.
    """
    total_examples = sum(num_examples for num_examples, _ in metrics)
    if total_examples == 0:
        return {}

    weighted_accuracy = sum(num_examples * m["accuracy"] for num_examples, m in metrics)
    return {"accuracy": weighted_accuracy / total_examples}

In [23]:
# Server-side copy of the global model: AttackSimulationStrategy.aggregate_fit loads the
# aggregated weights into it after every round, and it is evaluated/attacked in later cells.
federatedModel = LeNet5(10)

In [24]:
class AttackSimulationStrategy(fl.server.strategy.FedAvg):
    """FedAvg variant that tracks the clients' privacy budgets.

    Only ``aggregate_fit`` differs from ``FedAvg``: it stores each client's
    privacy-accountant history in the global ``histories`` list, drops the
    updates of clients that reported ``accept = False``, aggregates the rest
    and mirrors the result into the global ``federatedModel``.

    ``num_malicious_clients``, ``num_clients_to_keep``, ``perturbationVector``
    and ``adversaryKnowledge`` are stored on the instance but are not read by
    any method of this class.
    """

    def __init__(
        self,
        *,
        fraction_fit: float = 1.0,
        fraction_evaluate: float = 1.0,
        min_fit_clients: int = 2,
        min_evaluate_clients: int = 2,
        min_available_clients: int = 2,
        num_malicious_clients: int = 2,
        num_clients_to_keep: int = 0,
        evaluate_fn: Optional[
            Callable[[int, NDArrays, Dict[str, Scalar]], Optional[Tuple[float, Dict[str, Scalar]]]]
        ] = None,
        on_fit_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None,
        on_evaluate_config_fn: Optional[Callable[[int], Dict[str, Scalar]]] = None,
        accept_failures: bool = True,
        initial_parameters: Optional[Parameters] = None,
        fit_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None,
        evaluate_metrics_aggregation_fn: Optional[MetricsAggregationFn] = None,
        perturbationVector: str,
        adversaryKnowledge: str,
    ) -> None:
        super().__init__(
            fraction_fit=fraction_fit,
            fraction_evaluate=fraction_evaluate,
            min_fit_clients=min_fit_clients,
            min_evaluate_clients=min_evaluate_clients,
            min_available_clients=min_available_clients,
            evaluate_fn=evaluate_fn,
            on_fit_config_fn=on_fit_config_fn,
            on_evaluate_config_fn=on_evaluate_config_fn,
            accept_failures=accept_failures,
            initial_parameters=initial_parameters,
            fit_metrics_aggregation_fn=fit_metrics_aggregation_fn,
            evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation_fn,
        )
        self.num_malicious_clients = num_malicious_clients
        self.num_clients_to_keep = num_clients_to_keep
        self.perturbationVector = perturbationVector
        self.adversaryKnowledge = adversaryKnowledge
        self.max_epsilon = 0.0 # this is the global privacy budget

    def __repr__(self) -> str:
        rep = f"AttackSimulationStrategy(accept_failures={self.accept_failures})"
        return rep

    def aggregate_fit(
        self,
        server_round: int,
        results: List[Tuple[ClientProxy, FitRes]],
        failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
    ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
        """Aggregate the accepted client updates of one round.

        Returns:
            ``(aggregated parameters, aggregated metrics)``, or ``(None, {})``
            when there is nothing to aggregate (no results, failures that are
            not tolerated, or every client was rejected).
        """
        if not results: # boilerplate code to handle exceptions
            return None, {}
        if not self.accept_failures and failures:
            return None, {}

        accepted_results = []
        disconnect_clients = []
        epsilons = []
        for client, fit_res in results:
            # client_fn reads histories[int(cid)], so the history must be stored under
            # the client's id; the position inside `results` is not tied to the cid.
            histories[int(client.cid)] = fit_res.metrics["history"]
            if fit_res.metrics["accept"]:
                accepted_results.append((client, fit_res))
                epsilons.append(fit_res.metrics["epsilon"])
            else:
                disconnect_clients.append(client)

        for client in disconnect_clients:
            client.reconnect(ReconnectIns(seconds=None), timeout=None)

        results = accepted_results
        if epsilons:
            self.max_epsilon = max(self.max_epsilon, max(epsilons))
        print(f"Privacy budget ε at round {server_round}: {self.max_epsilon}")

        if not results:
            # every client exceeded its budget: aggregate() cannot handle an empty list
            return None, {}

        # convert results
        total_weights_results = [(parameters_to_ndarrays(fit_res.parameters), fit_res.num_examples) for _, fit_res in results]

        # calculate Non Attack AGR
        parameters_aggregated = ndarrays_to_parameters(aggregate(total_weights_results))

        # update federated learning model
        set_parameters(federatedModel, parameters_to_ndarrays(parameters_aggregated))

        # boilerplate code if aggregation metrics are provided
        metrics_aggregated = {}
        if self.fit_metrics_aggregation_fn:
            fit_metrics = [(res.num_examples, res.metrics) for _, res in results]
            metrics_aggregated = self.fit_metrics_aggregation_fn(fit_metrics)
        elif server_round == 1:  # Only log this warning once
            log(WARNING, "No fit_metrics_aggregation_fn provided")

        return parameters_aggregated, metrics_aggregated



In [25]:
# create a strategy
# perturbationVector / adversaryKnowledge are required keyword arguments of the strategy; they are
# only stored on the instance and are not read by aggregate_fit.
strategy = AttackSimulationStrategy(fraction_fit=1.0, fraction_evaluate=0.75, min_fit_clients=4,
    min_evaluate_clients=4, min_available_clients=4, perturbationVector="InverseStd", adversaryKnowledge="agr-only",
    evaluate_metrics_aggregation_fn=weighted_average,) # <-- pass the metric aggregation function. This function will be called
                                                       # in every federated learning round for evaluation (it aggregates the
                                                       # client-side evaluation metrics on the server)

# start simulation
fl.simulation.start_simulation(
    client_fn=client_fn, # our factory function
    num_clients=4, # number of simulated clients (matches the 4 slots of `histories`, not NUM_CLIENTS)
    config=fl.server.ServerConfig(num_rounds=11), # number of federated learning rounds
    strategy=strategy, # our attack simulation strategy
    client_resources=None,
)

INFO flwr 2023-11-12 17:43:33,376 | app.py:175 | Starting Flower simulation, config: ServerConfig(num_rounds=11, round_timeout=None)
INFO:flwr:Starting Flower simulation, config: ServerConfig(num_rounds=11, round_timeout=None)
2023-11-12 17:43:35,827	INFO worker.py:1621 -- Started a local Ray instance.
INFO flwr 2023-11-12 17:43:37,631 | app.py:210 | Flower VCE: Ray initialized with resources: {'CPU': 2.0, 'node:__internal_head__': 1.0, 'node:172.28.0.12': 1.0, 'memory': 7842678375.0, 'object_store_memory': 3921339187.0}
INFO:flwr:Flower VCE: Ray initialized with resources: {'CPU': 2.0, 'node:__internal_head__': 1.0, 'node:172.28.0.12': 1.0, 'memory': 7842678375.0, 'object_store_memory': 3921339187.0}
INFO flwr 2023-11-12 17:43:37,637 | app.py:218 | No `client_resources` specified. Using minimal resources for clients.
INFO:flwr:No `client_resources` specified. Using minimal resources for clients.
INFO flwr 2023-11-12 17:43:37,643 | app.py:224 | Flower VCE: Resources for each Virtual Cl

Privacy budget ε at round 1: 20.690627889756946


DEBUG flwr 2023-11-12 17:44:45,460 | server.py:187 | evaluate_round 1 received 4 results and 0 failures
DEBUG:flwr:evaluate_round 1 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:44:45,466 | server.py:222 | fit_round 2: strategy sampled 4 clients (out of 4)
DEBUG:flwr:fit_round 2: strategy sampled 4 clients (out of 4)
DEBUG flwr 2023-11-12 17:45:34,286 | server.py:236 | fit_round 2 received 4 results and 0 failures
DEBUG:flwr:fit_round 2 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:45:34,304 | server.py:173 | evaluate_round 2: strategy sampled 4 clients (out of 4)
DEBUG:flwr:evaluate_round 2: strategy sampled 4 clients (out of 4)


Privacy budget ε at round 2: 26.710303858014665


DEBUG flwr 2023-11-12 17:45:40,417 | server.py:187 | evaluate_round 2 received 4 results and 0 failures
DEBUG:flwr:evaluate_round 2 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:45:40,421 | server.py:222 | fit_round 3: strategy sampled 4 clients (out of 4)
DEBUG:flwr:fit_round 3: strategy sampled 4 clients (out of 4)
DEBUG flwr 2023-11-12 17:46:35,037 | server.py:236 | fit_round 3 received 4 results and 0 failures
DEBUG:flwr:fit_round 3 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:46:35,063 | server.py:173 | evaluate_round 3: strategy sampled 4 clients (out of 4)
DEBUG:flwr:evaluate_round 3: strategy sampled 4 clients (out of 4)


Privacy budget ε at round 3: 31.572781544878836


DEBUG flwr 2023-11-12 17:46:40,550 | server.py:187 | evaluate_round 3 received 4 results and 0 failures
DEBUG:flwr:evaluate_round 3 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:46:40,553 | server.py:222 | fit_round 4: strategy sampled 4 clients (out of 4)
DEBUG:flwr:fit_round 4: strategy sampled 4 clients (out of 4)
DEBUG flwr 2023-11-12 17:47:29,984 | server.py:236 | fit_round 4 received 4 results and 0 failures
DEBUG:flwr:fit_round 4 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:47:30,002 | server.py:173 | evaluate_round 4: strategy sampled 4 clients (out of 4)
DEBUG:flwr:evaluate_round 4: strategy sampled 4 clients (out of 4)


Privacy budget ε at round 4: 35.85593406877192


DEBUG flwr 2023-11-12 17:47:37,038 | server.py:187 | evaluate_round 4 received 4 results and 0 failures
DEBUG:flwr:evaluate_round 4 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:47:37,041 | server.py:222 | fit_round 5: strategy sampled 4 clients (out of 4)
DEBUG:flwr:fit_round 5: strategy sampled 4 clients (out of 4)
DEBUG flwr 2023-11-12 17:48:29,806 | server.py:236 | fit_round 5 received 4 results and 0 failures
DEBUG:flwr:fit_round 5 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:48:29,825 | server.py:173 | evaluate_round 5: strategy sampled 4 clients (out of 4)
DEBUG:flwr:evaluate_round 5: strategy sampled 4 clients (out of 4)


Privacy budget ε at round 5: 39.776106375073866


DEBUG flwr 2023-11-12 17:48:35,039 | server.py:187 | evaluate_round 5 received 4 results and 0 failures
DEBUG:flwr:evaluate_round 5 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:48:35,042 | server.py:222 | fit_round 6: strategy sampled 4 clients (out of 4)
DEBUG:flwr:fit_round 6: strategy sampled 4 clients (out of 4)
DEBUG flwr 2023-11-12 17:49:25,174 | server.py:236 | fit_round 6 received 4 results and 0 failures
DEBUG:flwr:fit_round 6 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:49:25,193 | server.py:173 | evaluate_round 6: strategy sampled 4 clients (out of 4)
DEBUG:flwr:evaluate_round 6: strategy sampled 4 clients (out of 4)


Privacy budget ε at round 6: 43.44133970609111


DEBUG flwr 2023-11-12 17:49:32,822 | server.py:187 | evaluate_round 6 received 4 results and 0 failures
DEBUG:flwr:evaluate_round 6 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:49:32,827 | server.py:222 | fit_round 7: strategy sampled 4 clients (out of 4)
DEBUG:flwr:fit_round 7: strategy sampled 4 clients (out of 4)
DEBUG flwr 2023-11-12 17:50:26,091 | server.py:236 | fit_round 7 received 4 results and 0 failures
DEBUG:flwr:fit_round 7 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:50:26,109 | server.py:173 | evaluate_round 7: strategy sampled 4 clients (out of 4)
DEBUG:flwr:evaluate_round 7: strategy sampled 4 clients (out of 4)


Privacy budget ε at round 7: 46.915331621582204


DEBUG flwr 2023-11-12 17:50:31,195 | server.py:187 | evaluate_round 7 received 4 results and 0 failures
DEBUG:flwr:evaluate_round 7 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:50:31,201 | server.py:222 | fit_round 8: strategy sampled 4 clients (out of 4)
DEBUG:flwr:fit_round 8: strategy sampled 4 clients (out of 4)
DEBUG flwr 2023-11-12 17:51:21,498 | server.py:236 | fit_round 8 received 4 results and 0 failures
DEBUG:flwr:fit_round 8 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:51:21,529 | server.py:173 | evaluate_round 8: strategy sampled 4 clients (out of 4)
DEBUG:flwr:evaluate_round 8: strategy sampled 4 clients (out of 4)


Privacy budget ε at round 8: 50.23897734733542


DEBUG flwr 2023-11-12 17:51:29,302 | server.py:187 | evaluate_round 8 received 4 results and 0 failures
DEBUG:flwr:evaluate_round 8 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:51:29,307 | server.py:222 | fit_round 9: strategy sampled 4 clients (out of 4)
DEBUG:flwr:fit_round 9: strategy sampled 4 clients (out of 4)
DEBUG flwr 2023-11-12 17:52:20,608 | server.py:236 | fit_round 9 received 4 results and 0 failures
DEBUG:flwr:fit_round 9 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:52:20,628 | server.py:173 | evaluate_round 9: strategy sampled 4 clients (out of 4)
DEBUG:flwr:evaluate_round 9: strategy sampled 4 clients (out of 4)


Privacy budget ε at round 9: 53.440010087080424


DEBUG flwr 2023-11-12 17:52:25,823 | server.py:187 | evaluate_round 9 received 4 results and 0 failures
DEBUG:flwr:evaluate_round 9 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:52:25,825 | server.py:222 | fit_round 10: strategy sampled 4 clients (out of 4)
DEBUG:flwr:fit_round 10: strategy sampled 4 clients (out of 4)
DEBUG flwr 2023-11-12 17:53:19,322 | server.py:236 | fit_round 10 received 4 results and 0 failures
DEBUG:flwr:fit_round 10 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:53:19,356 | server.py:173 | evaluate_round 10: strategy sampled 4 clients (out of 4)
DEBUG:flwr:evaluate_round 10: strategy sampled 4 clients (out of 4)


Privacy budget ε at round 10: 56.53929360975448


DEBUG flwr 2023-11-12 17:53:24,968 | server.py:187 | evaluate_round 10 received 4 results and 0 failures
DEBUG:flwr:evaluate_round 10 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:53:24,971 | server.py:222 | fit_round 11: strategy sampled 4 clients (out of 4)
DEBUG:flwr:fit_round 11: strategy sampled 4 clients (out of 4)
DEBUG flwr 2023-11-12 17:54:18,733 | server.py:236 | fit_round 11 received 4 results and 0 failures
DEBUG:flwr:fit_round 11 received 4 results and 0 failures
DEBUG flwr 2023-11-12 17:54:18,754 | server.py:173 | evaluate_round 11: strategy sampled 4 clients (out of 4)
DEBUG:flwr:evaluate_round 11: strategy sampled 4 clients (out of 4)


Privacy budget ε at round 11: 59.551722659010956


DEBUG flwr 2023-11-12 17:54:26,483 | server.py:187 | evaluate_round 11 received 4 results and 0 failures
DEBUG:flwr:evaluate_round 11 received 4 results and 0 failures
INFO flwr 2023-11-12 17:54:26,486 | server.py:153 | FL finished in 638.80449234
INFO:flwr:FL finished in 638.80449234
INFO flwr 2023-11-12 17:54:26,490 | app.py:225 | app_fit: losses_distributed [(1, 0.06547332084178924), (2, 0.06317389583587647), (3, 0.062029316961765286), (4, 0.06050258672237396), (5, 0.058819595575332645), (6, 0.05797963374853134), (7, 0.057408484101295475), (8, 0.056748649239540094), (9, 0.05643638944625855), (10, 0.056866947054862976), (11, 0.05624494415521622)]
INFO:flwr:app_fit: losses_distributed [(1, 0.06547332084178924), (2, 0.06317389583587647), (3, 0.062029316961765286), (4, 0.06050258672237396), (5, 0.058819595575332645), (6, 0.05797963374853134), (7, 0.057408484101295475), (8, 0.056748649239540094), (9, 0.05643638944625855), (10, 0.056866947054862976), (11, 0.05624494415521622)]
INFO flwr 2

History (loss, distributed):
	round 1: 0.06547332084178924
	round 2: 0.06317389583587647
	round 3: 0.062029316961765286
	round 4: 0.06050258672237396
	round 5: 0.058819595575332645
	round 6: 0.05797963374853134
	round 7: 0.057408484101295475
	round 8: 0.056748649239540094
	round 9: 0.05643638944625855
	round 10: 0.056866947054862976
	round 11: 0.05624494415521622
History (metrics, distributed, evaluate):
{'accuracy': [(1, 0.2855), (2, 0.3), (3, 0.3095), (4, 0.33799999999999997), (5, 0.35250000000000004), (6, 0.359), (7, 0.36650000000000005), (8, 0.37850000000000006), (9, 0.38049999999999995), (10, 0.38850000000000007), (11, 0.38850000000000007)]}

In [27]:
# evaluate the aggregated global model on the held-out test set; prints the (loss, accuracy) tuple
print(test(federatedModel, testloader))

(0.05311087976694107, 0.4123)


In [28]:
class AttackDataset(Dataset):
    """In-memory dataset backed by a plain Python list (``self.data``).

    Items are stored and returned exactly as they were added.
    """

    def __init__(self):
        self.data = []

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

    def append(self, item):
        """Add ``item`` to the end of the dataset."""
        self.data.append(item)

    def __insertitem___(self, item):
        """Backward-compatible name for ``append``.

        The name imitates a special method but is not one Python knows about;
        it is kept only because existing cells call it. Prefer ``append``.
        """
        self.append(item)

In [29]:
### Membership Inference Attack###

# create and train shadow models
shadowModelsTotal = 3  # number of shadow models
shift = 4              # index of the first data partition used for shadow training

shadowModels = [LeNet5(10) for _ in range(shadowModelsTotal)]
# Shadow model `idx` is trained on four consecutive partitions, shift+idx .. shift+idx+3,
# visiting them one epoch each, in order, and repeating that cycle 11 times.
for idx, shadow_model in enumerate(shadowModels):
    for epochidx in range(11):
        for partition_offset in range(4):
            trainV2(shadow_model, trainloaders[shift + idx + partition_offset], epochs=1)

0 0
0 1
0 2
0 3
0 4
0 5
0 6
0 7
0 8
0 9
0 10
1 0
1 1
1 2
1 3
1 4
1 5
1 6
1 7
1 8
1 9
1 10
2 0
2 1
2 2
2 3
2 4
2 5
2 6
2 7
2 8
2 9
2 10


In [30]:
# print the (loss, accuracy) tuple of every shadow model on the held-out test set
for shadow_model in shadowModels[:shadowModelsTotal]:
    print(test(shadow_model, testloader))

(0.05718909372091293, 0.5983)
(0.06277830110788345, 0.5806)
(0.06247637983560562, 0.5828)


In [31]:
# construct attack model's training set
import random

# Every record is (features, membership) where features = the shadow model's 10 class
# probabilities followed by the true label, and membership is 1 ("in": the sample was part of
# that shadow model's training data) or 0 ("out": it was not).
attackTrainingSet = AttackDataset()
with torch.no_grad():  # inference only: do not build autograd graphs for the stored features
    for idx in range(shadowModelsTotal):
        shadow_model = shadowModels[idx]
        member_sources = (
            (trainloaders[shift + idx], 1),  # first partition shadow model `idx` was trained on
            (trainloaders[idx], 0),          # partition below `shift`, not used for shadow training
        )
        for loader, is_member in member_sources:
            for images, labels in loader:
                logits, probas = shadow_model(images)
                # append the label as an extra feature column, in the dtype of the probabilities
                features = torch.cat((probas, labels.unsqueeze(1).to(probas.dtype)), dim=1)
                for row in features:
                    attackTrainingSet.__insertitem___((row, is_member))
random.shuffle(attackTrainingSet.data)

ds_train = Subset(attackTrainingSet, list(range(len(attackTrainingSet))))
attackSet = DataLoader(ds_train, batch_size=BATCH_SIZE, shuffle=True)

In [38]:
# create and train the attack model
# 11 input features = 10 class probabilities + 1 true-label value, i.e. the row layout
# produced when attackTrainingSet is built
attackModel = AttackMLP(11)
trainV2(attackModel, attackSet, epochs=6)

In [44]:
# construct attack model's evaluation set
# Records have the same layout as the attack training set: (10 class probabilities of the
# federated model + true label, membership flag).
attackEvaluationSet = AttackDataset()
evaluation_sources = (
    (testloader, 0),       # 0 is out: the test set was never used for federated training
    (trainloaders[0], 1),  # 1 is in: partition 0 is the training data of client 0
)
with torch.no_grad():  # the stored features must not carry an autograd graph each
    for loader, is_member in evaluation_sources:
        for images, labels in loader:
            logits, probas = federatedModel(images)
            for dataidx in range(labels.size()[0]):
                attackEvaluationSet.__insertitem___((torch.cat((probas[dataidx], torch.as_tensor([labels[dataidx]]))), is_member))

ds_eval = Subset(attackEvaluationSet, list(range(len(attackEvaluationSet))))
attackEvalSet = DataLoader(ds_eval, batch_size=1)

In [45]:
# evaluation
# Measures how often the attack model guesses membership correctly, overall and per
# true class (the class is the last feature, column 10, of every attack record).
criterion = torch.nn.CrossEntropyLoss()
loss = 0.0
correct = [0 for _ in range(10)]
total = [0 for _ in range(10)]
attackModel.eval()
with torch.no_grad():
    for images, labels in attackEvalSet:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs, probas = attackModel(images)
        loss += criterion(outputs, labels).item()
        _, predicted = torch.max(probas.data, 1)
        # credit every sample of the batch to its own class, so the counts stay
        # right for any batch size (not only batch_size=1)
        sample_classes = images[:, 10].long().tolist()
        sample_hits = (predicted == labels).tolist()
        for class_idx, hit in zip(sample_classes, sample_hits):
            total[class_idx] += 1
            correct[class_idx] += int(hit)
loss /= len(attackEvalSet.dataset)
print(sum(correct) / sum(total))
for i in range(10):
  # a class without any sample has no accuracy; report that instead of dividing by zero
  print("class:", i, "=", correct[i]/total[i] if total[i] else "n/a")

0.663448275862069
class: 0 = 0.6187245590230664
class: 1 = 0.6936936936936937
class: 2 = 0.700770847932726
class: 3 = 0.7087172218284904
class: 4 = 0.6925207756232687
class: 5 = 0.6717346233586731
class: 6 = 0.6271186440677966
class: 7 = 0.6612133605998637
class: 8 = 0.6172248803827751
class: 9 = 0.6466528640441684
