In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
from scipy.stats import zscore
import copy

import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision
import torchvision.transforms as tt
import torchvision.models as models
from torchvision.datasets import MNIST, ImageFolder
from torchvision.utils import make_grid
from torch.utils.data import random_split, DataLoader, Subset, SubsetRandomSampler

from sklearn.metrics import accuracy_score
import time

from copy import deepcopy
import logging

import csv

In [None]:
import warnings
# Globally ignore every warning raised after this point (no category or
# module filter is given, so this applies to all libraries used below).
warnings.filterwarnings('ignore')

In [None]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# the single channel with mean 0.1307 and standard deviation 0.3081.
transform = tt.Compose([
    tt.ToTensor(),
    tt.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training split first, then the test split; both are stored under the
# current directory and share the same preprocessing.
train_ds, test_ds = (
    MNIST(root='.', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

In [None]:
# Mini-batch size; also read as a global by create_skewed_data_loaders.
batch_size = 100

# Only the training loader shuffles its samples.
train_dl = DataLoader(
    train_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
test_dl = DataLoader(
    test_ds,
    batch_size=batch_size,
    num_workers=4,
    pin_memory=True,
)

In [None]:
# `torch.cuda.is_available` is a function and has to be *called*: the bare
# function object is always truthy, which would select 'cuda' even on a
# machine without a usable GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [None]:
class MnistNet(nn.Module):
    """Small convolutional classifier: two conv stages followed by two linear layers.

    The first linear layer expects 320 flattened features and the last one
    emits 10 values per sample (returned as-is; no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 10 channels with a 5x5 kernel, 2x2 max-pool, ReLU.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
        )
        # Stage 2: 10 -> 20 channels, channel-wise dropout, 2x2 max-pool, ReLU.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5),
            nn.Dropout2d(),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
        )
        # Hidden layer: flatten, 320 -> 50, dropout, ReLU.
        self.fc1 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=320, out_features=50),
            nn.Dropout(),
            nn.ReLU(),
        )
        # Output layer: 50 -> 10.
        self.fc2 = nn.Linear(in_features=50, out_features=10)

    def forward(self, x):
        """Apply conv1, conv2, fc1 and fc2 in that order and return the result."""
        for stage in (self.conv1, self.conv2, self.fc1, self.fc2):
            x = stage(x)
        return x

print(MnistNet())

MnistNet(
  (conv1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
    (1): Dropout2d(p=0.5, inplace=False)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): ReLU()
  )
  (fc1): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=320, out_features=50, bias=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): ReLU()
  )
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)


In [None]:
def test(model, test_dl, criterion):
    """Evaluate ``model`` on ``test_dl`` and return ``(mean_loss, mean_accuracy)``.

    The model is moved to the module-level ``device`` for evaluation and moved
    back to the CPU before returning. Both results are unweighted means over
    batches, so a smaller final batch counts as much as a full one.

    Args:
        model: the network to evaluate (put into eval mode here).
        test_dl: iterable yielding ``(images, labels)`` batches.
        criterion: loss callable invoked as ``criterion(logits, labels)``.

    Returns:
        A tuple ``(loss, accuracy)``: ``loss`` is a 0-dim CPU tensor and
        ``accuracy`` the mean of the per-batch ``accuracy_score`` values.

    Raises:
        ZeroDivisionError: if ``test_dl`` yields no batches.
    """
    model.to(device)
    model.eval()
    batch_loss, batch_acc = [], []
    with torch.no_grad():
        for images, labels in test_dl:
            # Move the batch to the same `device` as the model instead of a
            # hard-coded `.cuda()`, so model and data can never end up on
            # different devices.
            images, labels = images.to(device), labels.to(device)

            logits = model(images)
            loss = criterion(logits, labels)
            batch_loss.append(loss.cpu())
            pred = torch.argmax(logits, dim=1)
            batch_acc.append(accuracy_score(labels.cpu(), pred.cpu()))
    model.cpu()
    return sum(batch_loss)/len(batch_loss), sum(batch_acc)/len(batch_acc)

def fit(epochs, model, optimizer, criterion, train_dl, test_dl):
    """Train a single (non-federated) model and collect per-epoch metrics.

    Each epoch is independently marked as attacked when ``random.random()``
    exceeds 0.20; an attacked epoch is trained with attack type ``'A1'``.

    Args:
        epochs: number of epochs to run.
        model: network to train.
        optimizer: optimizer bound to ``model``'s parameters.
        criterion: loss used for evaluation by ``test``. (``train`` reads the
            module-level ``criterion`` instead of this argument.)
        train_dl: training data loader.
        test_dl: evaluation data loader.

    Returns:
        dict with keys ``train_loss``, ``train_acc``, ``test_loss``,
        ``test_acc`` and ``train_attacked_history``, each a list with one
        entry per epoch.
    """
    train_loss, train_acc, test_loss, test_acc = [], [], [], []
    train_attack = []
    # `train` appends one weight snapshot per call and needs a real list to
    # append to; keep a single history for the whole run.
    hist_grads = []
    for epoch in range(1, epochs + 1):
        attack = random.random() > 0.20
        attack_type = 'A1' if attack else '0'

        # `train` takes (model, loader, optimizer, ID, attack_type, attack,
        # hist_grads) and returns five values; only the loss, the accuracy
        # and the updated history are needed here. There is a single model,
        # so the client ID is fixed to 0.
        trainl, traina, hist_grads, _, _ = train(
            model, train_dl, optimizer, 0, attack_type, attack, hist_grads=hist_grads
        )
        testl, testa = test(model, test_dl, criterion)

        train_loss.append(trainl.detach().numpy())
        train_acc.append(traina)
        train_attack.append(attack)
        test_loss.append(testl.detach().numpy())
        test_acc.append(testa)
        print(f'Epoch {epoch} - train_loss : {trainl :.4f}, train_acc : {traina:.4f}, test_loss : {testl:.4f}, test_acc : {testa:0.4f}')

    history = {'train_loss' : train_loss,
               'train_acc' : train_acc,
               'test_loss' : test_loss,
               'test_acc' : test_acc,
               'train_attacked_history' : train_attack}
    return history

# Baseline (single-model) setup: 3 epochs, Adam with learning rate 0.001,
# cross-entropy loss. `criterion` is also read as a global by `train`.
epochs = 3
model = MnistNet()
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001)

# baseline_history = fit(epochs, model, optimizer, criterion, train_dl, test_dl)

## Train with attack

In [None]:
# Attack 2 parameter: standard deviation of the Gaussian noise that `train`
# adds to every parameter gradient.
std_dev = 1

# Get historical gradients
def train(model, train_dl, optimizer, ID, attack_type, attack=False, hist_grads=None):
    """Run one local training pass over ``train_dl`` and record the resulting weights.

    For every batch: optionally poison the labels (only when ``attack`` is
    true and ``attack_type == 'A1'``), compute the loss with the module-level
    ``criterion``, clip the total gradient norm to 1.0, add Gaussian noise
    (std ``std_dev``, divided by the number of batches) to each gradient and
    step the optimizer.

    After the pass, a copy of the model's ``state_dict`` is appended to
    ``hist_grads``. Despite the "grads" naming, the history holds weight
    snapshots, not gradients.

    Args:
        model: network to train; moved to ``device`` and back to the CPU.
        train_dl: data loader yielding ``(images, labels)`` batches.
        optimizer: optimizer bound to ``model``'s parameters.
        ID: client identifier; accepted for call compatibility, not used.
        attack_type: attack label; only ``'A1'`` (label flipping) has an effect.
        attack: whether this client is attacked in this pass.
        hist_grads: list of earlier snapshots to extend in place. A new list
            is created when ``None`` is passed.

    Returns:
        ``(mean_loss, mean_acc, hist_grads, local_w_mean, local_w)`` where
        ``mean_loss`` is a 0-dim CPU tensor, ``mean_acc`` the mean per-batch
        accuracy, ``hist_grads`` the updated history, ``local_w_mean`` the
        flattened element-wise mean of all snapshots in the history and
        ``local_w`` the flattened snapshot taken in this call.

    Raises:
        ValueError: if ``train_dl`` yields no batches.
    """
    # The original default of None crashed on `.append`; start a fresh history.
    if hist_grads is None:
        hist_grads = []

    model.to(device)
    model.train()

    max_grad_norm = 1.0
    num_batches = len(train_dl)

    batch_loss, batch_acc = [], []
    for images, labels in train_dl:
        if attack and attack_type == 'A1':  # Attack 1: label flipping
            labels = label_poisoning(labels)

        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        logits = model(images.float())
        loss = criterion(logits, labels)
        loss.backward()

        # Clip the gradients, then perturb them with Gaussian noise.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        for param in model.parameters():
            if param.grad is None:
                continue
            noise = torch.normal(0, std_dev, param.grad.shape, device=device)
            param.grad += noise / num_batches

        optimizer.step()

        # Detach so the stored loss does not keep the autograd graph alive.
        batch_loss.append(loss.detach().cpu())
        pred = torch.argmax(logits, dim=1)
        batch_acc.append(accuracy_score(labels.cpu(), pred.cpu()))

    if not batch_loss:
        raise ValueError("train_dl produced no batches")

    # Snapshot the weights once, after the final optimizer step, so the
    # recorded vector matches the weights the caller aggregates. (Previously
    # the whole state_dict was deep-copied on every batch, before the step.)
    local_w = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
    hist_grads.append(local_w)

    # Element-wise mean of every snapshot in the history, flattened to 1-D.
    mean_weights = {
        key: torch.mean(torch.stack([snapshot[key] for snapshot in hist_grads]), dim=0)
        for key in hist_grads[0]
    }
    local_w_mean = torch.cat([v.flatten() for v in mean_weights.values()])

    # Current snapshot, flattened to 1-D.
    local_w = torch.cat([v.flatten() for v in local_w.values()])

    model.cpu()

    return sum(batch_loss)/len(batch_loss), sum(batch_acc)/len(batch_acc), hist_grads, local_w_mean, local_w

# Flip every label equal to `source_label` (7 by default) into `target_label` (1 by default).
def label_poisoning(label, source_label=7, target_label=1):
    """Return a poisoned copy of ``label`` with one class relabelled.

    Args:
        label: tensor (or array) of class labels.
        source_label: class to be replaced.
        target_label: class written in its place.

    Returns:
        A new object of the same kind as ``label``; the input is left
        untouched so a caller that keeps the clean labels is not affected.
    """
    # Tensors expose `.clone()`, NumPy arrays `.copy()`.
    poisoned = label.clone() if hasattr(label, 'clone') else label.copy()
    poisoned[label == source_label] = target_label
    return poisoned

# Train Clients


In [None]:
'''
Method 0
'''
from sklearn.cluster import KMeans

def find_attacker_id(clf):
    """Return the row indices that belong to the smaller of two clusters.

    ``clf`` only needs a ``labels_`` array of 0/1 assignments. The cluster
    with fewer members is treated as malicious; on a tie, cluster 1 is.

    Returns:
        A set with the positions in ``clf.labels_`` assigned to that cluster.
    """
    assignments = clf.labels_
    ones, zeros = sum(assignments == 1), sum(assignments == 0)

    if ones > zeros:
        suspicious_label = 0
    else:
        suspicious_label = 1

    members = np.where(assignments == suspicious_label)[0]
    return set(members.reshape((-1)))

def find_targeted_attack(dict_hist_grad):
    """Cluster per-client vectors into two groups and return the minority group's ids.

    Args:
        dict_hist_grad: mapping ``client_id -> 1-D tensor``; all tensors are
            stacked into one 2-D array, so they must have equal length.

    Returns:
        NumPy array with the ids (keys of ``dict_hist_grad``) that fall into
        the cluster chosen by ``find_attacker_id``, ordered by their position
        in the mapping.
    """
    id_hist_grad = np.array(list(dict_hist_grad.keys()))
    # `.detach()` keeps the NumPy conversion valid even for tensors that
    # still require grad.
    value_hist_grad = np.array([v.detach().cpu().numpy() for v in dict_hist_grad.values()])

    cluster = KMeans(n_clusters=2, random_state=0).fit(value_hist_grad)

    attacker = find_attacker_id(cluster)
    # A set has no defined iteration order; sort the positions so the result
    # (which is printed and written to CSV) is reproducible.
    attacker_id = id_hist_grad[sorted(attacker)]

    logging.info(f"This round TARGETED ATTACK: {attacker_id}")

    return attacker_id

In [None]:
def train_clients(hist_grad_all, client_models, client_optimizers, server_model, criterion, client_dls, num, attackers):
    """Train every client for one round, starting from the server weights.

    Each client model is first overwritten with ``server_model``'s weights
    and then trained with ``train``. Clients whose index is in ``attackers``
    are trained with attack type ``'A1'``. ``hist_grad_all[i]`` is replaced by
    the history that ``train`` returns for client ``i``.

    ``criterion`` and ``num`` are accepted but not used in this function.

    Returns:
        ``(mean_loss, mean_acc, hist_means, current, attacked_clients)``:
        the averages of the per-client loss and accuracy, two dicts keyed by
        client index (mean-of-history vector and current vector) and the list
        of per-client attacked flags.
    """
    losses, accuracies, attacked_clients = [], [], []
    # Placeholders keep one slot per model even if the zip below is shorter.
    hist_mean_slots = [{} for _ in client_models]
    current_slots = [{} for _ in client_models]

    participants = zip(client_models, client_optimizers, client_dls)
    for idx, (client_model, client_optimizer, loader) in enumerate(participants):
        client_model.load_state_dict(server_model.state_dict())

        is_attacked = idx in attackers
        attack_type = 'A1' if is_attacked else '0'
        attacked_clients.append(is_attacked)

        closs, cacc, updated_history, mean_vector, current_vector = train(
            client_model,
            loader,
            client_optimizer,
            idx,
            attack_type,
            is_attacked,
            hist_grads=hist_grad_all[idx],
        )

        losses.append(closs)
        accuracies.append(cacc)
        hist_grad_all[idx] = updated_history
        hist_mean_slots[idx] = mean_vector
        current_slots[idx] = current_vector

    mean_loss = sum(losses)/len(losses)
    mean_acc = sum(accuracies)/len(accuracies)
    return mean_loss, mean_acc, dict(enumerate(hist_mean_slots)), dict(enumerate(current_slots)), attacked_clients

def fedavg(client_models, server_model, historical_grads, current_grads):
    """Average the client weights into ``server_model`` and run attacker detection.

    Every entry of the clients' ``state_dict`` is summed and divided by the
    number of clients (unweighted mean), then loaded into ``server_model``.
    Afterwards ``find_targeted_attack`` is run on both vector dicts and each
    result is printed.

    Returns:
        ``(historical_result, current_result)``: the two values returned by
        ``find_targeted_attack``.
    """
    num_clients = len(client_models)

    totals = {}
    for client in client_models:
        for name, tensor in client.state_dict().items():
            totals[name] = totals.get(name, 0) + tensor
    averaged = {name: total/num_clients for name, total in totals.items()}
    server_model.load_state_dict(averaged)

    # Cluster on historical and on current vectors so they can be compared.
    historical_result = find_targeted_attack(historical_grads)
    print("cluster results of historical", historical_result)

    current_result = find_targeted_attack(current_grads)
    print("cluster results of current", current_result)

    return historical_result, current_result

In [None]:
"""
Non-IID Case
"""
def create_skewed_label_distribution(skewness_factor, n):
    """Build a label-proportion table with two dominant labels per client.

    For each of the ``n`` clients, two distinct labels out of 0-9 are drawn
    with ``np.random.choice``. Those two each get ``skewness_factor / 2``;
    the other eight labels share ``1 - skewness_factor`` equally.

    Returns:
        dict ``client_id -> {label: proportion}`` covering labels 0 to 9.
    """
    labels = list(range(10))  # Labels from 0 to 9
    primary_share = skewness_factor / 2
    secondary_share = (1 - skewness_factor) / (len(labels) - 2)

    distributions = {}
    for client_id in range(n):
        # Two primary labels per client, drawn without replacement.
        primary_labels = np.random.choice(labels, 2, replace=False)
        distributions[client_id] = {
            label: primary_share if label in primary_labels else secondary_share
            for label in labels
        }
    return distributions

def create_skewed_data_loaders(dataset, n, label_distribution_map):
    """Split ``dataset`` into ``n`` label-skewed subsets and build per-client objects.

    The dataset is iterated once to group sample indices by label (0-9). For
    every client and label, ``int(proportion * len(dataset) // n)`` indices
    are drawn without replacement from that label's pool; draws for different
    clients are independent, so clients may share samples.

    Uses the module-level ``batch_size`` for the loaders.

    Returns:
        ``(client_dls, client_models, client_optimizers)``: one shuffled
        DataLoader, one fresh ``MnistNet`` and one Adam optimizer
        (learning rate 0.001) per client.
    """
    # Gather indices for each label.
    indices_by_label = {digit: [] for digit in range(10)}
    for position, sample in enumerate(dataset):
        _, target = sample
        indices_by_label[target].append(position)

    # Draw each client's indices according to its label proportions.
    client_datasets = []
    for client_id in range(n):
        chosen = []
        for label, proportion in label_distribution_map[client_id].items():
            quota = int(proportion * len(dataset) // n)
            picks = np.random.choice(indices_by_label[label], quota, replace=False)
            chosen.extend(picks.tolist())
        client_datasets.append(Subset(dataset, chosen))

    client_dls = [
        DataLoader(subset, batch_size, shuffle=True, num_workers=4, pin_memory=True)
        for subset in client_datasets
    ]
    client_models = [MnistNet() for _ in range(n)]
    client_optimizers = [Adam(net.parameters(), 0.001) for net in client_models]

    return client_dls, client_models, client_optimizers

def non_iid_clients(train_ds, n):
    """Create loaders, models and optimizers for ``n`` clients with skewed labels.

    Reads the module-level ``skewness_factor``, which must be defined before
    this function is called.

    Returns:
        ``(client_dls, client_models, client_optimizers)`` as produced by
        ``create_skewed_data_loaders``.
    """
    distribution_map = create_skewed_label_distribution(skewness_factor, n)
    return create_skewed_data_loaders(train_ds, n, distribution_map)

In [None]:
# Shared result log: fit_fedavg appends one record per round to this list.
all_cluster_results = []

def get_attackers_indices(total_clients, attack_rate):
    """Pick ``int(total_clients * attack_rate)`` distinct client indices at random.

    The count is truncated towards zero, so e.g. 10 clients at rate 0.125
    yield a single attacker.
    """
    attacker_count = int(total_clients * attack_rate)
    candidates = range(total_clients)
    return random.sample(candidates, attacker_count)

def fit_fedavg(epochs, client_models, client_optimizers, server_model, criterion, client_dls, test_dl, num, attackers):
    """Run ``epochs`` federated rounds and log detection results for each one.

    Every round trains all clients, averages them into ``server_model`` with
    ``fedavg``, evaluates the server model and appends one record to the
    module-level ``all_cluster_results`` list.

    Reads the module-level ``rate`` for the record, so that name must be
    defined before this function is called.

    Returns:
        The module-level ``all_cluster_results`` list (records from earlier
        calls included).
    """
    # One (initially empty) snapshot history per client.
    hist_grad_all = [[] for _ in client_models]

    for epoch in range(1, epochs + 1):
        print(f"----epoch {epoch}----")
        trainl, traina, historical_grads, current_grad, attacked_clients = train_clients(
            hist_grad_all, client_models, client_optimizers, server_model,
            criterion, client_dls, num, attackers,
        )  # ATTACK
        cluster_results_1, cluster_results_2 = fedavg(client_models, server_model, historical_grads, current_grad)

        testl, testa = test(server_model, test_dl, criterion)

        # Metrics are stored as strings with four decimals.
        metrics = (('train_loss', trainl), ('train_acc', traina),
                   ('test_loss', testl), ('test_acc', testa))
        formatted = {label: f"{value:.4f}" for label, value in metrics}

        record = {
            'num': num,
            'rate': rate,
            'round': epoch,
            'ground truth': attackers,
            'historical_cluster': cluster_results_1,
            'current_cluster': cluster_results_2,
        }
        record.update(formatted)
        all_cluster_results.append(record)

        print("hey", formatted['train_loss'])
        print(f'Epoch {epoch} - train_loss : {trainl :.4f}, train_acc : {traina:.4f}, test_loss : {testl:.4f}, test_acc : {testa:0.4f}')

    return all_cluster_results

In [None]:
epochs = 50

# for skewness_factor in [0.3, 0.5, 0.7]: # degree of non-IID
#     print(f"---------------------skewness_factor {skewness_factor}------------------------")

skewness_factor = 0.7
# for n in [10, 20, 50]: # client num
#     print(f"---------------------client num {n}------------------------")

n = 10
# NOTE(review): loaders, client models and client optimizers are built once
# and reused for every rate below, so optimizer state carries over from one
# rate to the next. Confirm this is intended.
client_dls_list, client_models, client_optimizers = non_iid_clients(train_ds, n)

csv_columns = ['num', 'rate', 'round', 'ground truth', 'historical_cluster', 'current_cluster','train_loss','train_acc','test_loss','test_acc']

for rate in [0.125, 0.20, 0.275, 0.35, 0.425, 0.475]: # malicious rate
    print(f"---------------------rate {rate}------------------------")

    # Fresh server model and a new random set of attackers for each rate.
    server_model = MnistNet()
    attackers = get_attackers_indices(n, rate)
    print("attackers are ", attackers)

    # train and detection
    all_cluster_results = fit_fedavg(epochs, client_models, client_optimizers, server_model, criterion, client_dls_list, test_dl, n, attackers)

    # The result list keeps growing across rates, so the file is rewritten
    # in full after each rate.
    with open('nonIID_w_dpsgd.csv', mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(all_cluster_results)

---------------------client num 10------------------------
---------------------rate 0.125------------------------
attackers are  [8]
----epoch 1----
cluster results of historical [9 5 6]
cluster results of current [9 5 6]
hey 1.6631
Epoch 1 - train_loss : 1.6631, train_acc : 0.5123, test_loss : 2.0005, test_acc : 0.4172
----epoch 2----
cluster results of historical [9 5 6]
cluster results of current [9 5 6]
hey 1.3081
Epoch 2 - train_loss : 1.3081, train_acc : 0.6187, test_loss : 1.2766, test_acc : 0.6751
----epoch 3----
cluster results of historical [9 5 6]
cluster results of current [9 2 5 6]
hey 0.9441
Epoch 3 - train_loss : 0.9441, train_acc : 0.7220, test_loss : 0.8107, test_acc : 0.7888
----epoch 4----
cluster results of historical [9 5 6]
cluster results of current [8]
hey 0.7559
Epoch 4 - train_loss : 0.7559, train_acc : 0.7709, test_loss : 0.6177, test_acc : 0.8357
----epoch 5----
cluster results of historical [9 5 6]
cluster results of current [8]
hey 0.6506
Epoch 5 - train_