In [1]:
pip install torchvision

Collecting torchvision
  Downloading torchvision-0.22.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (6.1 kB)
Collecting torch==2.7.0 (from torchvision)
  Downloading torch-2.7.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting filelock (from torch==2.7.0->torchvision)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting setuptools (from torch==2.7.0->torchvision)
  Downloading setuptools-80.3.1-py3-none-any.whl.metadata (6.5 kB)
Collecting sympy>=1.13.3 (from torch==2.7.0->torchvision)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch==2.7.0->torchvision)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.6.77 (from torch==2.7.0->torchvision)
  Downloading nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.6.77 (from torch==2.7.0->torchvision)
  Downloading nvidia_cuda_runtime_cu1

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import random

# Parameters
NUM_CLIENTS = 10
BATCH_SIZE = 64
EPOCHS = 3  # Number of federated learning rounds
LOCAL_EPOCHS = 2  # Number of local training epochs per client in each round
LEARNING_RATE = 0.01
EPSILON = 10  # epsilon argument handed to the LDP perturbation
R = 0.075  # Range (r) used by the LDP perturbation

# Fix seeds for reproducibility
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

# 1. Load MNIST and split among clients
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # Normalise the MNIST inputs
])
mnist_train = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# random_split requires the lengths to sum to len(dataset). Hand the remainder
# out one sample at a time to the first clients so any NUM_CLIENTS works, not
# only values that divide the dataset size exactly.
base_size, remainder = divmod(len(mnist_train), NUM_CLIENTS)
client_sizes = [base_size + (1 if i < remainder else 0) for i in range(NUM_CLIENTS)]
client_datasets = torch.utils.data.random_split(mnist_train, client_sizes)

# 2. Define a better CNN model
class CNN(nn.Module):
    """Two-conv-layer classifier for single-channel 28x28 images, 10 output logits.

    Each 3x3 convolution (padding 1) is followed by ReLU and a 2x2 max-pool,
    so a 28x28 input reaches the first linear layer as 64 feature maps of 7x7.
    The same dropout module (p=0.25) is applied after the conv stack and after
    the hidden linear layer.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.dropout = nn.Dropout(0.25)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10).

        Args:
            x: tensor of shape (batch, 1, 28, 28); a single unbatched
               (1, 28, 28) image is also accepted and treated as batch size 1.

        Raises:
            RuntimeError: if the spatial size does not produce 64*7*7 features
                per sample (raised by the first linear layer).
        """
        # Promote an unbatched image to a batch of one so the per-sample
        # flatten below keeps working for that call pattern.
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.dropout(x)
        # Flatten per sample. Inferring the batch dimension with view(-1, ...)
        # would silently regroup samples when the input size is wrong; this
        # way a mismatch surfaces as a shape error in fc1.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

# 3. Local Differential Privacy Perturbation
def ldp_perturb(w, c, r, epsilon):
    """Randomly map a weight to one of two values centred on ``c``.

    The weight is first clipped to ``[c - r, c + r]``. It is then replaced by
    ``c + r*(e^eps + 1)/(e^eps - 1)`` with probability
    ``((w - c)*(e^eps - 1) + r*(e^eps + 1)) / (2*r*(e^eps + 1))``
    and by ``c - r*(e^eps + 1)/(e^eps - 1)`` otherwise.

    Without the clipping step a weight outside the range yields a
    "probability" below 0 or above 1, which makes the output deterministic.

    Args:
        w: weight to perturb; a scalar or a NumPy array-like of weights.
        c: centre of the weight range.
        r: half-width of the weight range; must be > 0.
        epsilon: must be > 0 (``e^eps - 1`` is used as a divisor).

    Returns:
        A NumPy float for scalar input, or an array with the shape of ``w``.
        Randomness comes from the global ``np.random`` state.

    Raises:
        ValueError: if ``r`` or ``epsilon`` is not strictly positive.
    """
    if r <= 0:
        raise ValueError("r must be strictly positive")
    if epsilon <= 0:
        raise ValueError("epsilon must be strictly positive")

    exp_eps = np.exp(epsilon)
    magnitude = r * (exp_eps + 1) / (exp_eps - 1)

    # Work in float64 and keep the weight inside the supported range so that
    # the value below is always a valid probability.
    clipped = np.clip(np.asarray(w, dtype=np.float64), c - r, c + r)
    p_high = ((clipped - c) * (exp_eps - 1) + r * (exp_eps + 1)) / (2 * r * (exp_eps + 1))

    if np.ndim(p_high) == 0:
        if np.random.rand() < p_high:
            return c + magnitude
        return c - magnitude

    draws = np.random.random_sample(np.shape(p_high))
    return np.where(draws < p_high, c + magnitude, c - magnitude)

def perturb_model(model, c=0.0, r=R, epsilon=EPSILON):
    """Overwrite every parameter of ``model`` in place with its perturbed value.

    Each weight is clipped to ``[c - r, c + r]`` and then replaced by
    ``c + r*(e^eps + 1)/(e^eps - 1)`` with probability
    ``((w - c)*(e^eps - 1) + r*(e^eps + 1)) / (2*r*(e^eps + 1))``,
    otherwise by ``c - r*(e^eps + 1)/(e^eps - 1)``. This is the same rule
    ``ldp_perturb`` applies to a single weight, evaluated on whole parameter
    arrays at once instead of one Python call per weight.

    Args:
        model: module whose ``parameters()`` are perturbed in place.
        c: centre of the weight range.
        r: half-width of the weight range; must be > 0.
        epsilon: must be > 0.

    Returns:
        The same ``model`` object, for call chaining.

    Raises:
        ValueError: if ``r`` or ``epsilon`` is not strictly positive.
    """
    if r <= 0 or epsilon <= 0:
        raise ValueError("r and epsilon must be strictly positive")

    exp_eps = np.exp(epsilon)
    magnitude = r * (exp_eps + 1) / (exp_eps - 1)

    with torch.no_grad():
        for param in model.parameters():
            # reshape (not view) also copes with non-contiguous parameters;
            # float64 on CPU keeps the probability computation in NumPy.
            weights = (
                param.detach()
                .reshape(-1)
                .to(device="cpu", dtype=torch.float64)
                .numpy()
            )
            # Out-of-range weights would otherwise give a probability outside
            # [0, 1] and a deterministic (non-random) output.
            weights = np.clip(weights, c - r, c + r)
            p_high = ((weights - c) * (exp_eps - 1) + r * (exp_eps + 1)) / (2 * r * (exp_eps + 1))
            draws = np.random.rand(weights.size)
            perturbed = np.where(draws < p_high, c + magnitude, c - magnitude)
            # copy_ converts back to the parameter's own dtype and device.
            param.copy_(torch.from_numpy(perturbed).view_as(param))
    return model

# 4. Federated learning loop with local training and aggregation
def _train_client(global_model, dataset, criterion, device):
    """Train a fresh CNN, initialised from the global weights, on one client's data."""
    # Start the client from the current global weights.
    client_model = CNN().to(device)
    client_model.load_state_dict(global_model.state_dict())

    optimizer = optim.SGD(client_model.parameters(), lr=LEARNING_RATE, momentum=0.9)
    train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

    # Local training: several passes over the client's own shard.
    client_model.train()
    for _ in range(LOCAL_EPOCHS):
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            loss = criterion(client_model(data), target)
            loss.backward()
            optimizer.step()
    return client_model


def _average_state_dicts(state_dicts):
    """Return the element-wise mean of state dicts that share the same keys."""
    return {
        key: torch.stack([state[key] for state in state_dicts], 0).mean(0)
        for key in state_dicts[0]
    }


def federated_learning(apply_ldp=False):
    """Run EPOCHS rounds of federated averaging over NUM_CLIENTS clients.

    In every round each client trains a copy of the global model on its own
    entry of ``client_datasets``. The client weights are then averaged into
    the global model, which is evaluated on the test set.

    Args:
        apply_ldp: when True, each client's trained weights are passed through
            ``perturb_model`` (with ``R`` and ``EPSILON``) before averaging.

    Returns:
        The global model after the last round.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    global_model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()

    # Named round_idx so the builtin round() is not shadowed.
    for round_idx in range(EPOCHS):
        print(f"\nRound {round_idx + 1}")
        client_states = []

        for client_id in range(NUM_CLIENTS):
            print(f"  Training Client {client_id + 1}/{NUM_CLIENTS}", end="\r")

            client_model = _train_client(
                global_model, client_datasets[client_id], criterion, device
            )

            # Apply LDP perturbation if enabled
            if apply_ldp:
                perturb_model(client_model, r=R, epsilon=EPSILON)

            client_states.append(client_model.state_dict())

        # Aggregation (plain average of the client weights)
        global_model.load_state_dict(_average_state_dicts(client_states))

        # Evaluate after every round
        evaluate_model(global_model, device, f"Round {round_idx + 1}")

    return global_model

# 5. Evaluation
def evaluate_model(model, device, stage="Final"):
    """Print and return the model's accuracy (in percent) on the MNIST test split.

    Args:
        model: network to evaluate; it is switched to eval mode and left there.
        device: device the evaluation batches are moved to.
        stage: label used as the prefix of the printed line.

    Returns:
        Accuracy as a float percentage in [0, 100].
    """
    model.eval()

    # Reuse the module-level `transform` so test-time preprocessing cannot
    # drift from what the training data received. download=True makes the
    # function usable even if the test files are not on disk yet.
    test_dataset = datasets.MNIST(
        root='./data', train=False, download=True, transform=transform
    )
    test_loader = DataLoader(test_dataset, batch_size=1000)

    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            pred = model(data).argmax(dim=1)
            correct += (pred == target).sum().item()
            total += target.size(0)

    accuracy = 100 * correct / total
    print(f"{stage} Test Accuracy: {accuracy:.2f}%")
    return accuracy

# Run everything
# Baseline experiment: federated averaging with no perturbation of client weights.
print("Running Federated Learning without LDP perturbation")
model = federated_learning(apply_ldp=False)

# Score the returned global model once more; the cell's last expression
# displays the accuracy value.
print("\nFinal evaluation:")
baseline_device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
evaluate_model(model, baseline_device)


100%|██████████| 9.91M/9.91M [00:00<00:00, 13.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 377kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.47MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 4.50MB/s]


Running Federated Learning without LDP perturbation

Round 1
Round 1 Test Accuracy: 95.45%

Round 2
Round 2 Test Accuracy: 97.44%

Round 3
Round 3 Test Accuracy: 98.14%

Final evaluation:
Final Test Accuracy: 98.14%


98.14

In [3]:
# The same experiment can also be run with the LDP perturbation switched on:
print("\nRunning Federated Learning WITH LDP perturbation")
model_with_ldp = federated_learning(apply_ldp=True)

# Score the returned global model; the cell's last expression displays the accuracy.
print("\nFinal evaluation with LDP:")
ldp_device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
evaluate_model(model_with_ldp, ldp_device)


Running Federated Learning WITH LDP perturbation

Round 1
Round 1 Test Accuracy: 91.93%

Round 2
Round 2 Test Accuracy: 96.49%

Round 3
Round 3 Test Accuracy: 97.15%

Final evaluation with LDP:
Final Test Accuracy: 97.15%


97.15