<a href="https://colab.research.google.com/github/yannuma/PrivacyProject/blob/main/Model_Creation/FL_without_Librarys.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install opacus
from opacus import PrivacyEngine
from opacus.validators import ModuleValidator
import tarfile
import torch
import requests
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split
import torch.nn.functional as F
import torchvision
import torch.utils.data as torch_data
import torchvision.transforms as transforms
import numpy as np
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import time
from google.colab import files

Collecting opacus
  Downloading opacus-1.4.1-py3-none-any.whl (226 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/226.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.7/226.7 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=2.0->opacus)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=2.0->opacus)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=2.0->opacus)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=2.0->opacus)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=2.0->opacus)


In [3]:
# Download the Texas dataset archive, unpack it under data/ and parse the
# feature/label text files into plain Python lists.
DATASET_URL = 'https://www.comp.nus.edu.sg/~reza/files/dataset_texas.tgz'
ARCHIVE_PATH = 'dataset_texas.tgz'

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(DATASET_URL, timeout=60)
if response.status_code != 200:
    # Stop here: continuing would either crash in tarfile.open with a
    # misleading error or silently reuse a stale archive from an earlier run.
    raise RuntimeError(f"Failed to download file: {response.status_code}")

with open(ARCHIVE_PATH, 'wb') as file:
    file.write(response.content)
print("Download completed successfully.")

with tarfile.open(ARCHIVE_PATH) as f:
    f.extractall('data/')

with open('data/texas/100/feats', 'r') as f:
    features = f.readlines()
with open('data/texas/100/labels', 'r') as f:
    labels = f.readlines()

print(len(features))
print(len(labels))
if len(features) != len(labels):
    raise ValueError(
        f"Feature/label count mismatch: {len(features)} features vs {len(labels)} labels"
    )

# Each feature line is a comma-separated list of integers; all whitespace is
# stripped before splitting on commas.
features_list = [list(map(int, ''.join(feature.split()).split(','))) for feature in features]
# Each label line holds a single integer class id.
labels_list = [int(label.strip()) for label in labels]

Download completed successfully.
67330
67330


In [4]:
class NetSeq(nn.Module):
    """Fully connected classifier: two tanh hidden layers and a linear output.

    The layer sizes are configurable; the defaults reproduce the sizes that
    were previously hard-coded, so ``NetSeq()`` builds the same architecture
    (and the same state-dict keys ``fc1``/``fc2``/``fc3``) as before.

    Args:
        in_features: length of each input vector.
        hidden_features: width of both hidden layers.
        num_classes: number of output scores per sample.
    """

    def __init__(self, in_features=6169, hidden_features=128, num_classes=101):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return the un-normalised class scores for a batch ``x``."""
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        # No softmax here: the raw scores are returned as-is.
        x = self.fc3(x)
        return x

def calc_accuracy(loader, network, device):
    """Compute top-1 accuracy of ``network`` over all batches in ``loader``.

    Args:
        loader: iterable yielding ``(inputs, labels)`` tensor pairs.
        network: module mapping ``inputs`` to per-class scores along dim 1.
        device: device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, total)`` where ``accuracy`` is the fraction of
        correctly predicted samples and ``total`` the number of samples seen.
        An empty loader yields ``(0.0, 0)`` instead of dividing by zero.
    """
    correct = 0
    total = 0

    # Evaluate in eval mode, then restore whatever mode the caller had so a
    # surrounding training loop is not affected.
    was_training = network.training
    network.eval()
    try:
        # Gradients are never needed here; disabling them locally means the
        # function is safe even when the caller forgets torch.no_grad().
        with torch.no_grad():
            for inputs, labels in loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = network(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        network.train(was_training)

    if total == 0:
        return (0.0, 0)
    return (correct / total, total)

In [5]:
def train_client(trainloader, testloader, lr = 0.01, epochs = 2, network_output = True):
    """Train a fresh ``NetSeq`` on one client's data and keep its best weights.

    After every epoch the model is scored on ``testloader`` with
    ``calc_accuracy``; the weights of the highest-scoring epoch are kept.

    Args:
        trainloader: iterable of ``(inputs, labels)`` training batches.
        testloader: iterable of ``(inputs, labels)`` batches used for scoring.
        lr: learning rate passed to Adam.
        epochs: number of passes over ``trainloader``.
        network_output: if True return the model together with the accuracy,
            otherwise return the accuracy only.

    Returns:
        ``(network, max_accuracy)`` with ``network`` on the CPU holding the
        best weights when ``network_output`` is True, else ``max_accuracy``.
    """
    best_model_path = 'best_model.pth'

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    network = NetSeq().to(device)
    optimizer = torch.optim.Adam(network.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()

    def snapshot():
        # Detached copies, so later optimizer steps cannot alter the snapshot.
        return {name: tensor.detach().clone() for name, tensor in network.state_dict().items()}

    max_accuracy = 0
    # The best weights are held in memory rather than read back from a shared
    # file: with a fixed checkpoint name, a run whose accuracy never exceeded 0
    # (or epochs == 0) would load a stale checkpoint written by another client
    # or an earlier run. Starting from the initial weights guarantees there is
    # always a valid state to restore.
    best_state = snapshot() if network_output else None

    for _ in range(epochs):
        network.train()
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = network(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Score on the client's held-out data.
        with torch.no_grad():
            accuracy, _ = calc_accuracy(testloader, network, device)

        if accuracy > max_accuracy:
            max_accuracy = accuracy
            if network_output:
                best_state = snapshot()

    if not network_output:
        return max_accuracy

    network.load_state_dict(best_state)
    network.to('cpu')
    # Still write the checkpoint file so the on-disk artefact of the previous
    # implementation keeps existing; it is no longer read back by this function.
    torch.save(network.state_dict(), best_model_path)
    return (network, max_accuracy)


def train_client_DP(trainloader, testloader, lr = 0.001, epochs = 80, network_output = True, C = 3, epsilon = 10):
    """Train a fresh ``NetSeq`` with Opacus differential privacy on one client.

    The model, optimizer and train loader are wrapped by
    ``PrivacyEngine.make_private_with_epsilon``. After every epoch the model is
    scored on ``testloader`` with ``calc_accuracy`` and the weights of the
    highest-scoring epoch are kept.

    Args:
        trainloader: data loader with the client's training data.
        testloader: iterable of ``(inputs, labels)`` batches used for scoring.
        lr: learning rate passed to Adam.
        epochs: number of training epochs; also handed to the privacy engine.
        network_output: if True return the model together with the accuracy,
            otherwise return the accuracy only.
        C: value passed to the privacy engine as ``max_grad_norm``.
        epsilon: value passed to the privacy engine as ``target_epsilon``.

    Returns:
        ``(network, max_accuracy)`` when ``network_output`` is True, where
        ``network`` is the module returned by the privacy engine (moved to the
        CPU, holding the best weights); otherwise ``max_accuracy`` only.
    """
    best_model_path = 'best_model.pth'
    Delta = 1e-5

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    network = NetSeq().to(device)
    network = ModuleValidator.fix(network)
    network = network.to(device)
    optimizer = torch.optim.Adam(network.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()

    privacy_engine = PrivacyEngine()
    network, optimizer, trainloader = privacy_engine.make_private_with_epsilon(
        module=network,
        optimizer=optimizer,
        data_loader=trainloader,
        max_grad_norm=C,
        target_epsilon=epsilon,
        target_delta=Delta,
        epochs=epochs,
    )

    network.to(device)

    def snapshot():
        # Detached copies, so later optimizer steps cannot alter the snapshot.
        return {name: tensor.detach().clone() for name, tensor in network.state_dict().items()}

    max_accuracy = 0
    # The best weights are held in memory rather than read back from a shared
    # file: with a fixed checkpoint name, a run whose accuracy never exceeded 0
    # would load a stale checkpoint written by another client or an earlier
    # run (possibly with different state-dict keys).
    best_state = snapshot() if network_output else None

    for _ in range(epochs):
        network.train()
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = network(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Score on the client's held-out data.
        with torch.no_grad():
            accuracy, _ = calc_accuracy(testloader, network, device)

        if accuracy > max_accuracy:
            max_accuracy = accuracy
            if network_output:
                best_state = snapshot()

    if not network_output:
        return max_accuracy

    network.load_state_dict(best_state)
    network.to('cpu')
    # Still write the checkpoint file so the on-disk artefact of the previous
    # implementation keeps existing; it is no longer read back by this function.
    torch.save(network.state_dict(), best_model_path)
    return (network, max_accuracy)

def aggregate_models(models):
    """Average the weights of several same-architecture models.

    Every floating-point entry of the models' state dicts (parameters and
    buffers) is averaged element-wise with equal weight. Non-floating entries
    (e.g. integer counters) cannot be averaged meaningfully and are copied
    from the first model.

    Args:
        models: non-empty sequence of modules with identical state-dict keys
            and shapes. Each one is switched to eval mode; their weights are
            not modified.

    Returns:
        A new module (a copy of ``models[0]``, in eval mode) carrying the
        averaged weights.

    Raises:
        ValueError: if ``models`` is empty.
    """
    import copy

    models = list(models)
    if not models:
        raise ValueError("aggregate_models() needs at least one model")

    for model in models:
        model.eval()

    num_models = len(models)
    states = [model.state_dict() for model in models]

    averaged_state = {}
    for name, reference in states[0].items():
        if torch.is_floating_point(reference):
            # clone() so the in-place accumulation never touches models[0].
            total = reference.detach().clone()
            for state in states[1:]:
                total += state[name].detach().to(total.device)
            averaged_state[name] = total / num_models
        else:
            averaged_state[name] = reference.detach().clone()

    # Copying an existing model (instead of calling its class with no
    # arguments) also works for models whose constructor takes parameters.
    aggregated_model = copy.deepcopy(models[0])
    aggregated_model.load_state_dict(averaged_state)
    aggregated_model.eval()

    return aggregated_model

In [6]:
# Split the full dataset evenly across the clients, then split each client's
# share 80/20 into a training and a test loader.
clients = 2
batch = 64

# Seeded generator so the client partitions and the train/test splits are the
# same on every run; without it results cannot be reproduced or compared
# between cells that retrain on "the same" clients.
SPLIT_SEED = 0
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

dataset = TensorDataset(torch.tensor(features_list, dtype=torch.float), torch.tensor(labels_list, dtype=torch.long))
client_size = len(dataset) // clients
# The last client absorbs the remainder when the dataset size is not divisible.
sizes = [client_size] * (clients - 1) + [len(dataset) - client_size * (clients - 1)]
client_datasets = random_split(dataset, sizes, generator=split_generator)
train_loaders = []
test_loaders = []

for client_dataset in client_datasets:
    train_size = int(0.8 * len(client_dataset))
    train_subset, test_subset = random_split(
        client_dataset,
        [train_size, len(client_dataset) - train_size],
        generator=split_generator,
    )
    train_loader = DataLoader(train_subset, batch_size=batch, shuffle=True)
    train_loaders.append(train_loader)
    test_loader = DataLoader(test_subset, batch_size=batch, shuffle=False)
    test_loaders.append(test_loader)

In [15]:
# Sweep over the number of local training epochs. For every setting each
# client trains its own model, the models are averaged, and the averaged
# model is scored on every client's test loader.
epochs = [5, 10, 20, 40, 60]

results = []
agg_model = []
for epoch in epochs:
    # Local training, one fresh model per client.
    client_models = []
    for client in range(clients):
        client_model, client_accuracy = train_client(
            train_loaders[client], test_loaders[client], 0.001, epoch
        )
        client_models.append(client_model)
        results.append({'Mode': "Training Client", 'client': client, 'epoch': epoch, 'accuracy': client_accuracy})

    # Average the client models; one aggregated model is stored per entry of
    # `epochs`, in the same order.
    aggegrated_model = aggregate_models(client_models)
    agg_model.append(aggegrated_model)

    # Score the aggregated model on each client's test loader.
    with torch.no_grad():
        for client in range(clients):
            acc = calc_accuracy(test_loaders[client], aggegrated_model, device = 'cpu')
            results.append({'Mode': "Aggregated Model", 'client': client, 'epoch': epoch, 'accuracy': acc[0]})

# Mean aggregated-model accuracy over clients, per epoch setting.
df = pd.DataFrame(results)
agg_model_df = df.loc[df['Mode'] == "Aggregated Model"]
average_accuracy_per_epoch = agg_model_df.groupby('epoch')['accuracy'].mean()
max_epoch = average_accuracy_per_epoch.idxmax()
max_average_accuracy = average_accuracy_per_epoch.loc[max_epoch]

print("Average Accuracy per Epoch for Aggregated Model:")
print(average_accuracy_per_epoch)
print("\nMaximum Average Accuracy for Aggregated Model:")
print(f"Epoch: {max_epoch}, Accuracy: {max_average_accuracy}")

Average Accuracy per Epoch for Aggregated Model:
epoch
5     0.585177
10    0.574113
20    0.571513
40    0.602555
60    0.595723
Name: accuracy, dtype: float64

Maximum Average Accuracy for Aggregated Model:
Epoch: 40, Accuracy: 0.6025545819099956


In [16]:
# Look up the aggregated model that belongs to the best epoch setting:
# `max_epoch` is a value from `epochs`, so its position in that list is the
# index used to pick from `agg_model`.
epoch_index = epochs.index(max_epoch)
best_model = agg_model[epoch_index]

# Persist the weights, then hand the file to the browser via Colab.
fl_model_path = 'model_FL.pth'
torch.save(best_model.state_dict(), fl_model_path)
from google.colab import files
files.download(fl_model_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [24]:
# TODO: everything from here on

# Grid over privacy budget (epsilon) and number of local epochs. For every
# combination each client trains with train_client_DP, the models are averaged
# and the averaged model is scored on every client's test loader.
epsilons = [1, 10, 20]
epochs = [5, 10, 20, 40, 60]
results_DP = []
# product() iterates epsilon-major, i.e. all epoch settings for the first
# epsilon, then the next epsilon, and so on.
for eps, epoch in itertools.product(epsilons, epochs):
    client_models = []
    for client in range(clients):
        res = train_client_DP(train_loaders[client], test_loaders[client], epochs = epoch, epsilon = eps)
        # `_module` is the model held inside the object returned by
        # train_client_DP; aggregate_models is given that inner model.
        client_models.append(res[0]._module)
        results_DP.append({'Mode': "Training Client", 'client': client, 'epsilon': eps,'epoch': epoch, 'accuracy': res[1]})

    aggegrated_model = aggregate_models(client_models)

    with torch.no_grad():
        for client in range(clients):
            # Evaluation always runs on the CPU.
            device = 'cpu'
            acc = calc_accuracy(test_loaders[client], aggegrated_model, device)
            results_DP.append({'Mode': "Aggregated Model", 'client': client, 'epsilon': eps,'epoch': epoch, 'accuracy': acc[0]})




In [23]:
# Mean aggregated-model accuracy over clients for every (epoch, epsilon) pair.
df_DP = pd.DataFrame(results_DP)
agg_model_df = df_DP.loc[df_DP['Mode'] == "Aggregated Model"]
average_accuracy_per_epoch_epsilon = agg_model_df.groupby(['epoch', 'epsilon'])['accuracy'].mean()
print(average_accuracy_per_epoch_epsilon)

# For each epsilon find the (epoch, epsilon) index label with the highest mean
# accuracy, then turn it into an (epoch, accuracy) tuple.
best_label_per_epsilon = average_accuracy_per_epoch_epsilon.groupby('epsilon').idxmax()
max_accuracy_per_epsilon = best_label_per_epsilon.apply(
    lambda label: (label[0], average_accuracy_per_epoch_epsilon.loc[label])
)
max_accuracy_per_epsilon = (
    max_accuracy_per_epsilon
    .rename("epoch, accuracy")
    .reset_index()
)
print("Maximum Average Accuracy per Epsilon:")
print(max_accuracy_per_epsilon)

epoch  epsilon
5      1          0.157285
       10         0.246101
       20         0.286722
10     1          0.243873
       10         0.315535
       20         0.337591
20     1          0.293406
       10         0.401753
       20         0.425590
40     1          0.311451
       10         0.414228
       20         0.435764
60     1          0.295411
       10         0.388460
       20         0.424254
Name: accuracy, dtype: float64
Maximum Average Accuracy per Epsilon:
   epsilon            epoch, accuracy
0        1   (40, 0.3114510619337591)
1       10   (40, 0.4142284271498589)
2       20  (40, 0.43576414673993763)


In [8]:
# Train one aggregated DP model per epsilon with a fixed number of local
# epochs, save its weights and download the file through Colab.
epsilons = [1, 10, 20]
epoch = 40
for eps in epsilons:
    # One DP-trained model per client; `_module` is the inner model of the
    # object returned by train_client_DP.
    client_models = [
        train_client_DP(train_loaders[client], test_loaders[client], epochs = epoch, epsilon = eps)[0]._module
        for client in range(clients)
    ]

    aggegrated_model = aggregate_models(client_models)
    model_path = f'model_FL_epsilon_{eps}.pth'
    torch.save(aggegrated_model.state_dict(), model_path)
    files.download(model_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>