<a href="https://colab.research.google.com/github/vvmnnnkv/private-ai/blob/master/Section%203%20-%20Encrypted%20Federated%20Learning%20Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Federated Learning with Encrypted Gradients Aggregation Project

This project improves on the naive Federated Learning protocol implemented previously. The gradients are now securely aggregated using the additive secret sharing method.

Aggregation is done among the workers themselves, so we don't need a separate, dedicated aggregator worker.

In [0]:
# install dependency
!pip install syft

In [2]:
import torch
import syft as sy
import math
import pandas as pd

# Create the PySyft hook for torch; it is passed to every VirtualWorker below.
# NOTE(review): presumably this is also what gives tensors/modules the
# .send/.get/.fix_prec/.share methods used later -- confirm against PySyft docs.
hook = sy.TorchHook(torch)

W0708 21:44:12.342126 140709261846400 secure_random.py:26] Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/usr/local/lib/python3.6/dist-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.14.0.so'
W0708 21:44:12.363773 140709261846400 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/tf_encrypted/session.py:26: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.



In [3]:
# load MNIST included with Colab
def mnist_to_torch(df, train=True):
  """Convert an MNIST-style DataFrame into float feature and label tensors.

  Column 0 of `df` holds the class label; every other column is a feature.

  Args:
    df: DataFrame whose column labelled 0 contains the labels.
    train: unused; kept so existing callers keep working.

  Returns:
    (X, y): float tensors, where X holds all non-label columns and y is the
    one-hot encoding of column 0 (one column per label value present in df).
  """
  one_hot_labels = pd.get_dummies(df[0])
  features = df.drop(0, axis=1)
  X = torch.tensor(features.values).type(torch.float)
  y = torch.tensor(one_hot_labels.values).type(torch.float)
  return X, y

# Train & test datasets (CSV files without a header row; column 0 is the label)
train_csv = "sample_data/mnist_train_small.csv"
test_csv = "sample_data/mnist_test.csv"
X_train, y_train = mnist_to_torch(pd.read_csv(train_csv, header=None))
X_test, y_test = mnist_to_torch(pd.read_csv(test_csv, header=None))

# number of training rows and number of features per row
num_train, num_features = X_train.shape

print("Train size %d, test size: %d" % (num_train, y_test.size(0)))

Train size 20000, test size: 10000


In [0]:
# Number of data-holding workers.
# NOTE: there's a bug in pysyft that prevents sharing to more than 2 workers:
# https://github.com/OpenMined/PySyft/issues/2341
num_workers = 2

# One virtual worker per data shard, with ids "worker_0", "worker_1", ...
workers = [sy.VirtualWorker(hook, id="worker_%d" % idx) for idx in range(num_workers)]

# Extra worker handed to .share() as the crypto provider
crypto_provider = sy.VirtualWorker(hook, id="crypto-provider")

In [0]:
# Cut the training set into contiguous shards and send shard i to workers[i]
chunk_size = num_train // num_workers
fed_dataset = []
for i in range(num_workers):
  start = i * chunk_size
  # the last worker also receives the rows left over by the integer division
  end = num_train if i + 1 >= num_workers else start + chunk_size
  shard = slice(start, end)
  fed_dataset.append((
      X_train[shard].send(workers[i]),
      y_train[shard].send(workers[i])
  ))


In [6]:
# Show the workers and the (X, y) pointer pairs held for each of them
print(workers, fed_dataset, sep="\n")

[<VirtualWorker id:worker_0 #objects:2>, <VirtualWorker id:worker_1 #objects:2>]
[((Wrapper)>[PointerTensor | me:41187151049 -> worker_0:66122510366], (Wrapper)>[PointerTensor | me:81037035040 -> worker_0:93398699170]), ((Wrapper)>[PointerTensor | me:61797687527 -> worker_1:33244494485], (Wrapper)>[PointerTensor | me:24216443568 -> worker_1:72551751488])]


In [0]:
# Take model gradients and share among workers
def share_model_grads(model, workers, crypto_provider):
  """Secret-share the gradient of every trainable parameter of `model`.

  Args:
    model: object exposing named_parameters(); each trainable parameter must
      already carry a `.grad`.
    workers: workers that receive the shares (unpacked into `.share(...)`).
    crypto_provider: worker passed to `.share(...)` as `crypto_provider`.

  Returns:
    dict mapping parameter name -> fixed-precision, shared gradient.
    Parameters with requires_grad == False are left out.
  """
  return {
      name: param.grad.fix_prec().share(*workers, crypto_provider=crypto_provider)
      for name, param in model.named_parameters()
      if param.requires_grad
  }

  
# Federated training procedure
def fed_train(model, criteria, fed_dataset, test_dataset, opt, avg_epochs = 50, worker_epochs = 1, lr=0.001):
  """Federated training with secret-shared gradient averaging.

  Every global epoch:
    1. a copy of `model` is sent to the worker that holds each data shard;
    2. each copy runs `worker_epochs` forward/backward/optimizer steps on its
       own shard and the average loss is printed;
    3. the gradients of each copy are secret-shared (`share_model_grads`),
       summed and averaged while still shared, then decoded and applied to the
       local `model` as `param -= lr * avg_grad`;
    4. the accuracy of the updated `model` on `test_dataset` is printed.

  Args:
    model: local torch module to train.
    criteria: loss callable, invoked as criteria(pred, y).
    fed_dataset: sequence of (X, y) pairs living on workers; `X.location`
      identifies the worker holding the pair.
    test_dataset: (X_test, y_test) pair evaluated locally; y_test is one-hot
      (argmax over dim 1 is compared with the prediction).
    opt: optimizer factory, called as opt(params=..., lr=lr).
    avg_epochs: number of global (aggregation) rounds.
    worker_epochs: number of local steps per round on every worker.
    lr: learning rate used by the worker optimizers and by the global update.

  Returns:
    The same `model` object, with its parameters updated.

  Note:
    Shares are distributed using the module-level `workers` and
    `crypto_provider`; they must be defined before this function is called.
  """
  for global_epoch in range(avg_epochs):
    # copy latest model to workers (one copy per worker, keyed by worker id)
    fed_models = {}
    for X, _ in fed_dataset:
      fed_model = model.copy().send(X.location)
      optimizer = opt(params=fed_model.parameters(), lr=lr)
      fed_models[fed_model.location.id] = (fed_model, optimizer)

    # train every worker's copy on its own shard (workers are visited in turn)
    # NOTE(review): gradients are never zeroed between local epochs, so with
    # worker_epochs > 1 each copy's .grad accumulates across steps -- confirm
    # this is intended before adding optimizer.zero_grad().
    for local_epoch in range(worker_epochs):
      losses = []
      for X, y in fed_dataset:
        fed_model, optimizer = fed_models[X.location.id]
        pred = fed_model(X)
        loss = criteria(pred, y)
        loss.backward()
        optimizer.step()
        losses.append(loss.get())
      print('Avg loss (%d/%d): %f' % (global_epoch, local_epoch, sum(losses) / len(losses)))

    # aggregate worker's models: share each model's gradients among the workers
    all_grads = {}
    for worker_id, (fed_model, _) in fed_models.items():
      shared = share_model_grads(fed_model, workers, crypto_provider)
      # move shared pointers to my machine
      all_grads[worker_id] = {name: grad.get() for name, grad in shared.items()}

    with torch.no_grad():
      # sum the shared gradients; the first worker's dict doubles as accumulator
      grads_sum = None
      for grads in all_grads.values():
        if grads_sum is None:
          grads_sum = grads
          continue
        for n in grads_sum:
          grads_sum[n] += grads[n]

      state = model.state_dict()
      # average over the gradient sets that were actually summed, not over the
      # global worker list, so the mean stays correct if the two ever differ
      num_models = len(all_grads)
      for n in grads_sum:
        grads_sum[n] /= num_models
        # reconstruct the plain value from its shares and decode fixed precision
        grads_sum[n] = grads_sum[n].get().float_prec()
        state[n] -= lr * grads_sum[n]
      model.load_state_dict(state)

      # calculate accuracy on test set
      X_test, y_test = test_dataset
      y_pred = torch.softmax(model(X_test), dim=1)
      valid = (torch.argmax(y_pred, dim=1) == torch.argmax(y_test, dim=1)).sum()
      print('Test Accuracy: %f' % (float(valid) / float(y_test.size(0))))

  return model

    

In [8]:
# Simple MLP that outputs raw logits (the sigmoid is applied inside the loss)
hidden_units, num_classes = 50, 10
model = torch.nn.Sequential(
  torch.nn.Linear(num_features, hidden_units),
  torch.nn.ReLU(),
  torch.nn.Linear(hidden_units, num_classes),
)
loss = torch.nn.BCEWithLogitsLoss()

# Train!
fed_train(model, loss, fed_dataset, (X_test, y_test), torch.optim.SGD, avg_epochs=200, worker_epochs=1)



Avg loss (0/0): 9.773861
Test Accuracy: 0.146000
Avg loss (1/0): 3.588379
Test Accuracy: 0.161300
Avg loss (2/0): 2.208213
Test Accuracy: 0.176100
Avg loss (3/0): 1.864568
Test Accuracy: 0.188900
Avg loss (4/0): 1.680759
Test Accuracy: 0.199700
Avg loss (5/0): 1.553097
Test Accuracy: 0.214000
Avg loss (6/0): 1.452741
Test Accuracy: 0.225700
Avg loss (7/0): 1.368572
Test Accuracy: 0.239200
Avg loss (8/0): 1.295168
Test Accuracy: 0.248600
Avg loss (9/0): 1.229687
Test Accuracy: 0.260400
Avg loss (10/0): 1.170548
Test Accuracy: 0.270900
Avg loss (11/0): 1.116734
Test Accuracy: 0.280800
Avg loss (12/0): 1.067501
Test Accuracy: 0.292400
Avg loss (13/0): 1.022332
Test Accuracy: 0.302000
Avg loss (14/0): 0.980860
Test Accuracy: 0.312500
Avg loss (15/0): 0.942804
Test Accuracy: 0.322600
Avg loss (16/0): 0.907807
Test Accuracy: 0.331800
Avg loss (17/0): 0.875643
Test Accuracy: 0.340800
Avg loss (18/0): 0.846064
Test Accuracy: 0.350200
Avg loss (19/0): 0.818938
Test Accuracy: 0.355700
Avg loss (

Sequential(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=10, bias=True)
)