In [1]:
pip install tensorflow tensorflow-federated numpy



In [9]:
import numpy as np
import dp_accounting
import pandas as pd
import tensorflow as tf
import tensorflow_federated as tff
import tensorflow_privacy
from tensorflow_privacy import DPKerasSGDOptimizer, compute_dp_sgd_privacy_statement

def create_dataset():
    data = np.array([[x] for x in range(10)], dtype=np.float32)  # Sample input data
    labels = np.array([[2 * x] for x in range(10)], dtype=np.float32)  # Labels: y = 2x
    dataset = tf.data.Dataset.from_tensor_slices((data, labels))  # Create TensorFlow dataset
    return dataset.batch(2)  # Batch size of 2

  # Simulate multiple clients with small datasets
clients = ['client1', 'client2', 'client3']
client_data = {client: create_dataset() for client in clients}  # Assign dataset to each client


In [10]:
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(1, activation='linear', input_shape=(1,))  # Single-layer model: y = Wx + b
    ])
    return model

  # Convert the Keras model to a federated learning model
def model_fn():
    keras_model = create_model()  # Create base model
    return tff.learning.models.from_keras_model(
        keras_model,
        input_spec=client_data['client1'].element_spec,  # Define input format
        loss=tf.keras.losses.MeanSquaredError(),  # Loss function for regression
        metrics=[tf.keras.metrics.MeanAbsoluteError()]  # Metric to track
    )


In [18]:
# Initialize Federated Averaging (FedAvg) process
trainer = tff.learning.algorithms.build_weighted_fed_avg(model_fn, client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.01))

# Initialize server state
state = trainer.initialize()
#Use DPKerasSGDOptimizer to add clipping and noise for differential privacy
dpopt = DPKerasSGDOptimizer(l2_norm_clip=1.0, noise_multiplier=0.5, num_microbatches=1)
dpstatement = compute_dp_sgd_privacy_statement(number_of_examples=3, batch_size=2, noise_multiplier=0.5, num_epochs=1, delta=1/3) #delta is typically 1/ num_of_examples
print(dpstatement)
# Simulate federated training round
def client_update(client_dataset, model_weights):
    model = create_model()
    model.set_weights(model_weights)  # Set weights from server
    model.compile(optimizer=dpopt, loss='mse')  # Compile model with optimizer and loss
    model.fit(client_dataset, epochs=1)  # Train locally for 1 epoch
    return model.get_weights()  # Return updated weights


DP-SGD performed over 3 examples with 2 examples per iteration, noise multiplier
0.5 for 1 epochs with microbatching, and no bound on number of examples per
user.

This privacy guarantee protects the release of all model checkpoints in addition
to the final model.

Example-level DP with add-or-remove-one adjacency at delta = 0.3333333333333333
computed with RDP accounting:
    Epsilon with each example occurring once per epoch:        11.721
    Epsilon assuming Poisson sampling (*):                     17.767

No user-level privacy guarantee is possible without a bound on the number of
examples per user.

(*) Poisson sampling is not usually done in training pipelines, but assuming
that the data was randomly shuffled, it is believed that the actual epsilon
should be closer to this value than the conservative assumption of an arbitrary
data order.



In [19]:

# Perform a single round of federated learning
client_weights = [client_update(client_data[client], state.global_model_weights.trainable) for client in clients]

# Average the weights (basic FedAvg step, normally handled by TFF)
new_weights = [np.mean([client_weights[i][j] for i in range(len(clients))], axis=0) for j in range(len(client_weights[0]))]
print(new_weights)

# train for 5 rounds
rounds = 5
for i in range(1, rounds + 1):
  federated_data = [client_data[client] for client in clients]
  state, metrics = trainer.next(state, federated_data) #.next() takes in the current state and a federated data set
  print(f"Round {i} metrics: {metrics}")
  print()



print("Federated learning round complete!")


[array([[-0.5978603]], dtype=float32), array([0.02100591], dtype=float32)]
Round 1 metrics: OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('mean_absolute_error', 5.6203766), ('loss', 45.449688), ('num_examples', 30), ('num_batches', 15)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])

Round 2 metrics: OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('mean_absolute_error', 0.38888448), ('loss', 0.19201249), ('num_examples', 30), ('num_batches', 15)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])

Round 3 metrics: OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('mean_absolute_error', 0.25799233), ('loss', 0.09142299), ('num_examples', 30), ('num_batches', 15)]))])), ('aggregator', OrderedDict([('mean