# Federated TensorFlow YOLOv3 Victim-Detection Tutorial (adapted from the Federated TensorFlow MNIST Tutorial)

In [1]:
# # Install dependencies if not already installed
# !pip install tensorflow==2.3.1

In [2]:
import tensorflow as tf
tf.__version__

2022-04-06 09:33:05.837227: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


'2.5.0'

## Connect to the Federation

In [3]:
# Create a federation
from openfl.interface.interactive_api.federation import Federation

# Use the same identifier that was used in the signed certificate
client_id = 'api'
# Directory holding the certificate files; in this cell it is only referenced by the
# commented-out mTLS option below
cert_dir = 'cert'
# Address and port of the Director this notebook connects to
director_node_fqdn = '192.168.1.129'
director_port=50051
# 1) Run with API layer - Director mTLS 
# To enable mTLS the user must provide the CA root chain and a signed key pair to the federation interface
# cert_chain = f'{cert_dir}/root_ca.crt'
# api_certificate = f'{cert_dir}/{client_id}.crt'
# api_private_key = f'{cert_dir}/{client_id}.key'

# federation = Federation(
#     client_id=client_id,
#     director_node_fqdn=director_node_fqdn,
#     director_port=director_port,
#     cert_chain=cert_chain,
#     api_cert=api_certificate,
#     api_private_key=api_private_key
# )

# --------------------------------------------------------------------------------------------------------------------

# 2) Run with TLS disabled (trusted environment)
# Federation can also determine local fqdn automatically
federation = Federation(
    client_id=client_id,
    director_node_fqdn=director_node_fqdn,
    director_port=director_port, 
    tls=False
)


In [4]:
# Ask the federation for its shard registry and show it as this cell's output
shard_registry = federation.get_shard_registry()
shard_registry

{'env_one': {'shard_info': node_info {
    name: "env_one"
  }
  shard_description: "Mnist dataset, shard number 1 out of 3"
  sample_shape: "416"
  sample_shape: "416"
  sample_shape: "3"
  target_shape: "1",
  'is_online': True,
  'is_experiment_running': False,
  'last_updated': '2022-04-06 09:32:38',
  'current_time': '2022-04-06 09:33:10',
  'valid_duration': seconds: 120,
  'experiment_name': 'ExperimentName Mock'},
 'env_two': {'shard_info': node_info {
    name: "env_two"
  }
  shard_description: "Mnist dataset, shard number 2 out of 3"
  sample_shape: "416"
  sample_shape: "416"
  sample_shape: "3"
  target_shape: "1",
  'is_online': True,
  'is_experiment_running': False,
  'last_updated': '2022-04-06 09:32:44',
  'current_time': '2022-04-06 09:33:10',
  'valid_duration': seconds: 120,
  'experiment_name': 'ExperimentName Mock'},
 'env_three': {'shard_info': node_info {
    name: "env_three"
  }
  shard_description: "Mnist dataset, shard number 3 out of 3"
  sample_shape: "41

In [5]:
# First, request a dummy_shard_desc that holds information about the federated dataset 
dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)
# Take its 'train' dataset and unpack the first item to inspect the sample/target shapes
dummy_shard_dataset = dummy_shard_desc.get_dataset('train')
sample, target = dummy_shard_dataset[0]
f"Sample shape: {sample.shape}, target shape: {target.shape}"

'Sample shape: (416, 416, 3), target shape: (1,)'

## Describing the FL experiment

In [None]:
from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment
from IPython.display import clear_output
clear_output()

### Register model

In [7]:
from layers import create_model, optimizer
import os
from IPython.display import clear_output

from models import YoloV3
from utils import freeze_all

"""transfer learning"""
# Names of the layers whose weights are copied from the pretrained YOLOv3 into the new model
TRANSFER_LEARNING = [
    'yolo_conv_0',
    'yolo_conv_1',
    'yolo_conv_2',
]

# load pretrained YOLOv3
model_pretrained = YoloV3(416, channels=3,classes=80,auxiliary=False,training=False)
model_pretrained.load_weights("yolov3.h5")

# Collect the weights of each transferred layer, in the same order as TRANSFER_LEARNING
TRANSFER_LEARNING_WEIGHTS = []
for layer in TRANSFER_LEARNING:
    TRANSFER_LEARNING_WEIGHTS.append(model_pretrained.get_layer(layer).get_weights())

# Clear the Keras session, which removes ALL of the TF graph state.
# Only model_pretrained needs to be removed, but clear_session() gives no such choice.
# This must happen after the weights above were collected and before create_model() is called.
clear_output()
print("Cleaning tf graph and restart the backbone and victim head graph...")
import tensorflow.keras.backend as K
K.clear_session()
del model_pretrained

model = create_model(classes = 1, training=True)
# Copy the collected weights into the layers of the same name in the new model
for i, layer in enumerate(TRANSFER_LEARNING):
    model.get_layer(layer).set_weights(TRANSFER_LEARNING_WEIGHTS[i])
    # freeze the yolo darknet backbone 
    freeze_all(model.get_layer(layer))
print("FPN weights are loaded")


"""load weights if we continue training"""
# NOTE(review): when best_model.h5 exists, `model` is rebound to the loaded model, so the
# model built and populated above is discarded -- confirm this is intended.
if os.path.exists('best_model.h5'):
#     trained_model = tf.keras.models.load_model('best_model.h5')
#     for layer in ['yolo_output_0', 'yolo_output_1', 'yolo_output_2']:
#         model.get_layer(layer).set_weights(
#             trained_model.get_layer(layer).get_weights())
    model = tf.keras.models.load_model('best_model.h5')
    print('Model is loaded with pretrained weight')
else:
    print('Model is initialized with empty weight (except FPN)')
    

"""model interface"""
framework_adapter = 'openfl.plugins.frameworks_adapters.keras_adapter.FrameworkAdapterPlugin'
# `optimizer` is the object imported from `layers` at the top of this cell
MI = ModelInterface(model=model, optimizer=optimizer, framework_plugin=framework_adapter)

Cleaning tf graph and restart the backbone and victim head graph...
FPN weights are loaded
Model is loaded with pretrained weight


In [8]:
# model.save('test.h5')
# model = tf.keras.models.load_model('test.h5')

# model.save_weights('test.h5')
# new_model = create_model(classes = 1, training=True)
# for layer in TRANSFER_LEARNING:
#     new_model.get_layer(layer).set_weights(
#         model.get_layer(layer).get_weights())

In [9]:
model.summary()

Model: "victim_head"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
yolo_conv_0 (Functional)        (None, None, None, 5 11024384    input_2[0][0]                    
__________________________________________________________________________________________________
input_1 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
yolo_conv_1 (Functional)        (None, None, None, 2 2957312     yolo_conv_0[0][0]                
                                                                 input_1[0][0]          

### Register dataset

In [10]:
import numpy as np
from tensorflow.keras.utils import Sequence

class DataGenerator(Sequence):
    """Keras ``Sequence`` that serves shuffled mini-batches from a shard dataset.

    Args:
        shard_descriptor: dataset object supporting ``len()`` and indexing
            with a list of integer indices; that indexing must return an
            ``(X, y)`` pair.
        batch_size: number of samples per batch.
    """

    def __init__(self, shard_descriptor, batch_size):
        self.shard_descriptor = shard_descriptor
        self.batch_size = batch_size
        # Sample order; shuffled once here and again on every on_epoch_end() call.
        self.indices = np.arange(len(shard_descriptor))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return len(self.indices) // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``."""
        start = index * self.batch_size
        # self.indices already holds shuffled sample ids, so the slice IS the batch;
        # looking the ids up in self.indices a second time was redundant.
        batch = list(self.indices[start:start + self.batch_size])

        X, y = self.shard_descriptor[batch]
        return X, y

    def on_epoch_end(self):
        """Reshuffle the sample order in place."""
        np.random.shuffle(self.indices)


class MnistFedDataset(DataInterface):
    """Data interface that exposes a shard's train/val datasets as batch loaders.

    Keyword arguments read by this class (both optional):
        train_bs: batch size for the training loader (default 32).
        valid_bs: batch size for the validation loader (default 32).
    """

    # Batch size used when 'train_bs' / 'valid_bs' is missing or falsy.
    DEFAULT_BATCH_SIZE = 32

    def __init__(self, **kwargs):
        # assumes DataInterface.__init__ stores the keyword arguments on
        # self.kwargs, which the loaders below read -- TODO confirm
        super().__init__(**kwargs)

    @property
    def shard_descriptor(self):
        return self._shard_descriptor

    @shard_descriptor.setter
    def shard_descriptor(self, shard_descriptor):
        """
        Describe per-collaborator procedures or sharding.

        This method will be called during a collaborator initialization.
        Local shard_descriptor will be set by Envoy.
        """
        self._shard_descriptor = shard_descriptor

        self.train_set = shard_descriptor.get_dataset('train')
        self.valid_set = shard_descriptor.get_dataset('val')

    def __getitem__(self, index):
        return self.shard_descriptor[index]

    def __len__(self):
        return len(self.shard_descriptor)

    def get_train_loader(self):
        """
        Output of this method will be provided to tasks with optimizer in contract
        """
        # .get() makes the default reachable when the key was never passed;
        # the previous self.kwargs['train_bs'] raised KeyError in that case.
        batch_size = self.kwargs.get('train_bs') or self.DEFAULT_BATCH_SIZE
        return DataGenerator(self.train_set, batch_size=batch_size)

    def get_valid_loader(self):
        """
        Output of this method will be provided to tasks without optimizer in contract
        """
        batch_size = self.kwargs.get('valid_bs') or self.DEFAULT_BATCH_SIZE
        return DataGenerator(self.valid_set, batch_size=batch_size)

    def get_train_data_size(self):
        """
        Information for aggregation
        """
        return len(self.train_set)

    def get_valid_data_size(self):
        """
        Information for aggregation
        """
        return len(self.valid_set)

### Create Mnist federated dataset

In [11]:
# Divisor applied to the nominal batch of 64 to obtain the training micro-batch size
num_grad_accumulates = 8
# Training batch size is 64 // num_grad_accumulates; validation batch size is 8
fed_dataset = MnistFedDataset(train_bs=64//num_grad_accumulates, valid_bs=8)

## Define and register FL tasks

In [12]:
from typing import List, Optional, Union
import tensorflow as tf
def accumulated_gradients(gradients: Optional[List[Optional[tf.Tensor]]],
                          step_gradients: List[Union[tf.Tensor, tf.IndexedSlices, None]],
                          num_grad_accumulates: int) -> List[Optional[tf.Tensor]]:
    """Add one step's gradients, scaled by 1/num_grad_accumulates, to a running sum.

    Args:
        gradients: running per-variable sums from previous steps, or ``None``
            when this is the first step of an accumulation window. When a
            list is given it is updated in place.
        step_gradients: per-variable gradients of the current step. An entry
            may be ``None`` (a variable with no gradient); such entries leave
            the running sum for that variable untouched.
        num_grad_accumulates: divisor applied to every step gradient.

    Returns:
        The list of accumulated gradients, one entry per variable. An entry
        is ``None`` only if every step so far had no gradient for it.
    """
    if gradients is None:
        return [
            None if g is None else flat_gradients(g) / num_grad_accumulates
            for g in step_gradients
        ]

    for i, g in enumerate(step_gradients):
        if g is None:
            # Nothing to add for this variable in this step.
            continue
        scaled = flat_gradients(g) / num_grad_accumulates
        if gradients[i] is None:
            gradients[i] = scaled
        else:
            gradients[i] += scaled

    return gradients

# This is needed for tf.gather like operations.
def flat_gradients(grads_or_idx_slices: Union[tf.Tensor, tf.IndexedSlices]) -> tf.Tensor:
    '''Return a dense gradient, converting it first if it is a tf.IndexedSlices.

    A gradient that arrives as `tf.IndexedSlices` (the case the note above
    associates with `tf.gather`-like operations) is scattered into a dense
    tensor of shape `dense_shape` with `tf.scatter_nd`. Any other value is
    returned unchanged.
    '''
    # isinstance (rather than an exact type comparison) also covers subclasses.
    if isinstance(grads_or_idx_slices, tf.IndexedSlices):
        return tf.scatter_nd(
            tf.expand_dims(grads_or_idx_slices.indices, 1),
            grads_or_idx_slices.values,
            grads_or_idx_slices.dense_shape
        )
    return grads_or_idx_slices

In [13]:
# Task interface whose register_fl_task decorator is applied to train() and validate() below
TI = TaskInterface()

import time
import tensorflow as tf
from layers import loss

from math import pi, cos, acos
import numpy as np

from utils import freeze_all
# Hard-coded training configuration.
# ROUNDS is passed to fl_experiment.start() as rounds_to_train.
ROUNDS = 5 #25
# Learning rate assigned by train() when current_epoch is 0, and when it reaches 20.
initial_learning_rate = 1e-4
last_learning_rate = 1e-5
# Number of micro-batches accumulated per optimizer step; the dataset cell
# derives the training batch size from the same value (64 // 8).
num_grad_accumulates = 8
# Global epoch counter that train() reads and increments.
current_epoch = 20 #0
@TI.register_fl_task(model='model', data_loader='train_dataset',
                     device='device', optimizer='optimizer')
def train(model, train_dataset, optimizer, device, loss_fn=loss, warmup=False):
    """Run one local training epoch with gradient accumulation.

    The global ``current_epoch`` counter drives a small schedule:
    at epoch 0 the learning rate is set to ``initial_learning_rate`` and the
    three 'yolo_conv_*' layers are frozen; at epoch 20 the learning rate is
    set to ``last_learning_rate`` and those layers are unfrozen. On every
    fifth epoch other than 20, gradients are applied through a freshly
    created Adam optimizer whose weights are copied into ``optimizer`` at the
    end of the epoch.

    Args:
        model: Keras model; must expose layers named 'yolo_conv_0',
            'yolo_conv_1' and 'yolo_conv_2'.
        train_dataset: iterable yielding ``(images, labels)`` micro-batches.
        optimizer: optimizer whose ``lr`` attribute is read and assigned here.
        device: not used in this function body.
        loss_fn: iterable of loss callables, zipped with the model outputs
            and the labels (one callable per output).
        warmup: when true, stop after the first micro-batch.

    Returns:
        dict with key 'train_acc' holding the mean of the total-loss values
        recorded at each optimizer step of this epoch (a loss value, despite
        the key name).
    """
    global current_epoch
    fpn_layers = ['yolo_conv_0', 'yolo_conv_1', 'yolo_conv_2']

    def current_lr():
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit are not swallowed by the fallback.
        try:
            return optimizer.lr.numpy()
        except Exception:
            return optimizer.lr('float32').numpy()

    # Assign the initial learning rate if this is the first epoch.
    if current_epoch == 0:
        optimizer.lr = initial_learning_rate
        for layer in fpn_layers:
            freeze_all(model.get_layer(layer), frozen=True)

    if current_epoch == 20:
        print("Decrease LR for the remaining epoch...")
        optimizer.lr = last_learning_rate
        print("Unfreeze FPN layers...")
        for layer in fpn_layers:
            freeze_all(model.get_layer(layer), frozen=False)
    print("################################################################")

    # Print the current learning rate.
    print(f"Epoch:\t{current_epoch}\tLR:\t{current_lr()}")

    # Reset optimizer (part 1): on reset epochs, step with a fresh Adam.
    reset_optimizer = current_epoch % 5 == 0 and current_epoch != 20
    new_optimizer = None
    if reset_optimizer:
        new_optimizer = tf.keras.optimizers.Adam(learning_rate=current_lr())
    active_optimizer = new_optimizer if reset_optimizer else optimizer

    # Training loop for this epoch.
    grads = None
    # Keep the loss callables under their own name so the per-output loop
    # variable below does not shadow the `loss_fn` parameter.
    loss_fns = loss_fn
    avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    for mini_batch, (images, labels) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            outputs = model(images, training=True)
            regularization_loss = tf.reduce_sum(model.losses)
            pred_loss = []
            for output, label, output_loss_fn in zip(outputs, labels, loss_fns):
                pred_loss.append(output_loss_fn(label, output))
            total_loss = tf.reduce_sum(pred_loss) + regularization_loss

        step_grads = tape.gradient(total_loss, model.trainable_variables)
        grads = accumulated_gradients(grads, step_grads, num_grad_accumulates)

        # Apply the accumulated gradients once a full batch has been seen.
        # mini_batch is 0-based, so a window of num_grad_accumulates
        # micro-batches ends when (mini_batch + 1) is a multiple of it; the
        # previous `mini_batch % n == 0 and mini_batch != 0` check made the
        # first window n + 1 micro-batches long.
        if (mini_batch + 1) % num_grad_accumulates == 0:
            active_optimizer.apply_gradients(
                zip(grads, model.trainable_variables))
            grads = None

            print("train_{}, {}, {}".format(
                (mini_batch + 1) // num_grad_accumulates, total_loss.numpy(),
                list(map(lambda x: np.sum(x.numpy()), pred_loss))))
            avg_loss.update_state(total_loss)

        if warmup:
            break

    # Apply whatever is left over from an incomplete accumulation window.
    if grads is not None:
        active_optimizer.apply_gradients(
            zip(grads, model.trainable_variables))
        grads = None

        print("train_{}, {}, {}".format(
            mini_batch // num_grad_accumulates + 1, total_loss.numpy(),
            list(map(lambda x: np.sum(x.numpy()), pred_loss))))
        avg_loss.update_state(total_loss)

    loss_of_the_epoch = avg_loss.result().numpy()
    print("End of epoch, train: {}\n".format(loss_of_the_epoch))
    avg_loss.reset_states()

    # Reset optimizer (part 2): copy the fresh optimizer's weights into the
    # optimizer that was passed in.
    if reset_optimizer:
        print("reset optimizer...")
        optimizer.set_weights(new_optimizer.get_weights())

    # Advance the epoch counter and change the learning rate accordingly.
    current_epoch += 1
    if current_epoch == 20:
        print("Decrease LR for the remaining epoch...")
        optimizer.lr = last_learning_rate
        print("Unfreeze FPN layers...")
        for layer in fpn_layers:
            freeze_all(model.get_layer(layer), frozen=False)

    return {'train_acc': loss_of_the_epoch,}


@TI.register_fl_task(model='model', data_loader='val_dataset', device='device')
def validate(model, val_dataset, device):
    """Compute the mean total loss of ``model`` over ``val_dataset``.

    For every ``(images, labels)`` batch, the per-output losses (module-level
    ``loss`` callables zipped with outputs and labels) are summed together
    with the model's regularization losses. ``device`` is not used here.

    Returns:
        dict with key 'validation_accuracy' holding that mean loss as a float
        (a loss value, despite the key name).
    """
    mean_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    for images, labels in val_dataset:
        predictions = model(images, training=False)
        regularization_loss = tf.reduce_sum(model.losses)
        head_losses = [
            head_loss(label, prediction)
            for prediction, label, head_loss in zip(predictions, labels, loss)
        ]
        mean_loss.update_state(tf.reduce_sum(head_losses) + regularization_loss)

    result = float(mean_loss.result().numpy())
    print("Validation acc: %.4f" % (result,))

    return {'validation_accuracy': result,}

## Time to start a federated learning experiment

In [14]:
# create an experimnet in federation
experiment_name = 'victim detection'
fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)

In [15]:
# The following command zips the workspace and python requirements to be transfered to collaborator nodes
fl_experiment.start(model_provider=MI, 
                   task_keeper=TI,
                   data_loader=fed_dataset,
                   rounds_to_train=ROUNDS,
                   opt_treatment='CONTINUE_GLOBAL')

In [16]:
# NOTE(review): presumably blocks and prints metrics while the experiment runs -- confirm against the OpenFL docs
fl_experiment.stream_metrics()

In [17]:
# Debug aid: print every attribute name of the experiment object, one per line.
# The loop variable is deliberately not `_`, which IPython uses for the last cell output.
xxx = dir(fl_experiment)
for attribute_name in xxx:
    print(attribute_name)

__class__
__delattr__
__dict__
__dir__
__doc__
__eq__
__format__
__ge__
__getattribute__
__gt__
__hash__
__init__
__init_subclass__
__le__
__lt__
__module__
__ne__
__new__
__reduce__
__reduce_ex__
__repr__
__setattr__
__sizeof__
__str__
__subclasshook__
__weakref__
_assert_experiment_accepted
_get_initial_tensor_dict
_pack_the_workspace
_prepare_plan
_rebuild_model
_serialize_interface_objects
current_model_status
define_task_assigner
experiment_accepted
experiment_name
federation
get_best_model
get_last_model
logger
plan
prepare_workspace_distribution
remove_experiment_data
remove_workspace_archive
restore_experiment_state
serializer_plugin
start
stream_metrics
summary_writer
task_runner_stub
train_task_exist
validation_task_exist
write_tensorboard_metric


In [18]:
# NOTE(review): the variable is named best_model but is filled by get_last_model();
# the experiment object also lists a get_best_model attribute -- confirm which one is intended.
best_model = fl_experiment.get_last_model()

In [19]:
# Save to the same file name that the model-registration cell checks for and reloads on the next run
best_model.save('best_model.h5')



In [20]:
1

1