<a href="https://colab.research.google.com/github/tylerb55/COMP530/blob/main/woringmobilenetfed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
! git clone https://github.com/tylerb55/COMP530.git

Cloning into 'COMP530'...
remote: Enumerating objects: 4250, done.[K
remote: Counting objects: 100% (34/34), done.[K
remote: Compressing objects: 100% (34/34), done.[K
remote: Total 4250 (delta 16), reused 0 (delta 0), pack-reused 4216[K
Receiving objects: 100% (4250/4250), 917.09 MiB | 30.44 MiB/s, done.
Resolving deltas: 100% (97/97), done.


In [3]:
! pip install --upgrade tensorflow-federated==0.20.0

Collecting tensorflow-federated==0.20.0
  Downloading tensorflow_federated-0.20.0-py2.py3-none-any.whl (819 kB)
[?25l[K     |▍                               | 10 kB 30.6 MB/s eta 0:00:01[K     |▉                               | 20 kB 19.1 MB/s eta 0:00:01[K     |█▏                              | 30 kB 14.1 MB/s eta 0:00:01[K     |█▋                              | 40 kB 13.1 MB/s eta 0:00:01[K     |██                              | 51 kB 6.4 MB/s eta 0:00:01[K     |██▍                             | 61 kB 7.6 MB/s eta 0:00:01[K     |██▉                             | 71 kB 8.0 MB/s eta 0:00:01[K     |███▏                            | 81 kB 7.7 MB/s eta 0:00:01[K     |███▋                            | 92 kB 8.6 MB/s eta 0:00:01[K     |████                            | 102 kB 7.0 MB/s eta 0:00:01[K     |████▍                           | 112 kB 7.0 MB/s eta 0:00:01[K     |████▉                           | 122 kB 7.0 MB/s eta 0:00:01[K     |█████▏                   

In [None]:
! pip install nest-asyncio



Import Necessary Libraries

In [1]:
import numpy as np
import pandas as pd
import collections
import matplotlib.image as img
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_federated as tff
import tensorflow_datasets as tfds
import nest_asyncio as na

In [2]:
#Have a play around with number of clients and batch size and see how they affect the model
NUM_CLIENTS=5 
NUM_EPOCHS=100
BATCH_SIZE=20
SHUFFLE_BUFFER=100
PREFETCH_BUFFER=10

In [3]:
def Train_Validation_Test_Set(directory_name):
  """a function to load the images in a large directory into a tensorflow dataset object
  the data is split 80:10:10 in training:validation:test. The dataset is shuffled before splitting
  and the images are formatted to 512x512 pixels and grayscale(one color channel and the values range from 0 to 255)"""
  dataset_train=tf.keras.preprocessing.image_dataset_from_directory('/content/COMP530/'+directory_name,
                                                                    labels='inferred',
                                                                    label_mode='int',
                                                                    class_names=['NormalCases','cancercases'],
                                                                    color_mode='rgb',
                                                                    image_size=(512,512),
                                                                    shuffle=True,
                                                                    seed=305,
                                                                    validation_split=0.2,
                                                                    subset='training',
                                                                    batch_size=None
                                                                    )

  dataset_validation=tf.keras.preprocessing.image_dataset_from_directory('/content/COMP530/'+directory_name,
                                                                    labels='inferred',
                                                                    label_mode='int',
                                                                    class_names=['NormalCases','cancercases'],
                                                                    color_mode='rgb',
                                                                    image_size=(512,512),
                                                                    shuffle=True,
                                                                    seed=305,
                                                                    validation_split=0.2,
                                                                    subset='validation',
                                                                    batch_size=None
                                                                    )
  
  dataset_test=tf.keras.preprocessing.image_dataset_from_directory('/content/COMP530/'+directory_name,
                                                                    labels='inferred',
                                                                    label_mode='int',
                                                                    class_names=['NormalCases','cancercases'],
                                                                    color_mode='rgb',
                                                                    image_size=(512,512),
                                                                    shuffle=True,
                                                                    seed=305,
                                                                    validation_split=0.1,
                                                                    subset='validation',
                                                                    batch_size=None
                                                                    )
    
  dataset_validation=dataset_validation.take(dataset_test.__len__())

  return dataset_train,dataset_validation,dataset_test

In [4]:
def preprocess(dataset):

  def format_batch(element):
    """flatten the pixels in a batch and return the features as an 'OrderedDict'"""
    tf.image.per_image_standardization(element['image'])
    return collections.OrderedDict(
        x=tf.image.resize(element['image'],(224,224)),
        y=tf.reshape(element['label'],[-1,1])
    )
  return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(BATCH_SIZE).map(format_batch).prefetch(PREFETCH_BUFFER)

In [5]:
def federate_dataset(Dataset,clients):
  """
  args:
  Dataset - the dataset object to be passed into the function and separated between the clients in the simulation
  clients - the number of clients for the dataset to have come from in the simulated environment
  return:
  dataset_by_client - the original dataset federated between the number of clients. 
  each client is labelled by their client id, in a dictionary. each id points to an individual dataset object
  """
  image_count=tf.data.experimental.cardinality(Dataset).numpy()
  image_per_set=int(np.floor(image_count/clients))

  client_train_dataset=collections.OrderedDict()
  Dataset=tfds.as_numpy(Dataset)
  count=0
  client_num=1
  y=[]
  x=[]
  """this code snippet assigns particular training examples to each client. In this example they have been evenly distributed
  hetrogeneity of federated data could be explored by randomly assigning a clients to each training example.
  This would result in some clients having more than enough training data locally and some  and some suffering from data
  paucity locally. A greater reflection of real life struggles when implementing federated learning."""
  for image in Dataset:
    count+=1
    y.append(image[1])
    x.append(image[0])
    if(count==image_per_set):
      x=np.asarray(x,dtype=np.float32)
      y=np.asarray(y,dtype=np.int32)
      data=collections.OrderedDict((('label', y), ('image', x)))
      client_train_dataset["client_"+str(client_num)]=data
      count=0
      client_num+=1
      y=[]
      x=[]

  federated_dataset=tff.simulation.datasets.TestClientData(client_train_dataset)
  return federated_dataset

In [6]:
def AlexNet_model():
   return tf.keras.models.Sequential([
                                      tf.keras.layers.Conv2D(filters=96,kernel_size=(11,11),strides=(4,4),activation='relu',input_shape=(224,224,3)),
                                      tf.keras.layers.BatchNormalization(),
                                      tf.keras.layers.MaxPool2D(pool_size=(3,3),strides=(2,2)),
                                      tf.keras.layers.Conv2D(filters=256,kernel_size=(5,5),strides=(1,1),activation='relu',padding='same'),
                                      tf.keras.layers.BatchNormalization(),
                                      tf.keras.layers.MaxPool2D(pool_size=(3,3),strides=(2,2)),
                                      tf.keras.layers.Conv2D(filters=384,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same'),
                                      tf.keras.layers.BatchNormalization(),
                                      tf.keras.layers.Conv2D(filters=384,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same'),
                                      tf.keras.layers.BatchNormalization(),
                                      tf.keras.layers.Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same'),
                                      tf.keras.layers.BatchNormalization(),
                                      tf.keras.layers.MaxPool2D(pool_size=(3,3),strides=(2,2)),
                                      tf.keras.layers.Flatten(),
                                      tf.keras.layers.Dense(4096,activation='relu'),
                                      tf.keras.layers.Dropout(0.5),
                                      tf.keras.layers.Dense(4096,activation='relu'),
                                      tf.keras.layers.Dropout(0.5),
                                      tf.keras.layers.Dense(1,activation='softmax')
])

In [15]:
def MobileNet_model():
  base_model=tf.keras.applications.MobileNetV2(weights='imagenet',input_shape=(224,224,3),include_top=True) # use mobilenetv2 as the base for the tl model
  base_model.trainable = False # freeze the mobilenetv2 layers

  inputs = tf.keras.Input(shape=(224, 224, 3))
  # We make sure that the base_model is running in inference mode here,
  # by passing `training=False`. This is important for fine-tuning, as you will
  # learn in a few paragraphs.
  x = base_model(inputs, training=False)
  # Convert features of shape `base_model.output_shape[1:]` to vectors
  #x = tf.keras.layers.GlobalAveragePooling2D()(x)
  # A Dense classifier with a single unit (binary classification)
  outputs = tf.keras.layers.Dense(1,activation='sigmoid')(x)
  model = tf.keras.Model(inputs, outputs)
  return model


In [8]:
alexnet=AlexNet_model()
alexnet.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 54, 54, 96)        34944     
                                                                 
 batch_normalization (BatchN  (None, 54, 54, 96)       384       
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 26, 26, 96)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 26, 26, 256)       614656    
                                                                 
 batch_normalization_1 (Batc  (None, 26, 26, 256)      1024      
 hNormalization)                                                 
                                                        

In [16]:
mobilenet=MobileNet_model()
mobilenet.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 mobilenetv2_1.00_224 (Funct  (None, 1000)             3538984   
 ional)                                                          
                                                                 
 dense_4 (Dense)             (None, 1)                 1001      
                                                                 
Total params: 3,539,985
Trainable params: 1,001
Non-trainable params: 3,538,984
_________________________________________________________________


In [17]:
def make_federated_data(client_data,client_ids):
  return[
         preprocess(client_data.create_tf_dataset_for_client(x))
         for x in client_ids         
  ]

In [21]:
def federated_model():
  AlexNet_Fed=MobileNet_model()
  return tff.learning.from_keras_model(
      AlexNet_Fed,
      input_spec=preprocessed_example_dataset.element_spec,
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=[tf.keras.metrics.BinaryAccuracy(),tf.keras.metrics.Precision(),tf.keras.metrics.Recall(),tf.keras.metrics.TruePositives(),tf.keras.metrics.TrueNegatives(),tf.keras.metrics.FalsePositives(),tf.keras.metrics.FalseNegatives()])

In [12]:
""" the original dataset held in keras dataset objects """
dataset_train, dataset_validation,dataset_test=Train_Validation_Test_Set("Dataset1")
""" the dataset split by the which client the data has come from """
federated_train_dataset=federate_dataset(dataset_train,NUM_CLIENTS)
federated_test_dataset=federate_dataset(dataset_test,NUM_CLIENTS)
""" an example dataset for a single client used to get the input specification for the federated model """
example_dataset = federated_train_dataset.create_tf_dataset_for_client(federated_train_dataset.client_ids[0])
preprocessed_example_dataset=preprocess(example_dataset)
""" the federated dataset that can be passed into the federated environemt to train the network """
federated_train_data = make_federated_data(federated_train_dataset, federated_train_dataset.client_ids)

Found 1097 files belonging to 2 classes.
Using 878 files for training.
Found 1097 files belonging to 2 classes.
Using 219 files for validation.
Found 1097 files belonging to 2 classes.
Using 109 files for validation.


In [32]:
iterative_process = tff.learning.build_federated_averaging_process(
    federated_model,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.0001),#change to 0.01 make optimizer adam
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))

In [33]:
na.apply()
state=iterative_process.initialize()
NUM_ROUNDS=10
for round_num in range(1, NUM_ROUNDS):
  state, metrics = iterative_process.next(state, federated_train_data)
  print('round {:2d}, metrics={}'.format(round_num, metrics))

round  1, metrics=OrderedDict([('broadcast', ()), ('aggregation', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('train', OrderedDict([('binary_accuracy', 0.6194286), ('precision', 0.6194286), ('recall', 1.0), ('true_positives', 54200.0), ('true_negatives', 0.0), ('false_positives', 33300.0), ('false_negatives', 0.0), ('loss', 0.6860712), ('num_examples', 87500), ('num_batches', 4375)]))])
round  2, metrics=OrderedDict([('broadcast', ()), ('aggregation', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('train', OrderedDict([('binary_accuracy', 0.6194286), ('precision', 0.6194286), ('recall', 1.0), ('true_positives', 54200.0), ('true_negatives', 0.0), ('false_positives', 33300.0), ('false_negatives', 0.0), ('loss', 0.86811775), ('num_examples', 87500), ('num_batches', 4375)]))])
round  3, metrics=OrderedDict([('broadcast', ()), ('aggregation', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('train', OrderedDict([('binary_accuracy', 0.6194286), ('precision',

Test the model on the test set and evaluate performance

Produce graphs and visualisations for evalutation data