<a href="https://colab.research.google.com/github/tylerb55/COMP530/blob/main/ResNet50Fed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Load the dataset into colab**

In [2]:
! git clone https://github.com/tylerb55/COMP530.git

Cloning into 'COMP530'...
remote: Enumerating objects: 7311, done.[K
remote: Counting objects: 100% (218/218), done.[K
remote: Compressing objects: 100% (211/211), done.[K
remote: Total 7311 (delta 11), reused 209 (delta 7), pack-reused 7093[K
Receiving objects: 100% (7311/7311), 1.09 GiB | 16.67 MiB/s, done.
Resolving deltas: 100% (236/236), done.
Checking out files: 100% (5010/5010), done.


In [None]:
! pip install --upgrade tensorflow-federated==0.20.0

Collecting tensorflow-federated==0.20.0
  Downloading tensorflow_federated-0.20.0-py2.py3-none-any.whl (819 kB)
[?25l[K     |▍                               | 10 kB 27.4 MB/s eta 0:00:01[K     |▉                               | 20 kB 33.0 MB/s eta 0:00:01[K     |█▏                              | 30 kB 36.1 MB/s eta 0:00:01[K     |█▋                              | 40 kB 27.6 MB/s eta 0:00:01[K     |██                              | 51 kB 21.3 MB/s eta 0:00:01[K     |██▍                             | 61 kB 24.4 MB/s eta 0:00:01[K     |██▉                             | 71 kB 25.1 MB/s eta 0:00:01[K     |███▏                            | 81 kB 27.1 MB/s eta 0:00:01[K     |███▋                            | 92 kB 28.9 MB/s eta 0:00:01[K     |████                            | 102 kB 28.6 MB/s eta 0:00:01[K     |████▍                           | 112 kB 28.6 MB/s eta 0:00:01[K     |████▉                           | 122 kB 28.6 MB/s eta 0:00:01[K     |█████▏           

# **Import Necessary Libraries**

In [1]:
import numpy as np
import collections
import matplotlib.image as img
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_federated as tff
import tensorflow_datasets as tfds
import nest_asyncio as na
import random 

In [2]:
NUM_CLIENTS=5 
NUM_EPOCHS=10
BATCH_SIZE=1
SHUFFLE_BUFFER=100
PREFETCH_BUFFER=10

# **Load the dataset into the environment and make it a federated dataset**

In [3]:
def Train_and_Test_Set(directory_name):
  """a function to load the images in a large directory into a tensorflow dataset object
  the data is split 80:20 in training:test. The dataset is shuffled before splitting
  and the images are formatted to 512x512 pixels (pixel values range from 0 to 255)
  Images are labelled 0,1 based on the folder they are in. Normalcases are 0 and 
  cancercases are 1"""
  dataset_train=tf.keras.preprocessing.image_dataset_from_directory('/content/COMP530/'+directory_name,
                                                                    labels='inferred',
                                                                    label_mode='int',
                                                                    class_names=['NormalCases','cancercases'],
                                                                    color_mode='rgb',
                                                                    image_size=(512,512),
                                                                    shuffle=True,
                                                                    seed=305,
                                                                    validation_split=0.2,
                                                                    subset='training',
                                                                    batch_size=None
                                                                    )
  
  dataset_test=tf.keras.preprocessing.image_dataset_from_directory('/content/COMP530/'+directory_name,
                                                                    labels='inferred',
                                                                    label_mode='int',
                                                                    class_names=['NormalCases','cancercases'],
                                                                    color_mode='rgb',
                                                                    image_size=(512,512),
                                                                    shuffle=True,
                                                                    seed=305,
                                                                    validation_split=0.2,
                                                                    subset='validation',
                                                                    batch_size=None
                                                                    )

  return dataset_train,dataset_test

In [4]:
def Train_Test_Set(directory_name):
  """a function to load the images in a large directory into a tensorflow dataset object
  the data is split 80:20 in training:test. The dataset is shuffled before splitting
  and the images are formatted to 512x512 pixels (pixel values range from 0 to 255)
  Images are labelled 0,1 based on the folder they are in. Normalcases are 0 and 
  cancercases are 1"""
  dataset_train=tf.keras.preprocessing.image_dataset_from_directory('/content/COMP530/'+directory_name,
                                                                    labels='inferred',
                                                                    label_mode='int',
                                                                    class_names=['Normal','Cancer'],
                                                                    color_mode='rgb',
                                                                    image_size=(512,512),
                                                                    shuffle=True,
                                                                    seed=305,
                                                                    validation_split=0.2,
                                                                    subset='training',
                                                                    batch_size=None
                                                                    )
  
  dataset_test=tf.keras.preprocessing.image_dataset_from_directory('/content/COMP530/'+directory_name,
                                                                    labels='inferred',
                                                                    label_mode='int',
                                                                    class_names=['Normal','Cancer'],
                                                                    color_mode='rgb',
                                                                    image_size=(512,512),
                                                                    shuffle=True,
                                                                    seed=305,
                                                                    validation_split=0.2,
                                                                    subset='validation',
                                                                    batch_size=None
                                                                    )

  return dataset_train,dataset_test

In [5]:
def federate_dataset(Dataset,clients):
  """
  args:
  Dataset - the dataset object to be passed into the function and separated between the clients in the simulation
  clients - the number of clients for the dataset to have come from in the simulated environment
  return:
  dataset_by_client - the original dataset federated between the number of clients. 
  each client is labelled by their client id, in a dictionary. each id points to an individual dataset object
  """
  image_count=tf.data.experimental.cardinality(Dataset).numpy()
  image_per_set=int(np.floor(image_count/clients))

  client_train_dataset=collections.OrderedDict()
  Dataset=tfds.as_numpy(Dataset)
  count=0
  client_num=1
  y=[]
  x=[]
  """this code snippet assigns particular training examples to each client. In this example they have been evenly distributed
  hetrogeneity of federated data could be explored by randomly assigning a clients to each training example.
  This would result in some clients having more than enough training data locally and some  and some suffering from data
  paucity locally. """
  for image in Dataset:
    count+=1
    y.append(image[1])
    x.append(image[0])
    if(count==image_per_set):
      x=np.asarray(x,dtype=np.float32)
      y=np.asarray(y,dtype=np.int32)
      data=collections.OrderedDict((('label', y), ('image', x)))
      client_train_dataset["client_"+str(client_num)]=data
      count=0
      client_num+=1
      y=[]
      x=[]

  federated_dataset=tff.simulation.datasets.TestClientData(client_train_dataset)
  return federated_dataset

In [6]:
def preprocess(dataset):

  def format_batch(element):
    """flatten the pixels in a batch and return the features as an 'OrderedDict'"""
    tf.image.per_image_standardization(element['image'])
    return collections.OrderedDict(
        x=tf.image.resize(element['image'],(224,224)),
        y=tf.reshape(element['label'],[-1,1])
    )
  return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(BATCH_SIZE).map(format_batch).prefetch(PREFETCH_BUFFER)

In [7]:
def make_federated_data(client_data,client_ids,training):
  """ the federated dataset that can be passed into the federated environemt to train or test the network """
  if training:
    client_ids=random.sample(client_ids,random.randint(2,NUM_CLIENTS))
  return[
         preprocess(client_data.create_tf_dataset_for_client(x))
         for x in client_ids         
  ]

In [8]:
""" the original dataset held in keras dataset objects """
#dataset_train, dataset_test=Train_and_Test_Set("Dataset1")
dataset_train, dataset_test=Train_Test_Set("IQQ-OTHNCCD+")
""" the dataset split by the which client the data has come from """
federated_train_dataset=federate_dataset(dataset_train,NUM_CLIENTS)
federated_test_dataset=federate_dataset(dataset_test,NUM_CLIENTS)
""" an example dataset for a single client used to get the input specification for the federated model """
example_dataset = federated_train_dataset.create_tf_dataset_for_client(federated_train_dataset.client_ids[0])
preprocessed_example_dataset=preprocess(example_dataset)

Found 2163 files belonging to 2 classes.
Using 1731 files for training.
Found 2163 files belonging to 2 classes.
Using 432 files for validation.


# **Load a pretrained ResNet50 model as the CNN for the environment**

In [9]:
def ResNet_model():
  base_model=tf.keras.applications.ResNet50(weights='imagenet',input_shape=(224,224,3),include_top=False) # use resnet50 as the base for the tl model
  base_model.trainable = False # freeze the resnet50 layers

  inputs = tf.keras.Input(shape=(224, 224, 3))
  #inputs = tf.keras.layers.RandomFlip("horizontal_and_vertical")(inputs)
  #inputs = tf.keras.layers.RandomRotation(0.2)(inputs)
  # We make sure that the base_model is running in inference mode here,
  # by passing `training=False`.
  x = base_model(inputs, training=False)
  # Convert features of shape `base_model.output_shape[1:]` to vectors
  x = tf.keras.layers.GlobalAveragePooling2D()(x)
  x = tf.keras.layers.Dropout(0.2)(x)
  # A Dense classifier with a single unit (binary classification)
  outputs = tf.keras.layers.Dense(1,activation='sigmoid')(x)
  model = tf.keras.Model(inputs, outputs)
  return model


In [10]:
resnet=ResNet_model()
resnet.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dropout (Dropout)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 1)                 2049      
                                                                 
Total params: 23,589,761
Trainable params: 2,049
Non-trainable params: 23,587,712
_____________________________________________

# **Create the federated environment based on the ResNet50 model** 

In [11]:
def federated_model():
  resnet=ResNet_model()
  return tff.learning.from_keras_model(
      resnet,
      input_spec=preprocessed_example_dataset.element_spec,
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=[tf.keras.metrics.BinaryAccuracy(),tf.keras.metrics.Precision(),tf.keras.metrics.Recall(),tf.keras.metrics.TruePositives(),tf.keras.metrics.TrueNegatives(),tf.keras.metrics.FalsePositives(),tf.keras.metrics.FalseNegatives()])

In [12]:
iterative_process = tff.learning.build_federated_averaging_process(
    federated_model,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.01),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.0001))

Instructions for updating:
Colocations handled automatically by placer.


In [13]:
%load_ext tensorboard

In [14]:
%reload_ext tensorboard

In [16]:
na.apply()
logdir = "/tmp/logs/scalars/training/"
summary_writer = tf.summary.create_file_writer(logdir)
state=iterative_process.initialize()
NUM_ROUNDS=101
with summary_writer.as_default():
  for round_num in range(1, NUM_ROUNDS):
    federated_train_data = make_federated_data(federated_train_dataset, federated_train_dataset.client_ids,training=True)
    state, metrics = iterative_process.next(state, federated_train_data)
    train=metrics['train']
    specificity=train['true_negatives']/(train['true_negatives']+train['false_positives'])
    sensitivity=train['true_positives']/(train['true_positives']+train['false_negatives'])
    tf.summary.scalar('Specificity', specificity, step=round_num)
    tf.summary.scalar('Sensitivity', sensitivity, step=round_num)
    print('round {:2d}, metrics={}'.format(round_num, metrics['train']),'(Specificity,',specificity,') (Sensitivity,',sensitivity,')')
    for name, value in metrics['train'].items():
        tf.summary.scalar(name, value, step=round_num)
        if(name=='recall'):
          break

round  1, metrics=OrderedDict([('binary_accuracy', 0.4781792), ('precision', 0.478242), ('recall', 0.9993958), ('true_positives', 3308.0), ('true_negatives', 1.0), ('false_positives', 3609.0), ('false_negatives', 2.0), ('loss', 8.000723), ('num_examples', 6920), ('num_batches', 6920)]) (Specificity, 0.0002770083 ) (Sensitivity, 0.9993958 )
round  2, metrics=OrderedDict([('binary_accuracy', 0.49754336), ('precision', 0.49209714), ('recall', 0.75198823), ('true_positives', 5106.0), ('true_negatives', 1780.0), ('false_positives', 5270.0), ('false_negatives', 1684.0), ('loss', 7.71244), ('num_examples', 13840), ('num_batches', 13840)]) (Specificity, 0.25248227 ) (Sensitivity, 0.75198823 )
round  3, metrics=OrderedDict([('binary_accuracy', 0.50587666), ('precision', 0.50296456), ('recall', 0.6727273), ('true_positives', 3478.0), ('true_negatives', 1773.0), ('false_positives', 3437.0), ('false_negatives', 1692.0), ('loss', 7.5895486), ('num_examples', 10380), ('num_batches', 10380)]) (Specif

InternalError: ignored

# **Finetune the server model for a further 5 rounds**

This part will take much longer as the pretrained layers are now unfrozen and are available to be trained

In [None]:
model_weights = iterative_process.get_model_weights(state)

In [None]:
def finetune_federated_model(model_weights):
  resnet=ResNet_model()
  model_weights.assign_weights_to(resnet)
  resnet.trainable=True
  return tff.learning.from_keras_model(
      resnet,
      input_spec=preprocessed_example_dataset.element_spec,
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=[tf.keras.metrics.BinaryAccuracy(),tf.keras.metrics.Precision(),tf.keras.metrics.Recall(),tf.keras.metrics.TruePositives(),tf.keras.metrics.TrueNegatives(),tf.keras.metrics.FalsePositives(),tf.keras.metrics.FalseNegatives()])

In [None]:
iterative_process = tff.learning.build_federated_averaging_process(
    lambda: finetune_federated_model(model_weights),
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.0001),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.0001))

In [None]:
state=iterative_process.initialize()
finetuning_rounds=NUM_ROUNDS+5
with summary_writer.as_default():
  for round_num in range(NUM_ROUNDS,finetuning_rounds):
    federated_train_data = make_federated_data(federated_train_dataset, federated_train_dataset.client_ids,training=True)
    state, metrics = iterative_process.next(state, federated_train_data)
    train=metrics['train']
    specificity=train['true_negatives']/(train['true_negatives']+train['false_positives'])
    sensitivity=train['true_positives']/(train['true_positives']+train['false_negatives'])
    print('round {:2d}, metrics={}'.format(round_num, metrics['train']),'(Specificity,',specificity,') (Sensitivity,',sensitivity,')')
    for name, value in metrics['train'].items():
        tf.summary.scalar(name, value, step=round_num)
        if(name=='recall'):
          break

# **Output the results to graph visualisations**

In [None]:
!ls {logdir}
%tensorboard --logdir {logdir} --port=0

# **Evaluation on the test set**

In [None]:
evaluation=tff.learning.build_federated_evaluation(lambda: finetune_federated_model(model_weights))

In [None]:
federated_test_data = make_federated_data(federated_test_dataset, federated_test_dataset.client_ids,training=False)

In [None]:
test_metrics = evaluation(state.model, federated_test_data)

In [None]:
test=test_metrics['eval']
specificity=test['true_negatives']/(test['true_negatives']+test['false_positives'])
sensitivity=test['true_positives']/(test['true_positives']+test['false_negatives'])
accuracy=test['binary_accuracy']
print('Evaluation Metrics: (Accuracy:)',accuracy,'(Specificity:)',specificity,'(Sensitivity:)',sensitivity)

In [None]:
# Uncomment and run this cell to remove old outputs from the directory so new results can be seen on tensorboard

# !rm -R /tmp/logs/scalars/*