<a href="https://colab.research.google.com/github/tylerb55/COMP530/blob/main/FedEnvironment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Load the dataset into Colab**

In [None]:
! git clone https://github.com/tylerb55/COMP530.git

In [None]:
! pip install --upgrade tensorflow-federated==0.20.0

# **Import Necessary Libraries**

In [None]:
import numpy as np
import collections
import matplotlib.image as img
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_federated as tff
import tensorflow_datasets as tfds
import nest_asyncio as na
import random 

In [None]:
# Number of simulated clients the data is split between; also the upper bound
# on how many clients are sampled for a single training round.
NUM_CLIENTS=5 
# Number of times each client's local dataset is repeated by preprocess().
NUM_EPOCHS=10
# Number of examples per batch produced by preprocess().
BATCH_SIZE=32
# Size of the shuffle buffer used by preprocess().
SHUFFLE_BUFFER=100
# Number of batches prefetched by preprocess().
PREFETCH_BUFFER=10

# **Load the dataset into the environment and make it a federated dataset**

In [None]:
def Train_and_Test_Set(directory_name,base_dir='/content/COMP530/',image_size=(512,512),validation_split=0.2,seed=305):
  """Load an image directory into a (training, test) pair of unbatched tf.data datasets.

  The directory must contain the sub-folders 'NormalCases' and 'cancercases'.
  Labels are inferred from the folder: NormalCases -> 0, cancercases -> 1.
  Images are loaded as RGB and resized to `image_size`; pixel values are not
  rescaled here. The examples are shuffled with a fixed seed and then split
  into training and test subsets.

  args:
  directory_name - name of the dataset folder inside `base_dir`
  base_dir - folder that contains the dataset folder (defaults to the Colab clone location)
  image_size - (height, width) every image is resized to
  validation_split - fraction of the examples reserved for the test subset
  seed - seed used for the shuffle; shared by both subsets
  return:
  dataset_train, dataset_test - the two subsets, each yielding single (image, label) examples
  """
  import os  # local import so this cell does not depend on a change to the import cell

  directory=os.path.join(base_dir,directory_name)

  # Both subsets are built from ONE set of options: the seed and the split
  # fraction have to be identical in the two calls, otherwise the 'training'
  # and 'validation' subsets would be cut from differently shuffled file lists.
  shared_options=dict(
      labels='inferred',
      label_mode='int',
      class_names=['NormalCases','cancercases'],
      color_mode='rgb',
      image_size=image_size,
      shuffle=True,
      seed=seed,
      validation_split=validation_split,
      batch_size=None,
  )

  dataset_train=tf.keras.preprocessing.image_dataset_from_directory(directory,subset='training',**shared_options)
  dataset_test=tf.keras.preprocessing.image_dataset_from_directory(directory,subset='validation',**shared_options)

  return dataset_train,dataset_test

In [None]:
def federate_dataset(Dataset,clients):
  """
  Split a dataset of (image, label) examples into one shard per simulated client.

  The examples are dealt out, in iteration order, into contiguous shards whose
  sizes differ by at most one, so every example is assigned to exactly one
  client and at most `clients` clients are created. If the dataset holds fewer
  examples than `clients`, only as many clients as there are examples are created.

  args:
  Dataset - the dataset object to be separated between the clients in the simulation;
            it must yield single (image, label) examples and have a known, finite cardinality
  clients - the number of clients for the dataset to have come from in the simulated environment
  return:
  federated_dataset - a tff.simulation.datasets.TestClientData built from a dictionary keyed by
                      client id ("client_1", "client_2", ...). Each id points to an OrderedDict
                      holding a 'label' array (int32) and an 'image' array (float32)
  raises:
  ValueError - if `clients` is smaller than 1 or the cardinality of `Dataset` is unknown/infinite
  """
  if clients<1:
    raise ValueError("clients must be a positive integer, got {}".format(clients))

  image_count=int(tf.data.experimental.cardinality(Dataset).numpy())
  if image_count<0:
    # tf.data reports unknown/infinite cardinality as a negative sentinel
    raise ValueError("Dataset must have a known, finite cardinality to be split between clients")

  # Even split: the first `remainder` clients receive one extra example. The
  # previous floor-based quota dropped the trailing examples, and could create
  # more shards than `clients` when the remainder was at least one quota.
  base_size,remainder=divmod(image_count,clients)
  shard_sizes=[base_size+(1 if index<remainder else 0) for index in range(clients)]
  shard_sizes=[size for size in shard_sizes if size>0]

  # This assigns training examples to each client evenly. Heterogeneity of the
  # federated data could be explored by randomly assigning a client to each
  # training example instead, which would leave some clients with plenty of
  # local data and others suffering from a local paucity of data.
  client_train_dataset=collections.OrderedDict()
  shard_index=0
  y=[]
  x=[]
  for image in tfds.as_numpy(Dataset):
    if shard_index>=len(shard_sizes):
      break  # defensive: more examples were yielded than the cardinality reported
    x.append(image[0])
    y.append(image[1])
    if len(x)==shard_sizes[shard_index]:
      # assumes every image has the same shape so the list stacks into one array
      data=collections.OrderedDict((
          ('label', np.asarray(y,dtype=np.int32)),
          ('image', np.asarray(x,dtype=np.float32)),
      ))
      client_train_dataset["client_"+str(shard_index+1)]=data
      shard_index+=1
      y=[]
      x=[]

  federated_dataset=tff.simulation.datasets.TestClientData(client_train_dataset)
  return federated_dataset

In [None]:
def preprocess(dataset):
  """Turn one client's dataset of {'image', 'label'} examples into model-ready batches.

  The dataset is repeated NUM_EPOCHS times, shuffled with a buffer of
  SHUFFLE_BUFFER examples (fixed seed), grouped into batches of BATCH_SIZE,
  formatted by `format_batch` and prefetched.

  args:
  dataset - a tf.data.Dataset whose elements are mappings with the keys 'image' and 'label'
  return:
  a tf.data.Dataset of OrderedDicts with the keys 'x' (images) and 'y' (labels)
  """

  def format_batch(element):
    """standardize and resize the images of a batch and return the features as an 'OrderedDict'"""
    # per_image_standardization returns a NEW tensor; the result was previously
    # discarded, so the standardization never reached the model.
    standardized=tf.image.per_image_standardization(element['image'])
    return collections.OrderedDict(
        x=tf.image.resize(standardized,(224,224)),
        # labels become a column vector to line up with the model's single output unit
        y=tf.reshape(element['label'],[-1,1])
    )
  return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(BATCH_SIZE).map(format_batch).prefetch(PREFETCH_BUFFER)

In [None]:
  # Module-level augmentation pipeline: a random horizontal/vertical flip
  # followed by a random rotation with factor 0.2.
  # NOTE(review): make_federated_data builds its own local pipeline from the
  # same two layers, so this module-level object does not appear to be used
  # anywhere in the visible notebook - confirm before removing this cell.
  # The whole cell is indented; keep comments at the same indent so the cell
  # stays uniformly indented.
  data_augmentation = tf.keras.Sequential([
  tf.keras.layers.RandomFlip("horizontal_and_vertical"),
  tf.keras.layers.RandomRotation(0.2),
  ])

In [None]:
def make_federated_data(client_data,client_ids,training):
  """Build the list of per-client datasets that is passed into the federated environment.

  args:
  client_data - federated dataset exposing create_tf_dataset_for_client(client_id)
  client_ids - list of the client ids that may take part
  training - True to build data for a training round: a random, non-empty subset of at most
             NUM_CLIENTS clients is sampled and random augmentation is applied to the images.
             False to build evaluation data: every client is used and the images are left
             un-augmented, so the evaluation inputs are not randomly altered.
  return:
  a list with one tf.data.Dataset of (images, labels) batches per selected client
  """
  if training:
    # random.sample raises ValueError when asked for more ids than exist, so
    # the sample size is capped by the number of available clients.
    max_clients=min(NUM_CLIENTS,len(client_ids))
    client_ids=random.sample(client_ids,random.randint(1,max_clients))

    data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal_and_vertical"),
    tf.keras.layers.RandomRotation(0.2),
    ])

    def to_model_input(batch):
      # training=True is passed explicitly so the random layers are active
      # regardless of how Keras infers the mode outside of Model.fit
      return data_augmentation(batch['x'],training=True),batch['y']
  else:
    def to_model_input(batch):
      return batch['x'],batch['y']

  return[
         preprocess(client_data.create_tf_dataset_for_client(client_id)).map(to_model_input)
         for client_id in client_ids
  ]

In [None]:
# The original dataset, held in the dataset objects returned by the Keras loader.
dataset_train, dataset_test = Train_and_Test_Set("Dataset1")

# The same data split by the client it is simulated to have come from.
federated_train_dataset = federate_dataset(dataset_train, NUM_CLIENTS)
federated_test_dataset = federate_dataset(dataset_test, NUM_CLIENTS)

# An example dataset for a single client (the first one); its preprocessed
# form supplies the input specification for the federated model.
example_dataset = federated_train_dataset.create_tf_dataset_for_client(
    federated_train_dataset.client_ids[0]
)
preprocessed_example_dataset = preprocess(example_dataset)

# **Load a model pretrained on the ImageNet dataset as the CNN for the environment**

In [None]:
def ResNet_model():
  """Build a binary classifier on top of a frozen, ImageNet-pretrained ResNet50.

  The network takes 224x224 RGB images and outputs a single sigmoid value.
  Only the classification head (dropout plus Dense 64/32/16/1) is trainable.

  return:
  the assembled (uncompiled) tf.keras.Model
  """
  # ResNet50 is the frozen feature extractor for the transfer-learning model
  backbone=tf.keras.applications.ResNet50(include_top=False,weights='imagenet',input_shape=(224,224,3))
  backbone.trainable = False

  image_input = tf.keras.Input(shape=(224, 224, 3))
  # training=False keeps the backbone in inference mode
  features = backbone(image_input, training=False)
  # Convert features of shape `backbone.output_shape[1:]` to vectors
  features = tf.keras.layers.GlobalAveragePooling2D()(features)
  features = tf.keras.layers.Dropout(0.2)(features)
  for units in (64, 32, 16):
    features = tf.keras.layers.Dense(units,activation='relu')(features)
  # A Dense classifier with a single unit (binary classification)
  probability = tf.keras.layers.Dense(1,activation='sigmoid')(features)
  return tf.keras.Model(image_input, probability)


In [None]:
def MobileNet_model():
  """Build a binary classifier on top of a frozen, ImageNet-pretrained MobileNetV2.

  The network takes 224x224 RGB images and outputs a single sigmoid value.
  Only the classification head (dropout plus Dense 64/32/16/1) is trainable.

  return:
  the assembled (uncompiled) tf.keras.Model
  """
  # MobileNetV2 is the frozen feature extractor for the transfer-learning model
  backbone=tf.keras.applications.MobileNetV2(include_top=False,weights='imagenet',input_shape=(224,224,3))
  backbone.trainable = False

  image_input = tf.keras.Input(shape=(224, 224, 3))
  # training=False keeps the backbone in inference mode
  features = backbone(image_input, training=False)
  # Convert features of shape `backbone.output_shape[1:]` to vectors
  features = tf.keras.layers.GlobalAveragePooling2D()(features)
  features = tf.keras.layers.Dropout(0.2)(features)
  for units in (64, 32, 16):
    features = tf.keras.layers.Dense(units,activation='relu')(features)
  # A Dense classifier with a single unit (binary classification)
  probability = tf.keras.layers.Dense(1,activation='sigmoid')(features)
  return tf.keras.Model(image_input, probability)

In [None]:
def DenseNet_model():
  """Build a binary classifier on top of a frozen, ImageNet-pretrained DenseNet121.

  The network takes 224x224 RGB images and outputs a single sigmoid value.
  Only the classification head (dropout plus Dense 64/32/16/1) is trainable.

  return:
  the assembled (uncompiled) tf.keras.Model
  """
  # DenseNet121 is the frozen feature extractor for the transfer-learning model
  backbone=tf.keras.applications.DenseNet121(include_top=False,weights='imagenet',input_shape=(224,224,3))
  backbone.trainable = False

  image_input = tf.keras.Input(shape=(224, 224, 3))
  # training=False keeps the backbone in inference mode
  features = backbone(image_input, training=False)
  # Convert features of shape `backbone.output_shape[1:]` to vectors
  features = tf.keras.layers.GlobalAveragePooling2D()(features)
  features = tf.keras.layers.Dropout(0.2)(features)
  for units in (64, 32, 16):
    features = tf.keras.layers.Dense(units,activation='relu')(features)
  # A Dense classifier with a single unit (binary classification)
  probability = tf.keras.layers.Dense(1,activation='sigmoid')(features)
  return tf.keras.Model(image_input, probability)

In [None]:
def VGG_model():
  """Build a binary classifier on top of a frozen, ImageNet-pretrained VGG16.

  The network takes 224x224 RGB images and outputs a single sigmoid value.
  Only the classification head (dropout plus Dense 64/32/16/1) is trainable.

  return:
  the assembled (uncompiled) tf.keras.Model
  """
  # VGG16 is the frozen feature extractor for the transfer-learning model
  backbone=tf.keras.applications.VGG16(include_top=False,weights='imagenet',input_shape=(224,224,3))
  backbone.trainable = False

  image_input = tf.keras.Input(shape=(224, 224, 3))
  # training=False keeps the backbone in inference mode
  features = backbone(image_input, training=False)
  # Convert features of shape `backbone.output_shape[1:]` to vectors
  features = tf.keras.layers.GlobalAveragePooling2D()(features)
  features = tf.keras.layers.Dropout(0.2)(features)
  for units in (64, 32, 16):
    features = tf.keras.layers.Dense(units,activation='relu')(features)
  # A Dense classifier with a single unit (binary classification)
  probability = tf.keras.layers.Dense(1,activation='sigmoid')(features)
  return tf.keras.Model(image_input, probability)

In [None]:
# Sanity check: build one candidate network and print its layer summary.
# To inspect a different backbone, uncomment its pair of lines below and
# comment out the currently active pair.
#resnet=ResNet_model()
#resnet.summary()
#mobilenet=MobileNet_model()
#mobilenet.summary()
#densenet=DenseNet_model()
#densenet.summary()
vgg=VGG_model()
vgg.summary()

# **Create the federated environment based on the pretrained model** 

In [None]:
def federated_model():
  """Model function handed to TFF: wrap a freshly built Keras model as a federated model.

  A new Keras model is constructed on every call. Only the network that is
  actually trained is built; constructing the other three pretrained networks
  here as well would load their weights on each call without ever using them.
  To train a different backbone, replace DenseNet_model() below with
  ResNet_model(), MobileNet_model() or VGG_model().

  return:
  the result of tff.learning.from_keras_model for the chosen network, using binary
  cross-entropy as the loss and reporting accuracy, recall, precision and the four
  confusion-matrix counts
  """
  keras_model=DenseNet_model()
  return tff.learning.from_keras_model(
      keras_model,
      # the element spec of a preprocessed client dataset describes the model input
      input_spec=preprocessed_example_dataset.element_spec,
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=[
          tf.keras.metrics.BinaryAccuracy(),
          tf.keras.metrics.Recall(),
          tf.keras.metrics.Precision(),
          tf.keras.metrics.TruePositives(),
          tf.keras.metrics.TrueNegatives(),
          tf.keras.metrics.FalsePositives(),
          tf.keras.metrics.FalseNegatives(),
      ])

In [None]:
# Federated averaging process built from the model function above.
# Clients and server each use their own Adam optimizer; the client learning
# rate (0.0001) is ten times smaller than the server learning rate (0.001).
iterative_process = tff.learning.build_federated_averaging_process(
    federated_model,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.0001),
    server_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.001))

In [None]:
%load_ext tensorboard

In [None]:
%reload_ext tensorboard

In [None]:
# nest_asyncio lets an event loop be re-entered from inside the notebook's already running loop
na.apply()
logdir = "/tmp/logs/scalars/training/"
summary_writer = tf.summary.create_file_writer(logdir)
state=iterative_process.initialize()
# range(1, NUM_ROUNDS) below runs NUM_ROUNDS-1 rounds, i.e. 100 rounds numbered 1..100
NUM_ROUNDS=101
with summary_writer.as_default():
  for round_num in range(1, NUM_ROUNDS):
    # a fresh random subset of clients takes part in every round
    federated_train_data = make_federated_data(federated_train_dataset, federated_train_dataset.client_ids,training=True)
    state, metrics = iterative_process.next(state, federated_train_data)
    train=metrics['train']

    # Both ratios are guarded: a round with no negative examples, or with no
    # true positives (precision = recall = 0), would otherwise divide by zero.
    actual_negatives=train['true_negatives']+train['false_positives']
    specificity=train['true_negatives']/actual_negatives if actual_negatives else 0.0
    sensitivity=train['recall']
    # F-beta with beta=2: (1+2^2)*P*R / (2^2*P + R)
    f2_denominator=4*train['precision']+train['recall']
    f2_score=(5*train['precision']*train['recall'])/f2_denominator if f2_denominator else 0.0

    tf.summary.scalar('Binary Accuracy', train['binary_accuracy'], step=round_num)
    tf.summary.scalar('Specificity', specificity, step=round_num)
    tf.summary.scalar('Sensitivity', sensitivity, step=round_num)
    tf.summary.scalar('F2-Score', f2_score, step=round_num)
    tf.summary.scalar('Loss', train['loss'], step=round_num)
    print('round {:2d}, metrics={}'.format(round_num, metrics['train']),'(Specificity,',specificity,') (Sensitivity,',sensitivity,') (F2 Score',f2_score,')')

# **Output the results to graph visualisations**

In [None]:
!ls {logdir}
%tensorboard --logdir {logdir} --port=0

# **Evaluation on the test set**

In [None]:
# Federated evaluation computation built from the same model function used for training.
evaluation=tff.learning.build_federated_evaluation(federated_model)

In [None]:
# training=False: every test client is included (no random client sampling).
federated_test_data = make_federated_data(federated_test_dataset, federated_test_dataset.client_ids,training=False)

In [None]:
# Evaluate the model held in the final training state on the test clients.
test_metrics = evaluation(state.model, federated_test_data)

In [None]:
test=test_metrics['eval']

# Both ratios are guarded: a test set without negative examples, or a model
# with no true positives (precision = recall = 0), would otherwise divide by zero.
actual_negatives=test['true_negatives']+test['false_positives']
specificity=test['true_negatives']/actual_negatives if actual_negatives else 0.0
sensitivity=test['recall']
# F-beta with beta=2: (1+2^2)*P*R / (2^2*P + R); the variable keeps its original name f_score
f2_denominator=4*test['precision']+test['recall']
f_score=(5*test['precision']*test['recall'])/f2_denominator if f2_denominator else 0.0
accuracy=test['binary_accuracy']
print('Evaluation Metrics: (Accuracy:)',accuracy,'(F2-Score)',f_score,'(Specificity:)',specificity,'(Sensitivity:)',sensitivity)

In [None]:
# Show every raw evaluation metric, including the confusion-matrix counts.
print(test)

In [None]:
# Uncomment and run this cell to remove old outputs from the directory so new results can be seen on tensorboard

#!rm -R /tmp/logs/scalars/*