<a href="https://colab.research.google.com/github/tylerb55/COMP530/blob/main/IraqCNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
! git clone https://github.com/tylerb55/COMP530.git

Cloning into 'COMP530'...
remote: Enumerating objects: 1242, done.[K
remote: Counting objects: 100% (316/316), done.[K
remote: Compressing objects: 100% (313/313), done.[K
remote: Total 1242 (delta 28), reused 0 (delta 0), pack-reused 926[K
Receiving objects: 100% (1242/1242), 159.90 MiB | 23.32 MiB/s, done.
Resolving deltas: 100% (40/40), done.
Checking out files: 100% (1157/1157), done.


In [3]:
! pip install --quiet --upgrade tensorflow-federated

[K     |████████████████████████████████| 819 kB 5.2 MB/s 
[K     |████████████████████████████████| 237 kB 47.1 MB/s 
[K     |████████████████████████████████| 121 kB 68.4 MB/s 
[K     |████████████████████████████████| 53 kB 1.8 MB/s 
[K     |████████████████████████████████| 4.0 MB 35.9 MB/s 
[K     |████████████████████████████████| 887 kB 52.6 MB/s 
[K     |████████████████████████████████| 65.1 MB 91 kB/s 
[K     |████████████████████████████████| 45 kB 3.1 MB/s 
[K     |████████████████████████████████| 251 kB 59.4 MB/s 
[K     |████████████████████████████████| 462 kB 59.2 MB/s 
[K     |████████████████████████████████| 4.2 MB 25.4 MB/s 
[?25h  Building wheel for jax (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
spacy 2.2.4 requires tqdm<5.0.0,>=4.38.0, but you have tqdm 4.28.1 which is incompatible.
py

Put in the necessary upgrades for the environment to prevent potential compatability errors in future code.

In [4]:
! pip install --quiet --upgrade nest-asyncio

In [5]:
import numpy as np
import pandas as pd
import collections
import matplotlib.image as img
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_federated as tff
import tensorflow_datasets as tfds
import nest_asyncio as na

# **Preprocessing the input data**

In [6]:
#Have a play around with number of clients and batch size and see how they affect the model
NUM_CLIENTS=5 
NUM_EPOCHS=100
BATCH_SIZE=20
SHUFFLE_BUFFER=100
PREFETCH_BUFFER=10

def preprocess(dataset):

  def format_batch(element):
    """flatten the pixels in a batch and return the features as an 'OrderedDict'"""
    return collections.OrderedDict(
        x=tf.reshape(element['image'],[-1,262144]),
        y=tf.reshape(element['label'],[-1,1])#the shape of x may change as we may utilise a smaller image than 512x512 pixels
    )
    return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(BATCH_SIZE).map(format_batch).prefetch(PREFETCH_BUFFER)


maybe store the dataset in cache memory to speed up training instead of loading off disk. (dataset.cache())

# **Image Directory to Dataset**

In [7]:
def Train_Test_Set(directory_name):
  """a function to load the images in a large directory into a tensorflow dataset object
  the data is split 90:10 in training:validation. The dataset is shuffled before splitting
  and the images are formatted to 512x512 pixels and grayscale(one color channel and the values range from 0 to 255)"""
  dataset_train=tf.keras.preprocessing.image_dataset_from_directory('/content/COMP530/'+directory_name,
                                                                    labels='inferred',
                                                                    label_mode='int',
                                                                    class_names=['NormalCases','cancercases'],
                                                                    color_mode='grayscale',
                                                                    image_size=(512,512),
                                                                    shuffle=True,
                                                                    seed=305,
                                                                    validation_split=0.1,
                                                                    subset='training',
                                                                    batch_size=None
                                                                    )

  dataset_test=tf.keras.preprocessing.image_dataset_from_directory('/content/COMP530/'+directory_name,
                                                                    labels='inferred',
                                                                    label_mode='int',
                                                                    class_names=['NormalCases','cancercases'],
                                                                    color_mode='grayscale',
                                                                    image_size=(512,512),
                                                                    shuffle=True,
                                                                    seed=305,
                                                                    validation_split=0.1,
                                                                    subset='validation',
                                                                    batch_size=None
                                                                    )

  return dataset_train,dataset_test


# **Federated Dataset Preparation**

In [8]:
def federated_training_set(Dataset,clients):
  """
  args:
  Dataset - the dataset object to be passed into the function and separated between the clients in the simulation
  clients - the number of clients for the dataset to have come from in the simulated environment
  return:
  dataset_by_client - the original dataset federated between the number of clients. 
  each client is labelled by their client id, in a dictionary. each id points to an individual dataset object
  """
  image_count=tf.data.experimental.cardinality(Dataset).numpy()
  image_per_set=int(np.floor(image_count/clients))

  client_train_dataset=collections.OrderedDict()
  Dataset=tfds.as_numpy(Dataset)
  count=0
  client_num=1
  y=[]
  x=[]
  """this code snippet assigns particular training examples to each client. In this example they have been evenly distributed
  hetrogeneity of federated data could be explored by randomly assigning a clients to each training example.
  This would result in some clients having more than enough training data locally and some  and some suffering from data
  paucity locally. A greater reflection of real life struggles when implementing federated learning."""
  for image in Dataset:
    count+=1
    y.append(image[1])
    x.append(image[0])
    if(count==image_per_set):
      x=np.asarray(x,dtype=np.float32)
      y=np.asarray(y,dtype=np.int32)
      data=collections.OrderedDict((('label', y), ('image', x)))
      client_train_dataset["client_"+str(client_num)]=data
      count=0
      client_num+=1
      y=[]
      x=[]

  federated_dataset=tff.simulation.datasets.TestClientData(client_train_dataset)

  dataset_by_client=[]
  for i in range(len(federated_dataset.client_ids)):
    dataset_by_client.append(preprocess(federated_dataset.create_tf_dataset_for_client(federated_dataset.client_ids[i])))


  return dataset_by_client
    
dataset_1_train, dataset_1_test=Train_Test_Set("Dataset1")
federated_dataset_1=federated_training_set(dataset_1_train,NUM_CLIENTS)

Found 1097 files belonging to 2 classes.
Using 988 files for training.
Found 1097 files belonging to 2 classes.
Using 109 files for validation.


# **CNN 1**

In [9]:
#Be sure to include a rescaling layer at the start of the CNN model. This will scale the pixel values from 0-255 to 0-1 which is much easier for the cnn to interpret
#layers.Rescaling(1./255, input_shape=(-1,262144))
class_names=dataset_1_train.class_names

# **AlexNet CNN**

In [23]:
AlexNet = tf.keras.models.Sequential([
                                      tf.keras.layers.Conv2D(filters=96,kernel_size=(11,11),strides=(4,4),activation='relu',input_shape=(512,512,1)),
                                      tf.keras.layers.BatchNormalization(),
                                      tf.keras.layers.MaxPool2D(pool_size=(3,3),strides=(2,2)),
                                      tf.keras.layers.Conv2D(filters=256,kernel_size=(5,5),strides=(1,1),activation='relu',padding='same'),
                                      tf.keras.layers.BatchNormalization(),
                                      tf.keras.layers.MaxPool2D(pool_size=(3,3),strides=(2,2)),
                                      tf.keras.layers.Conv2D(filters=384,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same'),
                                      tf.keras.layers.BatchNormalization(),
                                      tf.keras.layers.Conv2D(filters=384,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same'),
                                      tf.keras.layers.BatchNormalization(),
                                      tf.keras.layers.Conv2D(filters=256,kernel_size=(3,3),strides=(1,1),activation='relu',padding='same'),
                                      tf.keras.layers.BatchNormalization(),
                                      tf.keras.layers.MaxPool2D(pool_size=(3,3),strides=(2,2)),
                                      tf.keras.layers.Flatten(),
                                      tf.keras.layers.Dense(4096,activation='relu'),
                                      tf.keras.layers.Dropout(0.5),
                                      tf.keras.layers.Dense(4096,activation='relu'),
                                      tf.keras.layers.Dropout(0.5),
                                      tf.keras.layers.Dense(10,activation='softmax')
])

In [24]:
AlexNet.compile(loss='sparse_categorical_crossentropy',optimizer=tf.optimizers.SGD(learning_rate=0.001),metrics=['accuracy'])
AlexNet.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_25 (Conv2D)          (None, 126, 126, 96)      11712     
                                                                 
 batch_normalization_25 (Bat  (None, 126, 126, 96)     384       
 chNormalization)                                                
                                                                 
 max_pooling2d_15 (MaxPoolin  (None, 62, 62, 96)       0         
 g2D)                                                            
                                                                 
 conv2d_26 (Conv2D)          (None, 62, 62, 256)       614656    
                                                                 
 batch_normalization_26 (Bat  (None, 62, 62, 256)      1024      
 chNormalization)                                                
                                                      

In [22]:
AlexNet.fit(dataset_1_train,epochs=50,validation_data=dataset_1_test,validation_freq=1)

Epoch 1/50


ValueError: ignored

# **Example Image**

In [None]:
image = "/content/COMP530/Dataset1/NormalCases/Normal case (1).jpg"
plotimg=img.imread(image)
plot=plt.imshow(plotimg)
plt.show()