In [3]:
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np

In [4]:
import os
import sys

from google.colab import drive 
# Mount Google Drive inside the Colab VM; force_remount refreshes a stale mount.
drive.mount("/content/drive/", force_remount=True)

# Root folder of this project on the mounted drive.
colab_path = "/content/drive/My Drive/colab/final_project/"

# Make modules stored in the project folder importable. The membership guard
# keeps sys.path free of duplicate entries when this cell is re-run.
if colab_path not in sys.path:
    sys.path.append(colab_path)

# Image root, derived from colab_path so the project location is defined once.
# Resolves to "/content/drive/My Drive/colab/final_project/data/images".
IMG_PATH = os.path.join(colab_path, "data", "images")

Mounted at /content/drive/


The code and concept are mostly based on: https://keras.io/examples/vision/mlp_image_classification/#build-a-classification-model

In [5]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class Patches(layers.Layer):
    """Split a batch of images into flattened, non-overlapping square patches.

    Wraps ``tf.image.extract_patches``
    (https://www.tensorflow.org/api_docs/python/tf/image/extract_patches).
    Patches are taken on a regular, deterministic grid: the stride equals the
    patch size, and ``padding="VALID"`` drops any partial patch at the border.

    Patch extraction has no learnable parameters, so this layer is not a
    trainable part of the network.

    Args:
        patch_size: Side length of each square patch, in pixels of the input.
        num_patches: Number of patches produced per image. It must equal the
            number of grid positions ``tf.image.extract_patches`` yields for
            the input, otherwise the reshape in ``call`` fails.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, patch_size, num_patches, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size
        self.num_patches = num_patches

    def call(self, images):
        """Return patches shaped ``[batch_size, num_patches, patch_dims]``.

        Args:
            images: 4-D tensor as accepted by ``tf.image.extract_patches``.

        Returns:
            A 3-D tensor in which each row of the middle axis is one
            flattened patch.
        """
        # The batch dimension is only known at run time, so read it dynamically.
        batch_size = tf.shape(images)[0]

        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        # Static size of one flattened patch (last axis of the extraction result).
        patch_dims = patches.shape[-1]
        # Collapse the 2-D grid of patch positions into a single sequence axis.
        patches = tf.reshape(patches, [batch_size, self.num_patches, patch_dims])
        return patches

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update(
            {
                "patch_size": self.patch_size,
                "num_patches": self.num_patches,
            }
        )
        return config

In [6]:
class FNetLayer(layers.Layer):
    """
    https://arxiv.org/abs/2105.03824

    FNet: Mixing Tokens with Fourier Transforms

    We show that Transformer encoder architectures can be sped up, with 
    limited accuracy costs, by replacing the self-attention sublayers 
    with simple linear transformations that "mix" input tokens.

    ...

    FNet has a light memory footprint and is particularly efficient at 
    smaller model sizes; for a fixed speed and accuracy budget, 
    small FNet models outperform Transformer counterparts.

    One block computes, in order: the real part of a 2-D FFT of the input,
    a skip connection, layer normalization, a two-layer feed-forward network,
    a second skip connection, and a final layer normalization.

    Args:
        num_patches: Number of patches per example. Not used in the
            computation; kept for interface compatibility and stored so it
            is included in ``get_config``.
        embedding_dim: Units of both Dense layers in the feed-forward
            network. Because of the skip connections, the last dimension of
            the input must equal this value.
        dropout_rate: Dropout rate applied between the two Dense layers.
        *args, **kwargs: Forwarded unchanged to ``layers.Layer.__init__``.
    """
    def __init__(self, num_patches, embedding_dim, dropout_rate, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Keep the constructor arguments so get_config can report them.
        self.num_patches = num_patches
        self.embedding_dim = embedding_dim
        self.dropout_rate = dropout_rate

        self.ffn = keras.Sequential(
            [
                layers.Dense(units=embedding_dim, activation='relu'),
                layers.Dropout(rate=dropout_rate),
                layers.Dense(units=embedding_dim),
            ]
        )

        self.normalize1 = layers.LayerNormalization(epsilon=1e-6)
        self.normalize2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Apply one FNet block to ``inputs`` and return a same-shaped tensor."""
        # Token mixing: 2-D FFT over the two innermost dimensions, keeping only
        # the real part. tf.math.real states that intent explicitly; the
        # previous complex -> float32 tf.cast dropped the imaginary part
        # implicitly.
        spectrum = tf.signal.fft2d(tf.cast(inputs, dtype=tf.dtypes.complex64))
        x = tf.math.real(spectrum)

        # Add skip connection.
        x = x + inputs
        # Apply layer normalization.
        x = self.normalize1(x)
        # Apply feed-forward network.
        x_ffn = self.ffn(x)
        # Add skip connection.
        x = x + x_ffn
        # Apply layer normalization.
        return self.normalize2(x)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update(
            {
                "num_patches": self.num_patches,
                "embedding_dim": self.embedding_dim,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config

In [13]:
def _conv_bn_dropout(x, filters, dropout, strides=1):
    """Apply one 3x3 Conv2D (relu, 'same') -> BatchNormalization -> Dropout stage."""
    x = layers.Conv2D(
        filters=filters,
        kernel_size=3,
        strides=strides,
        activation='relu',
        padding='same',
    )(x)
    x = layers.BatchNormalization()(x)
    return layers.Dropout(dropout)(x)


def build_model(input_shape, num_classes, patch_size=8, num_blocks=4, dropout_rate=0.2, embedding_dim=256):
    """Build a convolutional front end followed by a stack of FNet blocks.

    Pipeline: six conv/batch-norm/dropout stages (two of them with stride 2),
    patch extraction on the resulting feature map, a linear patch embedding
    plus a learned position embedding, ``num_blocks`` FNet blocks, global
    average pooling, dropout, and a softmax classification layer.

    Args:
        input_shape: Shape of a single image (no batch dimension) with both
            spatial dimensions known, e.g. ``(128, 128, 3)``.
        num_classes: Number of output classes.
        patch_size: Side length of the square patches, measured on the
            feature map AFTER the convolutions, not on the raw image.
        num_blocks: Number of stacked ``FNetLayer`` blocks.
        dropout_rate: Dropout rate used inside the FNet blocks and before the
            classification layer.
        embedding_dim: Size of the patch embedding and FNet hidden width.

    Returns:
        An uncompiled ``keras.Model`` whose output is a softmax probability
        vector of length ``num_classes`` (NOT logits).

    Raises:
        ValueError: If the spatial size of the feature map is unknown, or if
            it is smaller than one patch.
    """
    inputs = layers.Input(shape=input_shape)

    # Convolution layers
    x = _conv_bn_dropout(inputs, filters=3, dropout=.3)

    # --- stride layers: each one downsamples the spatial dimensions ---
    x = _conv_bn_dropout(x, filters=64, dropout=.1, strides=(2, 2))
    x = _conv_bn_dropout(x, filters=64, dropout=.1, strides=(2, 2))
    # -------

    x = _conv_bn_dropout(x, filters=64, dropout=.1)

    # larger convolutions once image is smaller
    x = _conv_bn_dropout(x, filters=128, dropout=.1)
    x = _conv_bn_dropout(x, filters=256, dropout=.1)

    # The number of patches depends on the size of the feature map AFTER the
    # convolutions. Read it from the tensor itself rather than re-deriving it
    # from the input width and an assumed stride count, so non-square inputs
    # and sizes not divisible by the total stride are handled correctly.
    feature_rows, feature_cols = x.shape[1], x.shape[2]
    if feature_rows is None or feature_cols is None:
        raise ValueError(
            "build_model needs fully defined spatial dimensions in input_shape, "
            f"got {input_shape!r}."
        )
    # Patches are extracted with 'VALID' padding, so partial patches are dropped.
    num_patches = (feature_rows // patch_size) * (feature_cols // patch_size)
    if num_patches == 0:
        raise ValueError(
            f"patch_size={patch_size} is larger than the "
            f"{feature_rows}x{feature_cols} feature map; no patch fits."
        )

    # Create patches.
    patches = Patches(patch_size, num_patches)(x)

    # Encode patches to generate a [batch_size, num_patches, embedding_dim] tensor.
    x = layers.Dense(units=embedding_dim)(patches)

    # Learned positional encoding for FNet: one embedding vector per patch
    # index, broadcast over the batch dimension when added.
    positions = tf.range(start=0, limit=num_patches, delta=1)
    position_embedding = layers.Embedding(
        input_dim=num_patches, output_dim=embedding_dim
    )(positions)
    x = x + position_embedding

    # Process patches using n FNets
    fnet_blocks = keras.Sequential(
        [
            FNetLayer(num_patches, embedding_dim, dropout_rate) for _ in range(num_blocks)
        ]
    )
    x = fnet_blocks(x)

    # Apply global average pooling to generate a [batch_size, embedding_dim]
    # representation tensor.
    representation = layers.GlobalAveragePooling1D()(x)

    # Apply dropout.
    representation = layers.Dropout(rate=dropout_rate)(representation)

    # Class probabilities: softmax is applied here, so any loss used with this
    # model must treat its output as probabilities, not logits.
    probabilities = layers.Dense(num_classes, activation='softmax')(representation)

    # Create the Keras model.
    return keras.Model(inputs=inputs, outputs=probabilities)

>To avoid biasing the annotation for easily classifiable cell images, separate classes were included for artefacts, cells that could not be identified, and other cells belonging to morphological classes not represented in the scheme. From the annotated regions, 250 x 250-pixel images were extracted containing the respective annotated cell as a main content in the patch center (Figure 1A). No further cropping, filtering, or segmentation between foreground and background took place, leaving the algorithm with the task of identifying the main image content relevant for the respective annotation.

- Matek, Krappe, et al., "Highly accurate differentiation of bone marrow cell morphologies using deep neural networks on a large image data set", p. 1918

In [8]:
# Image geometry and batching used throughout the notebook.
IMG_DIM = 128
BATCH_SIZE = 200
IMAGE_SHAPE = (IMG_DIM, IMG_DIM, 3)

# Load the training and validation splits from the same directory.
# Both calls share every argument except `subset`; the identical seed and
# validation_split are what keep the two splits disjoint. Images are resized
# to IMG_DIM x IMG_DIM and labels are one-hot encoded ('categorical').
train_ds, val_ds = (
    tf.keras.preprocessing.image_dataset_from_directory(
        directory=IMG_PATH,
        label_mode='categorical',
        validation_split=0.2,
        subset=split_name,
        seed=1337,
        image_size=(IMG_DIM, IMG_DIM),
        batch_size=BATCH_SIZE,
    )
    for split_name in ("training", "validation")
)

Found 66438 files belonging to 21 classes.
Using 53151 files for training.
Found 66438 files belonging to 21 classes.
Using 13287 files for validation.


In [14]:
# Side length, in pixels, of the square patches extracted from the convolved
# feature map. The original note reports that ~0.25 of the side length worked
# well with 32x32.
PATCH_SIZE = 8

# How many FNet blocks are stacked.
# NOTE(review): the original note said more blocks "greatly decreases training
# time"; every extra block adds computation per step, so this presumably
# referred to training speed -- confirm.
NUM_BLOCKS = 4

# Hidden units in each FNet block (also the patch embedding size).
HIDDEN_SIZE = 256

# Number of output classes; matches the 21 classes reported by the dataset loader.
NUM_CLASSES = 21

model = build_model(
    IMAGE_SHAPE,
    NUM_CLASSES,
    patch_size=PATCH_SIZE,
    num_blocks=NUM_BLOCKS,
    embedding_dim=HIDDEN_SIZE,
)
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 128, 128, 3)]     0         
                                                                 
 conv2d_13 (Conv2D)          (None, 128, 128, 3)       84        
                                                                 
 batch_normalization_13 (Bat  (None, 128, 128, 3)      12        
 chNormalization)                                                
                                                                 
 dropout_23 (Dropout)        (None, 128, 128, 3)       0         
                                                                 
 conv2d_14 (Conv2D)          (None, 64, 64, 64)        1792      
                                                                 
 batch_normalization_14 (Bat  (None, 64, 64, 64)       256       
 chNormalization)                                          

In [None]:
# Metrics tracked during training and validation.
MODEL_PERFORMANCE_METRICS = [
    # make sure your classes are one-hot encoded
    tf.keras.metrics.CategoricalAccuracy(name="accuracy"),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
    # precision recall curve
    tf.keras.metrics.AUC(name='prc', curve='PR'),
]
model.compile(
    optimizer='adam',
    # The model's final Dense layer already applies softmax, so its outputs are
    # probabilities. from_logits=True would apply softmax a second time inside
    # the loss and distort the gradients.
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=MODEL_PERFORMANCE_METRICS,
)
# train_ds is already batched by image_dataset_from_directory, so batch_size
# is not passed to fit().
history = model.fit(
    train_ds,
    epochs=20,
    validation_data=val_ds,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20