In [1]:
import wandb
from wandb.keras import WandbCallback

import numpy as np
from typing import Union
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model, Input, Sequential
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import (
    layers,
    losses,
    metrics,
    datasets,
    mixed_precision,
    optimizers,
)

In [2]:
CONFIGS = {
    "dataset_name": "CIFAR-10",
    "image_size": 32,
    "target_size": 72,
    "patch_size": 9,
    "num_mixer_layers": 4,
    "embedding_dim": 128,
    "channels_mlp_dim": 128,
    "num_classes": 10,
    "dropout": 0.25,
    "batch_size": 512,
    "learning_rate": 0.001,
    "epochs": 50,
    "label_smoothing": 0.0,
    "mixed_precision": True,
    "class_names": [
        "airplane", "automobile", "bird", "cat",
        "deer", "dog", "frog", "horse", "ship", "truck"
    ]
}

In [3]:
if CONFIGS["mixed_precision"]:
    mixed_precision.set_global_policy("mixed_float16")

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA RTX A6000, compute capability 8.6


2022-01-25 22:42:29.393952: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-25 22:42:29.423504: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-25 22:42:29.424735: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-25 22:42:29.426499: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [4]:
def get_cifar10(num_classes: int):
    (x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    return (x_train, y_train), (x_test, y_test)


(x_train, y_train), (x_test, y_test) = get_cifar10(num_classes=10)
print("x_train.shape:", x_train.shape)
print("y_train.shape:", y_train.shape)
print("x_test.shape:", x_test.shape)
print("y_test.shape:", y_test.shape)

x_train.shape: (50000, 32, 32, 3)
y_train.shape: (50000, 10)
x_test.shape: (10000, 32, 32, 3)
y_test.shape: (10000, 10)


In [5]:
def get_preprocessing_layer(
    data_batch: Union[np.ndarray, tf.Tensor], target_size: int
) -> Sequential:
    normalization = preprocessing.Normalization()
    normalization.adapt(data_batch)
    resize = preprocessing.Resizing(target_size, target_size)
    return Sequential([normalization, resize], name="preprocessing")


def get_augmentation_layer() -> Sequential:
    return keras.Sequential(
        [
            preprocessing.RandomFlip("horizontal"),
            preprocessing.RandomRotation(factor=0.02),
            preprocessing.RandomZoom(height_factor=0.2, width_factor=0.2),
        ],
        name="augmentation",
    )

In [6]:
def patch_embedding(
    inputs: tf.Tensor, embedding_dim: int, patch_size: int
) -> tf.Tensor:
    x = layers.Conv2D(embedding_dim, kernel_size=patch_size, strides=patch_size)(inputs)
    return layers.Reshape((x.shape[1] * x.shape[2], x.shape[3]))(x)


def mlp_block(inputs: tf.Tensor, mlp_dim: int) -> tf.Tensor:
    x = layers.Dense(mlp_dim)(inputs)
    x = layers.Activation("gelu")(x)
    return layers.Dense(x.shape[-1])(x)


def mixer_block(inputs: tf.Tensor, tokens_mlp_dim, channels_mlp_dim) -> tf.Tensor:
    y = layers.LayerNormalization()(inputs)
    y = layers.Permute((2, 1))(y)
    # Token Mixing
    y = mlp_block(y, tokens_mlp_dim)
    y = layers.Permute((2, 1))(y)
    x = layers.Add()([inputs, y])
    # Channel Mixing
    y = layers.LayerNormalization()(x)
    y = mlp_block(y, channels_mlp_dim)
    return layers.Add()([x, y])


def get_mlp_mixer_model(
    num_mixer_blocks: int,
    patch_size: int,
    embedding_dim: int,
    channels_mlp_dim: int,
    num_classes: int,
    preprocessing_layer: Union[Sequential, None],
    augmentation_layer: Union[Sequential, None],
) -> Model:
    inputs = Input(shape=(CONFIGS["image_size"], CONFIGS["image_size"], 3))
    preprocessed_inputs = (
        preprocessing_layer(inputs) if preprocessing_layer is not None else inputs
    )
    augmented_inputs = (
        augmentation_layer(preprocessed_inputs)
        if augmentation_layer is not None
        else preprocessed_inputs
    )
    x = patch_embedding(augmented_inputs, embedding_dim, patch_size)
    tokens_mlp_dim = x.shape[-2]
    for _ in range(num_mixer_blocks):
        x = mixer_block(x, tokens_mlp_dim, channels_mlp_dim)
    x = layers.LayerNormalization()(x)
    x = layers.Dropout(CONFIGS["dropout"])(x)
    x = layers.GlobalAveragePooling1D()(x)
    outputs = layers.Dense(num_classes, activation="softmax", dtype="float32")(x)
    return Model(inputs, outputs, name="mlp_mixer")

In [7]:
wandb.login(relogin=True)
run = wandb.init(
    project='mlp-mixer',
    name="cifar-10-mixed-precision",
    config=CONFIGS
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/paperspace/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mgeekyrakshit[0m (use `wandb login --relogin` to force relogin)


In [8]:
model = get_mlp_mixer_model(
    num_mixer_blocks=CONFIGS["num_mixer_layers"],
    patch_size=CONFIGS["patch_size"],
    embedding_dim=CONFIGS["embedding_dim"],
    channels_mlp_dim=CONFIGS["channels_mlp_dim"],
    num_classes=CONFIGS["num_classes"],
    preprocessing_layer=get_preprocessing_layer(
        data_batch=x_train, target_size=CONFIGS["target_size"]
    ),
    augmentation_layer=get_augmentation_layer(),
)
model.summary()

2022-01-25 22:42:42.345932: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-25 22:42:42.346421: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-25 22:42:42.347754: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-25 22:42:42.348902: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so retur

Model: "mlp_mixer"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
preprocessing (Sequential)      (None, 72, 72, 3)    7           input_1[0][0]                    
__________________________________________________________________________________________________
augmentation (Sequential)       (None, 72, 72, 3)    0           preprocessing[0][0]              
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 8, 8, 128)    31232       augmentation[0][0]               
__________________________________________________________________________________________

In [9]:
model.compile(
    optimizer=optimizers.Adam(learning_rate=CONFIGS["learning_rate"]),
    loss=losses.CategoricalCrossentropy(),
    metrics=[
        metrics.CategoricalAccuracy(name="accuracy"),
        metrics.TopKCategoricalAccuracy(3, name="top-3-accuracy"),
        metrics.TopKCategoricalAccuracy(5, name="top-5-accuracy"),
    ],
)

In [10]:
wandb_callback = WandbCallback(
    data_type='image',
    save_model=True,
    training_data=(x_train, y_train),
    validation_data=(x_test, y_test),
    labels=CONFIGS["class_names"]
)

history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=CONFIGS["batch_size"],
    epochs=CONFIGS["epochs"],
    validation_split=0.1,
    callbacks=[wandb_callback]
)

Epoch 1/50


2022-01-25 22:42:49.173335: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8201
2022-01-25 22:42:50.324613: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-01-25 22:42:50.324893: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-01-25 22:42:50.324912: W tensorflow/stream_executor/gpu/asm_compiler.cc:77] Couldn't get ptxas version string: Internal: Couldn't invoke ptxas --version
2022-01-25 22:42:50.325179: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-01-25 22:42:50.326021: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.
2022-01-25 22:42:51.400157: I tensorflow/stream_executor/cuda/c

Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [11]:
loss, accuracy, top_3_accuracy, top_5_accuracy = model.evaluate(x_test, y_test)

wandb.log({'Test Accuracy': round((1 - accuracy) * 100, 2)})
wandb.log({'Test Top 3 Accuracy': round((1 - top_3_accuracy) * 100, 2)})
wandb.log({'Test Top 5 Accuracy': round((1 - top_5_accuracy) * 100, 2)})

run.finish()



0,1
Test Accuracy,▁
Test Top 3 Accuracy,▁
Test Top 5 Accuracy,▁
accuracy,▁▃▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇███████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss,█▆▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
top-3-accuracy,▁▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
top-5-accuracy,▁▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████████████
val_accuracy,▁▂▃▃▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████████████
val_loss,█▆▅▅▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Test Accuracy,27.75
Test Top 3 Accuracy,6.99
Test Top 5 Accuracy,1.98
accuracy,0.8046
best_epoch,41.0
best_val_loss,0.78783
epoch,49.0
loss,0.54359
top-3-accuracy,0.967
top-5-accuracy,0.99298
