The goal of this assignment is to devise the best possible model for CIFAR-10. You can load the data using the cifar10.py module. Note that the test set is different from that of the official CIFAR-10.

The task is a competition. Everyone who submits a solution which achieves at least 65% test set accuracy will get 5 points; the remaining 5 points will be distributed depending on the relative ordering of your solutions. Note that my solutions usually need to achieve around 80% on the development set to score 65% on the test set.

You may want to start with the cifar_competition.py template which generates the test set annotation in the required format.

In [1]:
#!/usr/bin/env python3
# 53907afe-531b-11ea-a595-00505601122b
# b7ea974c-d389-11e8-a4be-00505601122b

In [2]:
!cp /kaggle/input/cifar10/cifar10.py /kaggle/working/cifar10.py
!cp /kaggle/input/cifar10-data/cifar10_competition.npz /kaggle/working/cifar10_competition.npz

In [3]:
#!pip install -U tensorflow==2.8 tensorflow-addons==0.16.1 tensorflow-probability==0.16.0 tensorflow-hub==0.12.0 scipy
!pip freeze | grep tensorflow

tensorflow @ file:///opt/conda/conda-bld/dlenv-tf-2-6-gpu_1639878970787/work/tensorflow-2.6.2-cp37-cp37m-linux_x86_64.whl
tensorflow-addons==0.14.0
tensorflow-cloud==0.1.14
tensorflow-datasets==4.3.0
tensorflow-estimator==2.6.0
tensorflow-gcs-config==2.6.0
tensorflow-hub==0.12.0
tensorflow-io==0.21.0
tensorflow-metadata==1.5.0
tensorflow-probability==0.14.1
tensorflow-serving-api==2.7.0
tensorflow-transform==1.5.0


In [4]:
import argparse
import datetime
import os
import re

os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")  # Report only TF errors by default

import numpy as np
import tensorflow as tf

from cifar10 import CIFAR10

In [5]:
parser = argparse.ArgumentParser()
parser.add_argument("--batch_size", default=None, type=int, help="Batch size.")
parser.add_argument("--epochs", default=None, type=int, help="Number of epochs.")
parser.add_argument("--seed", default=42, type=int, help="Random seed.")
parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
parser.add_argument("--checkpoints_period", default=None, type=int, help="Checkpoint callback period.")
parser.add_argument("--stopping_patience", default=None, type=int, help="Early stopping epochs patience.")
parser.add_argument("--densenet_filters", default=32, type=int, help="")
parser.add_argument("--densenet_block_sizes", nargs="+", type=int, default=[6, 12, 24, 16], help="Individual dense block sizes") # default being DenseNet-121
parser.add_argument("--label_smoothing", default=None, type=float, help="")
parser.add_argument("--learning_rate", default=0.01, type=float, help="Initial model learning rate.")

args = parser.parse_args([
    '--batch_size=128',
    '--epochs=50',
    '--checkpoints_period=3',
    '--stopping_patience=3',
    '--densenet_filters=32',
    '--densenet_block_sizes', '3', '6', '10', '8',
    '--label_smoothing=0.1'
] if "__file__" not in globals() else None)

# Create logdir name
args.logdir = os.path.join(
    "logs",
    "{}-{}-{}".format(
        os.path.basename(globals().get("__file__", "notebook")),
        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
        ",".join(
            (
                "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v)
                for k, v in sorted(vars(args).items())
            )
        ),
    ),
)

#tf.keras.utils.set_random_seed(args.seed)
tf.random.set_seed(args.seed) # tf2.6 (I have gpu issues on tf2.8 unfortunately)
tf.config.threading.set_inter_op_parallelism_threads(args.threads)
tf.config.threading.set_intra_op_parallelism_threads(args.threads)

args

Namespace(batch_size=128, checkpoints_period=3, densenet_block_sizes=[3, 6, 10, 8], densenet_filters=32, epochs=50, label_smoothing=0.1, learning_rate=0.01, logdir='logs/notebook-2022-03-23_135739-bs=128,cp=3,dbs=[3, 6, 10, 8],df=32,e=50,ls=0.1,lr=0.01,s=42,sp=3,t=1', seed=42, stopping_patience=3, threads=1)

In [6]:
cifar = CIFAR10()

# When label smoothing is requested, the training and development labels are
# converted to one-hot vectors (the smoothed categorical loss selected later
# in this file is fed these instead of integer class ids).
if args.label_smoothing:
    for split in (cifar.train, cifar.dev):
        split.data['labels'] = tf.keras.utils.to_categorical(
            split.data['labels'],
            num_classes=cifar.LABELS
        )

# Length of the cosine learning-rate schedule, in optimizer steps. An epoch
# consists of ceil(train_size / batch_size) batches (the last, smaller batch
# is still a step), so the per-epoch count is rounded up; truncating the
# fractional total instead would make the schedule reach zero before the last
# epoch finishes.
# NOTE(review): assumes cifar.train.size is the integer number of training
# examples -- confirm against cifar10.py.
steps_per_epoch = (cifar.train.size + args.batch_size - 1) // args.batch_size
args.decay_steps = args.epochs * steps_per_epoch

print(cifar.train.data['images'].shape, cifar.train.data['labels'].shape)
print(CIFAR10.LABELS, CIFAR10.LABEL_NAMES)
print(CIFAR10.H, CIFAR10.W, CIFAR10.C)

(45000, 32, 32, 3) (45000, 10)
10 ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
32 32 3


In [7]:
# Building unit shared by the dense blocks and the transition layers.
# The layers are applied in the order BN -> Activation -> Conv (instead of
# Conv -> BN -> Activation), so the value returned is the plain convolution
# output, which can be concatenated directly into a dense block.
def _dense_block_part(hidden, filters, kernel, activation="relu"):
    """Apply batch normalization, an activation and a stride-1 convolution.

    Args:
        hidden: Input tensor.
        filters: Number of output channels of the convolution.
        kernel: Kernel size of the convolution.
        activation: Activation applied after batch normalization.

    Returns:
        The output tensor of the "same"-padded convolution.
    """
    normalized = tf.keras.layers.BatchNormalization()(hidden)
    activated = tf.keras.layers.Activation(activation)(normalized)
    convolution = tf.keras.layers.Conv2D(filters, kernel, strides=1, padding="same")
    return convolution(activated)

def dense_block(hidden, filters, dense_block_size):
    """Stack `dense_block_size` densely connected bottleneck units.

    Each unit runs a 1x1 convolution with 4 * filters channels followed by a
    3x3 convolution with `filters` channels, and its output is concatenated
    in front of everything the unit received as input.

    Args:
        hidden: Input tensor of the block.
        filters: Number of channels each unit adds to the feature map.
        dense_block_size: Number of units in the block.

    Returns:
        The input tensor concatenated with the outputs of all units.
    """
    features = hidden
    for _ in range(dense_block_size):
        bottleneck = _dense_block_part(features, 4 * filters, 1)  # 1x1 conv
        new_features = _dense_block_part(bottleneck, filters, 3)  # 3x3 conv
        # Keep all earlier feature maps alongside the freshly computed ones.
        features = tf.keras.layers.Concatenate()([new_features, features])
    return features

def transition_layer(hidden):
    """Halve the channel count with a 1x1 convolution, then max-pool.

    Args:
        hidden: Input tensor; its last dimension is the channel count.

    Returns:
        The tensor after BN -> ReLU -> 1x1 convolution (half the input
        channels, rounded down) and a 2x2, stride-2, "same"-padded max pooling.
    """
    halved_channels = hidden.shape[-1] // 2
    compressed = _dense_block_part(hidden, halved_channels, 1)
    downsample = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding="same")
    return downsample(compressed)
    

# Architecture inspired by DenseNet121 (https://arxiv.org/pdf/1608.06993.pdf),
# mostly with reduced parameters and other specifics changed.
#
# The idea of _dense_block_part doing BN -> Activ -> Conv instead of
# Conv -> BN -> Activ is taken from
# https://towardsdatascience.com/creating-densenet-121-with-tensorflow-edbc08a956d8
def build_denselike_net(filters, dense_block_sizes):
    """Build a DenseNet-like classifier for CIFAR-10 images.

    The network consists of a stem (7x7 stride-2 convolution with 64 filters
    followed by a 3x3 stride-2 max pooling), a sequence of dense blocks with a
    transition layer between consecutive blocks, global average pooling and a
    softmax classification layer.

    Args:
        filters: Number of channels added by every unit of a dense block.
        dense_block_sizes: Iterable with the number of units of each dense
            block; must contain at least one entry.

    Returns:
        An uncompiled `tf.keras.Model` mapping images of shape
        [CIFAR10.H, CIFAR10.W, CIFAR10.C] to CIFAR10.LABELS class probabilities.

    Raises:
        ValueError: If `dense_block_sizes` is empty.
    """
    dense_block_sizes = list(dense_block_sizes)
    if not dense_block_sizes:
        raise ValueError("dense_block_sizes must contain at least one dense block size")

    inputs = tf.keras.layers.Input(shape=[CIFAR10.H, CIFAR10.W, CIFAR10.C])
    hidden = tf.keras.layers.Conv2D(64, 7, 2, "same")(inputs)
    hidden = tf.keras.layers.MaxPooling2D(3, 2)(hidden)

    last_block_index = len(dense_block_sizes) - 1
    for block_index, dense_block_size in enumerate(dense_block_sizes):
        hidden = dense_block(hidden, filters, dense_block_size)
        # The classifier head reads the output of the last dense block
        # directly, so a transition layer is only needed between blocks;
        # building one after the last block would create layers that never
        # become part of the model.
        if block_index < last_block_index:
            hidden = transition_layer(hidden)

    hidden = tf.keras.layers.GlobalAveragePooling2D()(hidden)
    # An extra Flatten -> Dense(128) -> BN -> ReLU head was tried by the
    # original author and left disabled.
    outputs = tf.keras.layers.Dense(CIFAR10.LABELS, activation="softmax")(hidden)
    return tf.keras.Model(inputs=inputs, outputs=outputs)

model = build_denselike_net(args.densenet_filters, args.densenet_block_sizes)

# With label smoothing the dense (one-hot) loss and metric are used, with the
# smoothing applied by the loss. Without it the sparse loss and metric are
# used; SparseCategoricalCrossentropy has no `label_smoothing` argument, so
# nothing may be passed to it here (doing so raises a TypeError).
if args.label_smoothing:
    loss = tf.losses.CategoricalCrossentropy(label_smoothing=args.label_smoothing)
    metrics = [tf.metrics.CategoricalAccuracy(name="accuracy")]
else:
    loss = tf.losses.SparseCategoricalCrossentropy()
    metrics = [tf.metrics.SparseCategoricalAccuracy(name="accuracy")]

# Adam with a cosine-decayed learning rate that starts at args.learning_rate
# and decays over args.decay_steps optimizer steps.
learning_rate_schedule = tf.keras.optimizers.schedules.CosineDecay(
    args.learning_rate, args.decay_steps
)
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=learning_rate_schedule),
    loss=loss,
    metrics=metrics,
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 16, 16, 64)   9472        input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 7, 7, 64)     0           conv2d[0][0]                     
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 7, 7, 64)     256         max_pooling2d[0][0]              
______________________________________________________________________________________________

In [8]:
# Collect the training callbacks. TensorBoard logging is always enabled;
# checkpointing and early stopping are optional and only created when the
# corresponding argument is set.
callbacks = []
tb_callback = tf.keras.callbacks.TensorBoard(args.logdir)
callbacks.append(tb_callback)
if args.checkpoints_period:
    # Weights are saved every `checkpoints_period` epochs.
    # NOTE(review): `period` is reported as deprecated by Keras in favour of
    # `save_freq`; it is kept because the notebook is pinned to TF 2.6.
    checkpoints = tf.keras.callbacks.ModelCheckpoint(
        'weights{epoch:08d}.h5',
        save_weights_only=True,
        period=args.checkpoints_period,
    )
    callbacks.append(checkpoints)
if args.stopping_patience:
    # NOTE(review): this monitors the training loss ('loss'), not 'val_loss'
    # -- confirm that this is intended.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=args.stopping_patience)
    callbacks.append(early_stopping)

# Random augmentations applied to the training images on the fly.
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

logs = model.fit(
    train_generator.flow(
        cifar.train.data["images"],
        cifar.train.data["labels"],
        batch_size=args.batch_size,
        seed=args.seed,
    ),
    # presumably disabled because the generator handles ordering itself -- confirm
    shuffle=False,
    epochs=args.epochs,
    validation_data=(cifar.dev.data["images"], cifar.dev.data["labels"]),
    # Pass the list assembled above, so that disabling checkpoints or early
    # stopping does not reference a callback that was never created.
    callbacks=callbacks,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


AttributeError: 'History' object has no attribute 'items'

In [9]:
# Write the predicted class index of every test image, one per line, to
# cifar_competition_test.txt inside the log directory.
os.makedirs(args.logdir, exist_ok=True)
predictions_path = os.path.join(args.logdir, "cifar_competition_test.txt")
with open(predictions_path, "w", encoding="utf-8") as predictions_file:
    test_probabilities = model.predict(
        cifar.test.data["images"], batch_size=args.batch_size
    )
    # The most probable class of each row is the prediction for that image.
    for predicted_label in np.argmax(test_probabilities, axis=1):
        print(predicted_label, file=predictions_file)