# Transfer Learning

## Set up

In [1]:
# Pin TensorFlow to 2.9.1: saving EfficientNet models is not supported by the TF version currently installed in this runtime

!pip install tensorflow==2.9.1 --quiet
import tensorflow as tf
print(tf.__version__)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.7/511.7 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m438.7/438.7 KB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.8/5.8 MB[0m [31m61.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m69.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 KB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m58.8 MB/s[0m eta [36m0:00:00[0m
[?25h2.9.1


In [2]:
import tensorflow.keras as K
import numpy as np

# Test 0

Default parameters

In [None]:
def preprocess_data(X, Y):
    """
    One-hot encodes the labels; the images are passed through untouched

    X: image data, returned as-is
    Y: class labels, converted to one-hot rows over 10 classes

    Returns the tuple (X, one-hot Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu'):
    """
    Builds and compiles a CIFAR-10 classifier on a frozen EfficientNetV2B2

    init: kernel initializer of the softmax output layer
    activation: not used by this variant (it has no hidden dense layers)

    Returns the compiled model
    """
    target_size = 224

    # 32x32 RGB images, upscaled to the size fed to the backbone
    images = K.layers.Input(shape=(32, 32, 3))
    upscaled = K.layers.Resizing(
        target_size,
        target_size,
        interpolation="bilinear",
        crop_to_aspect_ratio=False,
    )(images)

    # ImageNet-pretrained backbone without its top; its weights stay frozen
    backbone = K.applications.EfficientNetV2B2(
        include_top=False,
        weights='imagenet',
        input_shape=(target_size, target_size, 3),
    )
    backbone.trainable = False
    features = backbone(upscaled)

    # a 7x7 'valid' window collapses the 7x7 feature map to 1x1
    pooled = K.layers.AveragePooling2D(
        pool_size=7, strides=1, padding='valid')(features)
    flattened = K.layers.Flatten()(pooled)

    # 10-way softmax classifier
    probabilities = K.layers.Dense(
        units=10, activation='softmax', kernel_initializer=init)(flattened)

    classifier = K.models.Model(inputs=images, outputs=probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=K.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return classifier

if __name__ == "__main__":
    # hyperparameters for this run
    init = K.initializers.he_normal()
    activation = 'relu'
    patience = 2
    batch_size = 32
    epochs = 100
    # alpha and decay_rate belong to the learning-rate schedule
    # alpha / (1 + epoch * decay_rate), which is switched off in this test
    alpha = 0.1
    decay_rate = 1

    # CIFAR-10 comes as two (images, labels) splits; the second one is
    # used as validation data
    train_split, valid_split = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(*train_split)
    x_valid, y_valid = preprocess_data(*valid_split)

    model = build_model(init, activation)
    model.summary()

    # early stopping plus a checkpoint of the best model by val_loss
    early_stopping = K.callbacks.EarlyStopping(patience=patience)
    best_checkpoint = K.callbacks.ModelCheckpoint(
        filepath='cifar10.h5',
        monitor='val_loss',
        save_best_only=True,
    )
    callbacks = [early_stopping, best_checkpoint]

    model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 resizing_1 (Resizing)       (None, 224, 224, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 7, 7, 1408)       8769374   
 al)                                                             
                                                                 
 average_pooling2d_1 (Averag  (None, 1, 1, 1408)       0         
 ePooling2D)                                                     
                                                                 
 flatten_1 (Flatten)         (None, 1408)              0         
                                                                 
 dense_1 (Dense)             (None, 10)                1409

# Test 1
- reduced image resizing to 150px

In [None]:
def preprocess_data(X, Y):
    """
    One-hot encodes the labels; the images are passed through untouched

    X: image data, returned as-is
    Y: class labels, converted to one-hot rows over 10 classes

    Returns the tuple (X, one-hot Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu'):
    """
    Builds and compiles a CIFAR-10 classifier on a frozen EfficientNetV2B2,
    with the images resized to 150x150

    init: kernel initializer of the softmax output layer
    activation: not used by this variant (it has no hidden dense layers)

    Returns the compiled model
    """
    target_size = 150

    # 32x32 RGB images, upscaled to the size fed to the backbone
    images = K.layers.Input(shape=(32, 32, 3))
    upscaled = K.layers.Resizing(
        target_size,
        target_size,
        interpolation="bilinear",
        crop_to_aspect_ratio=False,
    )(images)

    # ImageNet-pretrained backbone without its top; its weights stay frozen
    backbone = K.applications.EfficientNetV2B2(
        include_top=False,
        weights='imagenet',
        input_shape=(target_size, target_size, 3),
    )
    backbone.trainable = False
    features = backbone(upscaled)

    # a 5x5 'valid' window collapses the 5x5 feature map to 1x1
    pooled = K.layers.AveragePooling2D(
        pool_size=5, strides=1, padding='valid')(features)
    flattened = K.layers.Flatten()(pooled)

    # 10-way softmax classifier
    probabilities = K.layers.Dense(
        units=10, activation='softmax', kernel_initializer=init)(flattened)

    classifier = K.models.Model(inputs=images, outputs=probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=K.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return classifier

if __name__ == "__main__":
    # hyperparameters for this run
    init = K.initializers.he_normal()
    activation = 'relu'
    patience = 2
    batch_size = 32
    epochs = 100
    # alpha and decay_rate belong to the learning-rate schedule
    # alpha / (1 + epoch * decay_rate), which is switched off in this test
    alpha = 0.1
    decay_rate = 1

    # CIFAR-10 comes as two (images, labels) splits; the second one is
    # used as validation data
    train_split, valid_split = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(*train_split)
    x_valid, y_valid = preprocess_data(*valid_split)

    model = build_model(init, activation)
    model.summary()

    # early stopping plus a checkpoint of the best model by val_loss
    early_stopping = K.callbacks.EarlyStopping(patience=patience)
    best_checkpoint = K.callbacks.ModelCheckpoint(
        filepath='cifar10.h5',
        monitor='val_loss',
        save_best_only=True,
    )
    callbacks = [early_stopping, best_checkpoint]

    model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_25 (InputLayer)       [(None, 32, 32, 3)]       0         
                                                                 
 resizing_12 (Resizing)      (None, 150, 150, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 5, 5, 1408)       8769374   
 al)                                                             
                                                                 
 average_pooling2d_1 (Averag  (None, 1, 1, 1408)       0         
 ePooling2D)                                                     
                                                                 
 flatten_12 (Flatten)        (None, 1408)              0         
                                                                 
 dense_45 (Dense)            (None, 10)                140

# Test 2
- Increased batch size from 32 to 512
- Enabled the learning-rate decay schedule (alpha = 0.1, decay_rate = 1)

In [None]:
def preprocess_data(X, Y):
    """
    One-hot encodes the labels; the images are passed through untouched

    X: image data, returned as-is
    Y: class labels, converted to one-hot rows over 10 classes

    Returns the tuple (X, one-hot Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu'):
    """
    Builds and compiles a CIFAR-10 classifier on a frozen EfficientNetV2B2,
    with the images resized to 150x150

    init: kernel initializer of the softmax output layer
    activation: not used by this variant (it has no hidden dense layers)

    Returns the compiled model
    """
    target_size = 150

    # 32x32 RGB images, upscaled to the size fed to the backbone
    images = K.layers.Input(shape=(32, 32, 3))
    upscaled = K.layers.Resizing(
        target_size,
        target_size,
        interpolation="bilinear",
        crop_to_aspect_ratio=False,
    )(images)

    # ImageNet-pretrained backbone without its top; its weights stay frozen
    backbone = K.applications.EfficientNetV2B2(
        include_top=False,
        weights='imagenet',
        input_shape=(target_size, target_size, 3),
    )
    backbone.trainable = False
    features = backbone(upscaled)

    # a 5x5 'valid' window collapses the 5x5 feature map to 1x1
    pooled = K.layers.AveragePooling2D(
        pool_size=5, strides=1, padding='valid')(features)
    flattened = K.layers.Flatten()(pooled)

    # 10-way softmax classifier
    probabilities = K.layers.Dense(
        units=10, activation='softmax', kernel_initializer=init)(flattened)

    classifier = K.models.Model(inputs=images, outputs=probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=K.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return classifier

if __name__ == "__main__":
    # hyperparameters for this run
    init = K.initializers.he_normal()
    activation = 'relu'
    patience = 2
    batch_size = 512
    epochs = 100
    alpha = 0.1
    decay_rate = 1

    # CIFAR-10 comes as two (images, labels) splits; the second one is
    # used as validation data
    train_split, valid_split = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(*train_split)
    x_valid, y_valid = preprocess_data(*valid_split)

    model = build_model(init, activation)
    model.summary()

    def inverse_time_decay(epoch):
        """Learning rate for the given epoch: alpha / (1 + epoch * decay_rate)"""
        return alpha / (1 + epoch * decay_rate)

    # early stopping, per-epoch learning-rate schedule, and a checkpoint
    # of the best model by val_loss
    early_stopping = K.callbacks.EarlyStopping(patience=patience)
    lr_schedule = K.callbacks.LearningRateScheduler(
        schedule=inverse_time_decay,
        verbose=True,
    )
    best_checkpoint = K.callbacks.ModelCheckpoint(
        filepath='cifar10.h5',
        monitor='val_loss',
        save_best_only=True,
    )
    callbacks = [early_stopping, lr_schedule, best_checkpoint]

    model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_9 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 resizing_4 (Resizing)       (None, 150, 150, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 5, 5, 1408)       8769374   
 al)                                                             
                                                                 
 average_pooling2d_4 (Averag  (None, 1, 1, 1408)       0         
 ePooling2D)                                                     
                                                                 
 flatten_4 (Flatten)         (None, 1408)              0         
                                                                 
 dense_4 (Dense)             (None, 10)                1409

# Test 3
- Added two fully connected layers for depth after frozen layers
- Adjusted batch size back down
- Adjusted learning rate

In [None]:
def preprocess_data(X, Y):
    """
    One-hot encodes the labels; the images are passed through untouched

    X: image data, returned as-is
    Y: class labels, converted to one-hot rows over 10 classes

    Returns the tuple (X, one-hot Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu'):
    """
    Builds and compiles a CIFAR-10 classifier: frozen EfficientNetV2B2
    followed by two trainable dense layers (512 and 128 units)

    init: kernel initializer shared by all dense layers
    activation: activation of the two hidden dense layers

    Returns the compiled model
    """
    target_size = 150

    # 32x32 RGB images, upscaled to the size fed to the backbone
    images = K.layers.Input(shape=(32, 32, 3))
    upscaled = K.layers.Resizing(
        target_size,
        target_size,
        interpolation="bilinear",
        crop_to_aspect_ratio=False,
    )(images)

    # ImageNet-pretrained backbone without its top; its weights stay frozen
    backbone = K.applications.EfficientNetV2B2(
        include_top=False,
        weights='imagenet',
        input_shape=(target_size, target_size, 3),
    )
    backbone.trainable = False
    features = backbone(upscaled)

    # a 5x5 'valid' window collapses the 5x5 feature map to 1x1
    pooled = K.layers.AveragePooling2D(
        pool_size=5, strides=1, padding='valid')(features)
    hidden = K.layers.Flatten()(pooled)

    # trainable fully connected head
    for width in (512, 128):
        hidden = K.layers.Dense(
            units=width,
            activation=activation,
            kernel_initializer=init,
        )(hidden)

    # 10-way softmax classifier
    probabilities = K.layers.Dense(
        units=10, activation='softmax', kernel_initializer=init)(hidden)

    classifier = K.models.Model(inputs=images, outputs=probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=K.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return classifier

if __name__ == "__main__":
    # hyperparameters for this run
    init = K.initializers.he_normal()
    activation = 'relu'
    patience = 2
    batch_size = 32
    epochs = 100
    alpha = 0.01
    decay_rate = 0.2

    # CIFAR-10 comes as two (images, labels) splits; the second one is
    # used as validation data
    train_split, valid_split = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(*train_split)
    x_valid, y_valid = preprocess_data(*valid_split)

    model = build_model(init, activation)
    model.summary()

    def inverse_time_decay(epoch):
        """Learning rate for the given epoch: alpha / (1 + epoch * decay_rate)"""
        return alpha / (1 + epoch * decay_rate)

    # early stopping, per-epoch learning-rate schedule, and a checkpoint
    # of the best model by val_loss
    early_stopping = K.callbacks.EarlyStopping(patience=patience)
    lr_schedule = K.callbacks.LearningRateScheduler(
        schedule=inverse_time_decay,
        verbose=True,
    )
    best_checkpoint = K.callbacks.ModelCheckpoint(
        filepath='cifar10.h5',
        monitor='val_loss',
        save_best_only=True,
    )
    callbacks = [early_stopping, lr_schedule, best_checkpoint]

    model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_23 (InputLayer)       [(None, 32, 32, 3)]       0         
                                                                 
 resizing_11 (Resizing)      (None, 150, 150, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 5, 5, 1408)       8769374   
 al)                                                             
                                                                 
 average_pooling2d_10 (Avera  (None, 1, 1, 1408)       0         
 gePooling2D)                                                    
                                                                 
 flatten_10 (Flatten)        (None, 1408)              0         
                                                                 
 dense_20 (Dense)            (None, 512)               721

# Test 4
- Adding dropout to reduce overfitting
- removed learning rate decay

In [None]:
def preprocess_data(X, Y):
    """
    One-hot encodes the labels; the images are passed through untouched

    X: image data, returned as-is
    Y: class labels, converted to one-hot rows over 10 classes

    Returns the tuple (X, one-hot Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu',
                keep_prob=0.5):
    """
    Builds and compiles a CIFAR-10 classifier: frozen EfficientNetV2B2
    followed by two dense layers (512 and 128 units), each with dropout

    init: kernel initializer shared by all dense layers
    activation: activation of the two hidden dense layers
    keep_prob: fraction of units kept; the dropout rate is 1 - keep_prob

    Returns the compiled model
    """
    target_size = 150

    # one Dropout layer instance, applied after both hidden dense layers
    shared_dropout = K.layers.Dropout(1 - keep_prob)

    # 32x32 RGB images, upscaled to the size fed to the backbone
    images = K.layers.Input(shape=(32, 32, 3))
    upscaled = K.layers.Resizing(
        target_size,
        target_size,
        interpolation="bilinear",
        crop_to_aspect_ratio=False,
    )(images)

    # ImageNet-pretrained backbone without its top; its weights stay frozen
    backbone = K.applications.EfficientNetV2B2(
        include_top=False,
        weights='imagenet',
        input_shape=(target_size, target_size, 3),
    )
    backbone.trainable = False
    features = backbone(upscaled)

    # a 5x5 'valid' window collapses the 5x5 feature map to 1x1
    pooled = K.layers.AveragePooling2D(
        pool_size=5, strides=1, padding='valid')(features)
    hidden = K.layers.Flatten()(pooled)

    # trainable fully connected head: dense -> dropout, twice
    for width in (512, 128):
        hidden = K.layers.Dense(
            units=width,
            activation=activation,
            kernel_initializer=init,
        )(hidden)
        hidden = shared_dropout(hidden)

    # 10-way softmax classifier
    probabilities = K.layers.Dense(
        units=10, activation='softmax', kernel_initializer=init)(hidden)

    classifier = K.models.Model(inputs=images, outputs=probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=K.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return classifier

if __name__ == "__main__":
    # hyperparameters for this run
    init = K.initializers.he_normal()
    activation = 'relu'
    patience = 2
    batch_size = 32
    epochs = 100
    # alpha and decay_rate belong to the learning-rate schedule
    # alpha / (1 + epoch * decay_rate), which is switched off in this test
    alpha = 0.01
    decay_rate = 0.2
    keep_prob = 0.5

    # CIFAR-10 comes as two (images, labels) splits; the second one is
    # used as validation data
    train_split, valid_split = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(*train_split)
    x_valid, y_valid = preprocess_data(*valid_split)

    model = build_model(init, activation, keep_prob)
    model.summary()

    # early stopping plus a checkpoint of the best model by val_loss
    early_stopping = K.callbacks.EarlyStopping(patience=patience)
    best_checkpoint = K.callbacks.ModelCheckpoint(
        filepath='cifar10.h5',
        monitor='val_loss',
        save_best_only=True,
    )
    callbacks = [early_stopping, best_checkpoint]

    model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_13"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_29 (InputLayer)          [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 resizing_14 (Resizing)         (None, 150, 150, 3)  0           ['input_29[0][0]']               
                                                                                                  
 efficientnetv2-b2 (Functional)  (None, 5, 5, 1408)  8769374     ['resizing_14[0][0]']            
                                                                                                  
 average_pooling2d_13 (AverageP  (None, 1, 1, 1408)  0           ['efficientnetv2-b2[0][0]']      
 ooling2D)                                                                                 

# Test 5
- Reduced depth and width of unfrozen layers

In [None]:
def preprocess_data(X, Y):
    """
    One-hot encodes the labels; the images are passed through untouched

    X: image data, returned as-is
    Y: class labels, converted to one-hot rows over 10 classes

    Returns the tuple (X, one-hot Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu',
                keep_prob=0.5):
    """
    Builds and compiles a CIFAR-10 classifier: frozen EfficientNetV2B2
    followed by a single 100-unit dense layer with dropout

    init: kernel initializer shared by the dense layers
    activation: activation of the hidden dense layer
    keep_prob: fraction of units kept; the dropout rate is 1 - keep_prob

    Returns the compiled model
    """
    target_size = 150

    # dropout applied to the hidden dense layer's output
    dropout = K.layers.Dropout(1 - keep_prob)

    # 32x32 RGB images, upscaled to the size fed to the backbone
    images = K.layers.Input(shape=(32, 32, 3))
    upscaled = K.layers.Resizing(
        target_size,
        target_size,
        interpolation="bilinear",
        crop_to_aspect_ratio=False,
    )(images)

    # ImageNet-pretrained backbone without its top; its weights stay frozen
    backbone = K.applications.EfficientNetV2B2(
        include_top=False,
        weights='imagenet',
        input_shape=(target_size, target_size, 3),
    )
    backbone.trainable = False
    features = backbone(upscaled)

    # a 5x5 'valid' window collapses the 5x5 feature map to 1x1
    pooled = K.layers.AveragePooling2D(
        pool_size=5, strides=1, padding='valid')(features)
    flattened = K.layers.Flatten()(pooled)

    # trainable fully connected head
    hidden = K.layers.Dense(
        units=100,
        activation=activation,
        kernel_initializer=init,
    )(flattened)
    hidden = dropout(hidden)

    # 10-way softmax classifier
    probabilities = K.layers.Dense(
        units=10, activation='softmax', kernel_initializer=init)(hidden)

    classifier = K.models.Model(inputs=images, outputs=probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=K.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return classifier

if __name__ == "__main__":
    # hyperparameters for this run
    init = K.initializers.he_normal()
    activation = 'relu'
    patience = 2
    batch_size = 32
    epochs = 100
    # alpha and decay_rate belong to the learning-rate schedule
    # alpha / (1 + epoch * decay_rate), which is switched off in this test
    alpha = 0.01
    decay_rate = 0.2
    keep_prob = 0.5

    # CIFAR-10 comes as two (images, labels) splits; the second one is
    # used as validation data
    train_split, valid_split = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(*train_split)
    x_valid, y_valid = preprocess_data(*valid_split)

    model = build_model(init, activation, keep_prob)
    model.summary()

    # early stopping plus a checkpoint of the best model by val_loss
    early_stopping = K.callbacks.EarlyStopping(patience=patience)
    best_checkpoint = K.callbacks.ModelCheckpoint(
        filepath='cifar10.h5',
        monitor='val_loss',
        save_best_only=True,
    )
    callbacks = [early_stopping, best_checkpoint]

    model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_33 (InputLayer)       [(None, 32, 32, 3)]       0         
                                                                 
 resizing_16 (Resizing)      (None, 150, 150, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 5, 5, 1408)       8769374   
 al)                                                             
                                                                 
 average_pooling2d_15 (Avera  (None, 1, 1, 1408)       0         
 gePooling2D)                                                    
                                                                 
 flatten_15 (Flatten)        (None, 1408)              0         
                                                                 
 dense_41 (Dense)            (None, 100)               140

# Test 6
- removed the pooling layer and added a 1x1 convolutional layer to increase the amount of data received from the frozen layers
- increased depth of unfrozen layers
- changed keep_prob from 0.5 to 0.7 (i.e. dropout rate from 0.5 to 0.3) in an attempt to control overfitting

In [None]:
def preprocess_data(X, Y):
    """
    One-hot encodes the labels; the images are passed through untouched

    X: image data, returned as-is
    Y: class labels, converted to one-hot rows over 10 classes

    Returns the tuple (X, one-hot Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu',
                keep_prob=0.5):
    """
    Builds and compiles a CIFAR-10 classifier: frozen EfficientNetV2B2,
    a 1x1 convolution, then three 100-unit dense layers with dropout

    init: kernel initializer shared by the conv and dense layers
    activation: activation of the conv layer and the hidden dense layers
    keep_prob: fraction of units kept; the dropout rate is 1 - keep_prob

    Returns the compiled model
    """
    target_size = 150
    drop_rate = 1 - keep_prob

    # 32x32 RGB images, upscaled to the size fed to the backbone
    images = K.layers.Input(shape=(32, 32, 3))
    upscaled = K.layers.Resizing(
        target_size,
        target_size,
        interpolation="bilinear",
        crop_to_aspect_ratio=False,
    )(images)

    # ImageNet-pretrained backbone without its top; its weights stay frozen
    backbone = K.applications.EfficientNetV2B2(
        include_top=False,
        weights='imagenet',
        input_shape=(target_size, target_size, 3),
    )
    backbone.trainable = False
    features = backbone(upscaled)

    # 1x1 convolution: keeps the spatial grid, reduces channels to 250
    reduced = K.layers.Conv2D(
        filters=250,
        kernel_size=1,
        strides=1,
        padding='valid',
        activation=activation,
        kernel_initializer=init,
    )(features)
    hidden = K.layers.Flatten()(reduced)

    # trainable fully connected head: (dense -> dropout) x 3, each pair
    # with its own Dropout layer
    for _ in range(3):
        hidden = K.layers.Dense(
            units=100,
            activation=activation,
            kernel_initializer=init,
        )(hidden)
        hidden = K.layers.Dropout(drop_rate)(hidden)

    # 10-way softmax classifier
    probabilities = K.layers.Dense(
        units=10, activation='softmax', kernel_initializer=init)(hidden)

    classifier = K.models.Model(inputs=images, outputs=probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=K.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return classifier

if __name__ == "__main__":
    # hyperparameters for this run
    init = K.initializers.he_normal()
    activation = 'relu'
    patience = 2
    batch_size = 32
    epochs = 100
    # alpha and decay_rate belong to the learning-rate schedule
    # alpha / (1 + epoch * decay_rate), which is switched off in this test
    alpha = 0.01
    decay_rate = 0.2
    keep_prob = 0.7

    # CIFAR-10 comes as two (images, labels) splits; the second one is
    # used as validation data
    train_split, valid_split = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(*train_split)
    x_valid, y_valid = preprocess_data(*valid_split)

    model = build_model(init, activation, keep_prob)
    model.summary()

    # early stopping plus a checkpoint of the best model by val_loss
    early_stopping = K.callbacks.EarlyStopping(patience=patience)
    best_checkpoint = K.callbacks.ModelCheckpoint(
        filepath='cifar10.h5',
        monitor='val_loss',
        save_best_only=True,
    )
    callbacks = [early_stopping, best_checkpoint]

    model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_21 (InputLayer)       [(None, 32, 32, 3)]       0         
                                                                 
 resizing_10 (Resizing)      (None, 150, 150, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 5, 5, 1408)       8769374   
 al)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 5, 5, 250)         352250    
                                                                 
 flatten_10 (Flatten)        (None, 6250)              0         
                                                                 
 dense_37 (Dense)            (None, 100)               625100    
                                                          

# Test 7
- Removed 1x1 convolutional layer

In [None]:
def preprocess_data(X, Y):
    """
    One-hot encodes the labels; the images are passed through untouched

    X: image data, returned as-is
    Y: class labels, converted to one-hot rows over 10 classes

    Returns the tuple (X, one-hot Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu',
                keep_prob=0.5):
    """
    Builds and compiles a CIFAR-10 classifier: frozen EfficientNetV2B2
    whose feature map is flattened directly into three 100-unit dense
    layers with dropout

    init: kernel initializer shared by all dense layers
    activation: activation of the hidden dense layers
    keep_prob: fraction of units kept; the dropout rate is 1 - keep_prob

    Returns the compiled model
    """
    target_size = 150
    drop_rate = 1 - keep_prob

    # 32x32 RGB images, upscaled to the size fed to the backbone
    images = K.layers.Input(shape=(32, 32, 3))
    upscaled = K.layers.Resizing(
        target_size,
        target_size,
        interpolation="bilinear",
        crop_to_aspect_ratio=False,
    )(images)

    # ImageNet-pretrained backbone without its top; its weights stay frozen
    backbone = K.applications.EfficientNetV2B2(
        include_top=False,
        weights='imagenet',
        input_shape=(target_size, target_size, 3),
    )
    backbone.trainable = False
    features = backbone(upscaled)

    # no pooling and no 1x1 convolution in this variant: the backbone's
    # feature map goes straight into the dense head
    hidden = K.layers.Flatten()(features)

    # trainable fully connected head: (dense -> dropout) x 3, each pair
    # with its own Dropout layer
    for _ in range(3):
        hidden = K.layers.Dense(
            units=100,
            activation=activation,
            kernel_initializer=init,
        )(hidden)
        hidden = K.layers.Dropout(drop_rate)(hidden)

    # 10-way softmax classifier
    probabilities = K.layers.Dense(
        units=10, activation='softmax', kernel_initializer=init)(hidden)

    classifier = K.models.Model(inputs=images, outputs=probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=K.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return classifier

if __name__ == "__main__":
    # hyperparameters for this run
    init = K.initializers.he_normal()
    activation = 'relu'
    patience = 2
    batch_size = 32
    epochs = 100
    # alpha and decay_rate belong to the learning-rate schedule
    # alpha / (1 + epoch * decay_rate), which is switched off in this test
    alpha = 0.01
    decay_rate = 0.2
    keep_prob = 0.7

    # CIFAR-10 comes as two (images, labels) splits; the second one is
    # used as validation data
    train_split, valid_split = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(*train_split)
    x_valid, y_valid = preprocess_data(*valid_split)

    model = build_model(init, activation, keep_prob)
    model.summary()

    # early stopping plus a checkpoint of the best model by val_loss
    early_stopping = K.callbacks.EarlyStopping(patience=patience)
    best_checkpoint = K.callbacks.ModelCheckpoint(
        filepath='cifar10.h5',
        monitor='val_loss',
        save_best_only=True,
    )
    callbacks = [early_stopping, best_checkpoint]

    model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_9 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 resizing_4 (Resizing)       (None, 150, 150, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 5, 5, 1408)       8769374   
 al)                                                             
                                                                 
 flatten_4 (Flatten)         (None, 35200)             0         
                                                                 
 dense_16 (Dense)            (None, 100)               3520100   
                                                                 
 dropout_8 (Dropout)         (None, 100)               0         
                                                           

# Test 8
- Increased image resizing to 224x224
- increased patience

In [None]:
def preprocess_data(X, Y):
    """
    Prepares an (images, labels) pair for training

    X: image data, handed back unchanged
    Y: class labels, one-hot encoded over 10 classes

    Returns the tuple (X, one-hot encoded Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu',
                keep_prob=0.5):
    """
    Returns compiled model

    Architecture: 32x32x3 input -> resize to 224x224 -> frozen
    EfficientNetV2B2 (ImageNet weights, no top) -> 250-filter 1x1
    convolution -> flatten -> three 100-unit dense layers, each followed
    by dropout -> 10-way softmax.

    init: kernel initializer passed to every layer added after the
        EfficientNet base
    activation: activation of the 1x1 convolution and hidden dense layers
    keep_prob: each Dropout layer is created with rate 1 - keep_prob
    """
    layers = K.layers

    # input images are upscaled to the resolution the base is built for
    inputs = layers.Input(shape=(32, 32, 3))
    upscaled = layers.Resizing(
        height=224,
        width=224,
        interpolation="bilinear",
        crop_to_aspect_ratio=False)(inputs)

    # pretrained base without its classification head; marked as
    # non-trainable so only the layers added below are fitted
    backbone = K.applications.EfficientNetV2B2(
        weights='imagenet',
        input_shape=(224, 224, 3),
        include_top=False,
        )
    backbone.trainable = False
    features = backbone(upscaled)

    # 1x1 convolution changes the channel count, not the spatial size
    x = layers.Conv2D(
        filters=250,
        kernel_size=1,
        strides=1,
        padding='valid',
        activation=activation,
        kernel_initializer=init
        )(features)
    x = layers.Flatten()(x)

    # three identical dense + dropout stages
    drop_rate = 1 - keep_prob
    for _ in range(3):
        x = layers.Dense(
            units=100,
            activation=activation,
            kernel_initializer=init,
            )(x)
        x = layers.Dropout(drop_rate)(x)

    # classification head
    outputs = layers.Dense(
        units=10,
        activation='softmax',
        kernel_initializer=init
        )(x)

    model = K.models.Model(inputs, outputs)
    model.compile(
        optimizer=K.optimizers.Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

if __name__ == "__main__":
    # fetch CIFAR-10; the second split returned by load_data is used as
    # the validation set, and both splits get the same preprocessing
    (x_train, y_train), (x_valid, y_valid) = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(x_train, y_train)
    x_valid, y_valid = preprocess_data(x_valid, y_valid)

    # model hyperparameters
    init, activation, keep_prob = K.initializers.he_normal(), 'relu', 0.7

    # training hyperparameters
    patience, batch_size, epochs = 3, 32, 100

    # only referenced by the learning-rate schedule, which is disabled below
    alpha, decay_rate = 0.01, 0.2

    # build the network and print its layer table
    model = build_model(init, activation, keep_prob)
    model.summary()

    # early stopping plus a checkpoint of the lowest-val_loss model.
    # The learning-rate schedule is switched off for this run; to bring
    # it back, add the following entry to the list:
    #   K.callbacks.LearningRateScheduler(
    #       schedule=lambda epoch: alpha / (1 + epoch * decay_rate),
    #       verbose=True)
    callbacks = [
        K.callbacks.EarlyStopping(patience=patience),
        K.callbacks.ModelCheckpoint(
            filepath='cifar10.h5',
            monitor='val_loss',
            save_best_only=True),
    ]

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_27 (InputLayer)       [(None, 32, 32, 3)]       0         
                                                                 
 resizing_13 (Resizing)      (None, 224, 224, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 7, 7, 1408)       8769374   
 al)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 7, 7, 250)         352250    
                                                                 
 flatten_13 (Flatten)        (None, 12250)             0         
                                                                 
 dense_46 (Dense)            (None, 100)               1225100   
                                                          

# Test 9
- Increased features before fully connected layers

In [None]:
def preprocess_data(X, Y):
    """
    Prepares an (images, labels) pair for training

    X: image data, handed back unchanged
    Y: class labels, one-hot encoded over 10 classes

    Returns the tuple (X, one-hot encoded Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu',
                keep_prob=0.5):
    """
    Returns compiled model

    Architecture: 32x32x3 input -> resize to 224x224 -> frozen
    EfficientNetV2B2 (ImageNet weights, no top) -> 704-filter 1x1
    convolution -> flatten -> three 100-unit dense layers, each followed
    by dropout -> 10-way softmax.

    init: kernel initializer passed to every layer added after the
        EfficientNet base
    activation: activation of the 1x1 convolution and hidden dense layers
    keep_prob: each Dropout layer is created with rate 1 - keep_prob
    """
    layers = K.layers

    # input images are upscaled to the resolution the base is built for
    inputs = layers.Input(shape=(32, 32, 3))
    upscaled = layers.Resizing(
        height=224,
        width=224,
        interpolation="bilinear",
        crop_to_aspect_ratio=False)(inputs)

    # pretrained base without its classification head; marked as
    # non-trainable so only the layers added below are fitted
    backbone = K.applications.EfficientNetV2B2(
        weights='imagenet',
        input_shape=(224, 224, 3),
        include_top=False,
        )
    backbone.trainable = False
    features = backbone(upscaled)

    # 1x1 convolution changes the channel count, not the spatial size
    x = layers.Conv2D(
        filters=704,
        kernel_size=1,
        strides=1,
        padding='valid',
        activation=activation,
        kernel_initializer=init
        )(features)
    x = layers.Flatten()(x)

    # three identical dense + dropout stages
    drop_rate = 1 - keep_prob
    for _ in range(3):
        x = layers.Dense(
            units=100,
            activation=activation,
            kernel_initializer=init,
            )(x)
        x = layers.Dropout(drop_rate)(x)

    # classification head
    outputs = layers.Dense(
        units=10,
        activation='softmax',
        kernel_initializer=init
        )(x)

    model = K.models.Model(inputs, outputs)
    model.compile(
        optimizer=K.optimizers.Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

if __name__ == "__main__":
    # fetch CIFAR-10; the second split returned by load_data is used as
    # the validation set, and both splits get the same preprocessing
    (x_train, y_train), (x_valid, y_valid) = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(x_train, y_train)
    x_valid, y_valid = preprocess_data(x_valid, y_valid)

    # model hyperparameters
    init, activation, keep_prob = K.initializers.he_normal(), 'relu', 0.7

    # training hyperparameters
    patience, batch_size, epochs = 3, 32, 100

    # only referenced by the learning-rate schedule, which is disabled below
    alpha, decay_rate = 0.01, 0.2

    # build the network and print its layer table
    model = build_model(init, activation, keep_prob)
    model.summary()

    # early stopping plus a checkpoint of the lowest-val_loss model.
    # The learning-rate schedule is switched off for this run; to bring
    # it back, add the following entry to the list:
    #   K.callbacks.LearningRateScheduler(
    #       schedule=lambda epoch: alpha / (1 + epoch * decay_rate),
    #       verbose=True)
    callbacks = [
        K.callbacks.EarlyStopping(patience=patience),
        K.callbacks.ModelCheckpoint(
            filepath='cifar10.h5',
            monitor='val_loss',
            save_best_only=True),
    ]

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_29 (InputLayer)       [(None, 32, 32, 3)]       0         
                                                                 
 resizing_14 (Resizing)      (None, 224, 224, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 7, 7, 1408)       8769374   
 al)                                                             
                                                                 
 conv2d_11 (Conv2D)          (None, 7, 7, 704)         991936    
                                                                 
 flatten_14 (Flatten)        (None, 34496)             0         
                                                                 
 dense_50 (Dense)            (None, 100)               3449700   
                                                          

# Test 10
- Decreased features before fully connected layers

In [None]:
def preprocess_data(X, Y):
    """
    Prepares an (images, labels) pair for training

    X: image data, handed back unchanged
    Y: class labels, one-hot encoded over 10 classes

    Returns the tuple (X, one-hot encoded Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu',
                keep_prob=0.5):
    """
    Returns compiled model

    Architecture: 32x32x3 input -> resize to 224x224 -> frozen
    EfficientNetV2B2 (ImageNet weights, no top) -> 176-filter 1x1
    convolution -> flatten -> three 100-unit dense layers, each followed
    by dropout -> 10-way softmax.

    init: kernel initializer passed to every layer added after the
        EfficientNet base
    activation: activation of the 1x1 convolution and hidden dense layers
    keep_prob: each Dropout layer is created with rate 1 - keep_prob
    """
    layers = K.layers

    # input images are upscaled to the resolution the base is built for
    inputs = layers.Input(shape=(32, 32, 3))
    upscaled = layers.Resizing(
        height=224,
        width=224,
        interpolation="bilinear",
        crop_to_aspect_ratio=False)(inputs)

    # pretrained base without its classification head; marked as
    # non-trainable so only the layers added below are fitted
    backbone = K.applications.EfficientNetV2B2(
        weights='imagenet',
        input_shape=(224, 224, 3),
        include_top=False,
        )
    backbone.trainable = False
    features = backbone(upscaled)

    # 1x1 convolution changes the channel count, not the spatial size
    x = layers.Conv2D(
        filters=176,
        kernel_size=1,
        strides=1,
        padding='valid',
        activation=activation,
        kernel_initializer=init
        )(features)
    x = layers.Flatten()(x)

    # three identical dense + dropout stages
    drop_rate = 1 - keep_prob
    for _ in range(3):
        x = layers.Dense(
            units=100,
            activation=activation,
            kernel_initializer=init,
            )(x)
        x = layers.Dropout(drop_rate)(x)

    # classification head
    outputs = layers.Dense(
        units=10,
        activation='softmax',
        kernel_initializer=init
        )(x)

    model = K.models.Model(inputs, outputs)
    model.compile(
        optimizer=K.optimizers.Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

if __name__ == "__main__":
    # fetch CIFAR-10; the second split returned by load_data is used as
    # the validation set, and both splits get the same preprocessing
    (x_train, y_train), (x_valid, y_valid) = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(x_train, y_train)
    x_valid, y_valid = preprocess_data(x_valid, y_valid)

    # model hyperparameters
    init, activation, keep_prob = K.initializers.he_normal(), 'relu', 0.7

    # training hyperparameters
    patience, batch_size, epochs = 3, 32, 100

    # only referenced by the learning-rate schedule, which is disabled below
    alpha, decay_rate = 0.01, 0.2

    # build the network and print its layer table
    model = build_model(init, activation, keep_prob)
    model.summary()

    # early stopping plus a checkpoint of the lowest-val_loss model.
    # The learning-rate schedule is switched off for this run; to bring
    # it back, add the following entry to the list:
    #   K.callbacks.LearningRateScheduler(
    #       schedule=lambda epoch: alpha / (1 + epoch * decay_rate),
    #       verbose=True)
    callbacks = [
        K.callbacks.EarlyStopping(patience=patience),
        K.callbacks.ModelCheckpoint(
            filepath='cifar10.h5',
            monitor='val_loss',
            save_best_only=True),
    ]

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_35 (InputLayer)       [(None, 32, 32, 3)]       0         
                                                                 
 resizing_17 (Resizing)      (None, 224, 224, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 7, 7, 1408)       8769374   
 al)                                                             
                                                                 
 conv2d_14 (Conv2D)          (None, 7, 7, 176)         247984    
                                                                 
 flatten_17 (Flatten)        (None, 8624)              0         
                                                                 
 dense_62 (Dense)            (None, 100)               862500    
                                                          

# Test 11
- Same as test 0, with pooling layer removed

In [None]:
def preprocess_data(X, Y):
    """
    Prepares an (images, labels) pair for training

    X: image data, handed back unchanged
    Y: class labels, one-hot encoded over 10 classes

    Returns the tuple (X, one-hot encoded Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu'):
    """
    Returns compiled model

    Architecture: 32x32x3 input -> resize to 224x224 -> frozen
    EfficientNetV2B2 (ImageNet weights, no top) -> flatten -> 10-way
    softmax. The base output is flattened directly, with no pooling.

    init: kernel initializer of the output layer
    activation: not used by this variant of the model
    """
    layers = K.layers

    # input images are upscaled to the resolution the base is built for
    inputs = layers.Input(shape=(32, 32, 3))
    upscaled = layers.Resizing(
        height=224,
        width=224,
        interpolation="bilinear",
        crop_to_aspect_ratio=False)(inputs)

    # pretrained base without its classification head; marked as
    # non-trainable so only the output layer is fitted
    backbone = K.applications.EfficientNetV2B2(
        weights='imagenet',
        input_shape=(224, 224, 3),
        include_top=False,
        )
    backbone.trainable = False
    features = backbone(upscaled)

    # pooling stage deliberately disabled for this experiment:
    #   K.layers.AvgPool2D(pool_size=7, strides=1, padding='valid')

    # classification head on the flattened base output
    flattened = layers.Flatten()(features)
    outputs = layers.Dense(
        units=10,
        activation='softmax',
        kernel_initializer=init
        )(flattened)

    model = K.models.Model(inputs, outputs)
    model.compile(
        optimizer=K.optimizers.Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

if __name__ == "__main__":
    # fetch CIFAR-10; the second split returned by load_data is used as
    # the validation set, and both splits get the same preprocessing
    (x_train, y_train), (x_valid, y_valid) = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(x_train, y_train)
    x_valid, y_valid = preprocess_data(x_valid, y_valid)

    # model hyperparameters
    init, activation = K.initializers.he_normal(), 'relu'

    # training hyperparameters
    patience, batch_size, epochs = 2, 32, 100

    # only referenced by the learning-rate schedule, which is disabled below
    alpha, decay_rate = 0.1, 1

    # build the network and print its layer table
    model = build_model(init, activation)
    model.summary()

    # early stopping plus a checkpoint of the lowest-val_loss model.
    # The learning-rate schedule is switched off for this run; to bring
    # it back, add the following entry to the list:
    #   K.callbacks.LearningRateScheduler(
    #       schedule=lambda epoch: alpha / (1 + epoch * decay_rate),
    #       verbose=True)
    callbacks = [
        K.callbacks.EarlyStopping(patience=patience),
        K.callbacks.ModelCheckpoint(
            filepath='cifar10.h5',
            monitor='val_loss',
            save_best_only=True),
    ]

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_37 (InputLayer)       [(None, 32, 32, 3)]       0         
                                                                 
 resizing_18 (Resizing)      (None, 224, 224, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 7, 7, 1408)       8769374   
 al)                                                             
                                                                 
 flatten_18 (Flatten)        (None, 68992)             0         
                                                                 
 dense_66 (Dense)            (None, 10)                689930    
                                                                 
Total params: 9,459,304
Trainable params: 689,930
Non-trainable params: 8,769,374
__________________________________________

# Test 12
- Similar to the initial test (Test 0), but with a 1x1 convolution added to reduce the number of features

In [3]:
def preprocess_data(X, Y):
    """
    Prepares an (images, labels) pair for training

    X: image data, handed back unchanged
    Y: class labels, one-hot encoded over 10 classes

    Returns the tuple (X, one-hot encoded Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(), activation='relu'):
    """
    Returns compiled model

    Architecture: 32x32x3 input -> resize to 224x224 -> frozen
    EfficientNetV2B2 (ImageNet weights, no top) -> 250-filter 1x1
    convolution -> 7x7 average pooling -> flatten -> 10-way softmax.

    init: kernel initializer of the 1x1 convolution and the output layer
    activation: activation of the 1x1 convolution
    """
    layers = K.layers

    # input images are upscaled to the resolution the base is built for
    inputs = layers.Input(shape=(32, 32, 3))
    upscaled = layers.Resizing(
        height=224,
        width=224,
        interpolation="bilinear",
        crop_to_aspect_ratio=False)(inputs)

    # pretrained base without its classification head; marked as
    # non-trainable so only the layers added below are fitted
    backbone = K.applications.EfficientNetV2B2(
        weights='imagenet',
        input_shape=(224, 224, 3),
        include_top=False,
        )
    backbone.trainable = False
    features = backbone(upscaled)

    # 1x1 convolution cuts the number of channels down to 250
    reduced = layers.Conv2D(
        filters=250,
        kernel_size=1,
        strides=1,
        padding='valid',
        activation=activation,
        kernel_initializer=init
        )(features)

    # average each 7x7 feature map down to one value, then flatten
    pooled = layers.AvgPool2D(
        pool_size=7,
        strides=1,
        padding='valid',
        )(reduced)
    flattened = layers.Flatten()(pooled)

    # classification head
    outputs = layers.Dense(
        units=10,
        activation='softmax',
        kernel_initializer=init
        )(flattened)

    model = K.models.Model(inputs, outputs)
    model.compile(
        optimizer=K.optimizers.Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

if __name__ == "__main__":
    # fetch CIFAR-10; the second split returned by load_data is used as
    # the validation set, and both splits get the same preprocessing
    (x_train, y_train), (x_valid, y_valid) = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(x_train, y_train)
    x_valid, y_valid = preprocess_data(x_valid, y_valid)

    # model hyperparameters
    init, activation = K.initializers.he_normal(), 'relu'

    # training hyperparameters
    patience, batch_size, epochs = 2, 32, 100

    # only referenced by the learning-rate schedule, which is disabled below
    alpha, decay_rate = 0.1, 1

    # build the network and print its layer table
    model = build_model(init, activation)
    model.summary()

    # early stopping plus a checkpoint of the lowest-val_loss model.
    # The learning-rate schedule is switched off for this run; to bring
    # it back, add the following entry to the list:
    #   K.callbacks.LearningRateScheduler(
    #       schedule=lambda epoch: alpha / (1 + epoch * decay_rate),
    #       verbose=True)
    callbacks = [
        K.callbacks.EarlyStopping(patience=patience),
        K.callbacks.ModelCheckpoint(
            filepath='cifar10.h5',
            monitor='val_loss',
            save_best_only=True),
    ]

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-b2_notop.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 resizing (Resizing)         (None, 224, 224, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 7, 7, 1408)       8769374   
 al)                                                             
                                                                 
 conv2d (Conv2D)             (None, 7, 7, 250)         352250    
                                                                 
 average_pooling2d (AverageP  (None, 1, 1, 250)        



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100


# Test 13
- Added dropout

In [4]:
def preprocess_data(X, Y):
    """
    Prepares an (images, labels) pair for training

    X: image data, handed back unchanged
    Y: class labels, one-hot encoded over 10 classes

    Returns the tuple (X, one-hot encoded Y)
    """
    one_hot_labels = K.utils.to_categorical(Y, num_classes=10)
    return X, one_hot_labels


def build_model(init=K.initializers.he_normal(),
                activation='relu',
                keep_rate=.5):
    """
    Returns compiled model

    Architecture: 32x32x3 input -> resize to 224x224 -> frozen
    EfficientNetV2B2 (ImageNet weights, no top) -> 250-filter 1x1
    convolution -> 7x7 average pooling -> flatten -> dropout -> 10-way
    softmax.

    init: kernel initializer of the 1x1 convolution and the output layer
    activation: activation of the 1x1 convolution
    keep_rate: the Dropout layer is created with rate 1 - keep_rate
    """
    layers = K.layers

    # input images are upscaled to the resolution the base is built for
    inputs = layers.Input(shape=(32, 32, 3))
    upscaled = layers.Resizing(
        height=224,
        width=224,
        interpolation="bilinear",
        crop_to_aspect_ratio=False)(inputs)

    # pretrained base without its classification head; marked as
    # non-trainable so only the layers added below are fitted
    backbone = K.applications.EfficientNetV2B2(
        weights='imagenet',
        input_shape=(224, 224, 3),
        include_top=False,
        )
    backbone.trainable = False
    features = backbone(upscaled)

    # 1x1 convolution cuts the number of channels down to 250
    reduced = layers.Conv2D(
        filters=250,
        kernel_size=1,
        strides=1,
        padding='valid',
        activation=activation,
        kernel_initializer=init
        )(features)

    # average each 7x7 feature map down to one value, then flatten
    pooled = layers.AvgPool2D(
        pool_size=7,
        strides=1,
        padding='valid',
        )(reduced)
    flattened = layers.Flatten()(pooled)

    # dropout on the pooled features, just before the classifier
    regularized = layers.Dropout(1 - keep_rate)(flattened)

    # classification head
    outputs = layers.Dense(
        units=10,
        activation='softmax',
        kernel_initializer=init
        )(regularized)

    model = K.models.Model(inputs, outputs)
    model.compile(
        optimizer=K.optimizers.Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

if __name__ == "__main__":
    # fetch CIFAR-10; the second split returned by load_data is used as
    # the validation set, and both splits get the same preprocessing
    (x_train, y_train), (x_valid, y_valid) = K.datasets.cifar10.load_data()
    x_train, y_train = preprocess_data(x_train, y_train)
    x_valid, y_valid = preprocess_data(x_valid, y_valid)

    # model hyperparameters
    init, activation, keep_rate = K.initializers.he_normal(), 'relu', .7

    # training hyperparameters
    patience, batch_size, epochs = 3, 32, 100

    # only referenced by the learning-rate schedule, which is disabled below
    alpha, decay_rate = 0.1, 1

    # build the network and print its layer table
    model = build_model(init, activation, keep_rate)
    model.summary()

    # early stopping plus a checkpoint of the lowest-val_loss model.
    # The learning-rate schedule is switched off for this run; to bring
    # it back, add the following entry to the list:
    #   K.callbacks.LearningRateScheduler(
    #       schedule=lambda epoch: alpha / (1 + epoch * decay_rate),
    #       verbose=True)
    callbacks = [
        K.callbacks.EarlyStopping(patience=patience),
        K.callbacks.ModelCheckpoint(
            filepath='cifar10.h5',
            monitor='val_loss',
            save_best_only=True),
    ]

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
    )


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 resizing_1 (Resizing)       (None, 224, 224, 3)       0         
                                                                 
 efficientnetv2-b2 (Function  (None, 7, 7, 1408)       8769374   
 al)                                                             
                                                                 
 conv2d_1 (Conv2D)           (None, 7, 7, 250)         352250    
                                                                 
 average_pooling2d_1 (Averag  (None, 1, 1, 250)        0         
 ePooling2D)                                                     
                                                                 
 flatten_1 (Flatten)         (None, 250)               0   