In [1]:
%load_ext watermark

In [2]:
%watermark

Last updated: 2022-08-29T12:57:19.789707-04:00

Python implementation: CPython
Python version       : 3.8.10
IPython version      : 7.28.0

Compiler    : GCC 9.3.0
OS          : Linux
Release     : 5.11.0-37-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 32
Architecture: 64bit



In [23]:
import keras_tuner
from tensorflow import keras
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import keras_tuner as kt
from tensorflow.keras import layers

In [21]:
%watermark --iversions

numpy      : 1.22.4
keras_tuner: 1.1.3
pandas     : 1.4.3
tensorflow : 2.9.1



In [7]:
def load_data(path):
    data = pd.read_csv(path)
    y = data["label"]
    x = data.drop(labels=["label"], axis=1).values.reshape(-1, 28, 28, 1)
    return x, y

x_train, y_train = load_data("../input/train.csv.gz")
x_test, y_test = load_data("../input/test.csv.gz")

In [8]:
x_train.shape

(60000, 28, 28, 1)

In [9]:
x_test.shape

(10000, 28, 28, 1)

In [10]:
def augment_images(x, hp):
    use_rotation = hp.Boolean('use_rotation')
    if use_rotation:
        x = layers.experimental.preprocessing.RandomRotation(
            hp.Float('rotation_factor', min_value=0.05, max_value=0.2)
        )(x)
    use_zoom = hp.Boolean('use_zoom')
    if use_zoom:
        x = layers.experimental.preprocessing.RandomZoom(
            hp.Float('use_zoom', min_value=0.05, max_value=0.2)
        )(x)
    return x

def make_model(hp):
    inputs = keras.Input(shape=(28, 28, 1))
    x = layers.experimental.preprocessing.Rescaling(1. / 255)(inputs)
    x = layers.experimental.preprocessing.Resizing(64, 64)(x)
    x = augment_images(x, hp)
    
    num_block = hp.Int('num_block', min_value=2, max_value=5, step=1)
    num_filters = hp.Int('num_filters', min_value=32, max_value=128, step=32)
    for i in range(num_block):
        x = layers.Conv2D(
            num_filters,
            kernel_size=3,
            activation='relu',
            padding='same'
        )(x)
        x = layers.Conv2D(
            num_filters,
            kernel_size=3,
            activation='relu',
            padding='same'
        )(x)
        x = layers.MaxPooling2D(2)(x)
    
    reduction_type = hp.Choice('reduction_type', ['flatten', 'avg'])
    if reduction_type == 'flatten':
        x = layers.Flatten()(x)
    else:
        x = layers.GlobalAveragePooling2D()(x)

    x = layers.Dense(
        units=hp.Int('num_dense_units', min_value=32, max_value=512, step=32),
        activation='relu'
    )(x)
    x = layers.Dropout(
        hp.Float('dense_dropout', min_value=0., max_value=0.7)
    )(x)
    outputs = layers.Dense(10)(x)
    model = keras.Model(inputs, outputs)
    
    learning_rate = hp.Float('learning_rate', min_value=3e-4, max_value=3e-3)
    optimizer = keras.optimizers.Adam(learning_rate=1e-3)
    model.compile(loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  optimizer=optimizer,
                  metrics=[keras.metrics.SparseCategoricalAccuracy(name='acc')])
    model.summary()
    return model

In [14]:
%%time

tuner = kt.tuners.RandomSearch(
    make_model,
    objective='val_acc',
    max_trials=100,
    overwrite=True)

callbacks=[keras.callbacks.EarlyStopping(monitor='val_acc', mode='max', patience=3, baseline=0.9)]
tuner.search(x_train, y_train, validation_split=0.2, callbacks=callbacks, verbose=1, epochs=100)

Trial 100 Complete [00h 04m 24s]
val_acc: 0.9794999957084656

Best val_acc So Far: 0.9939166903495789
Total elapsed time: 04h 46m 55s
INFO:tensorflow:Oracle triggered exit
CPU times: user 5h 19min 40s, sys: 10min 17s, total: 5h 29min 57s
Wall time: 4h 46min 56s


In [15]:
best_hp = tuner.get_best_hyperparameters()[0]
model = make_model(best_hp)
history = model.fit(x_train, y_train, validation_split=0.2, epochs=50)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 rescaling_1 (Rescaling)     (None, 28, 28, 1)         0         
                                                                 
 resizing_1 (Resizing)       (None, 64, 64, 1)         0         
                                                                 
 conv2d_8 (Conv2D)           (None, 64, 64, 96)        960       
                                                                 
 conv2d_9 (Conv2D)           (None, 64, 64, 96)        83040     
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 32, 32, 96)       0         
 2D)                                                             
                                                           

In [16]:
val_acc_per_epoch = history.history['val_acc']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
model = make_model(best_hp)
model.fit(x_train, y_train, epochs=best_epoch)

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 rescaling_2 (Rescaling)     (None, 28, 28, 1)         0         
                                                                 
 resizing_2 (Resizing)       (None, 64, 64, 1)         0         
                                                                 
 conv2d_16 (Conv2D)          (None, 64, 64, 96)        960       
                                                                 
 conv2d_17 (Conv2D)          (None, 64, 64, 96)        83040     
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 32, 32, 96)       0         
 2D)                                                             
                                                           

<keras.callbacks.History at 0x7fd540760850>

In [17]:
predictions = model.predict(x_test)



In [18]:
predictions

array([[-36.49712  , -13.070313 , -18.638115 , ...,  13.772348 ,
        -29.663088 , -16.352293 ],
       [-30.06915  , -15.743183 ,  14.344524 , ..., -15.8320265,
        -24.241371 , -32.863167 ],
       [-17.364965 ,   6.338301 , -16.232584 , ..., -12.676114 ,
        -10.868713 , -17.373186 ],
       ...,
       [-55.760258 , -31.629309 , -40.37693  , ..., -33.89235  ,
        -32.132187 , -20.16865  ],
       [-46.783005 , -48.48218  , -51.36597  , ..., -57.92683  ,
        -37.55024  , -38.759586 ],
       [-11.318561 , -22.974426 , -20.178417 , ..., -51.011818 ,
        -22.764912 , -29.583496 ]], dtype=float32)

In [22]:
preds = np.argmax(predictions, axis=1)
preds

array([7, 2, 1, ..., 4, 5, 6])

In [25]:
y_test.values

array([7, 2, 1, ..., 4, 5, 6])

In [26]:
accuracy_score(y_test, preds)

0.9908