In [1]:
import tensorflow as tf
from utils.custom_checkpoints import CustomModelCheckpoint



In [2]:
# Show the installed TensorFlow release so the run can be reproduced later.
tf.version.VERSION

'2.17.0'

In [3]:
# Fetch Fashion-MNIST through the Keras dataset helper; load_data() returns
# a (train, test) pair, each of which is an (images, labels) pair.
mnist_fashion = tf.keras.datasets.fashion_mnist
train_split, test_split = mnist_fashion.load_data()
X_train_full, y_train_full = train_split
X_test, y_test = test_split

In [4]:
# Display the image-array shapes of the full training set and the test set.
tuple(images.shape for images in (X_train_full, X_test))

((60000, 28, 28), (10000, 28, 28))

In [5]:
# Hold out the first 10,000 training samples for validation and train on
# everything after them. Images and labels are sliced at the same index so
# they stay aligned.
n_valid = 10000
X_valid, y_valid = X_train_full[:n_valid], y_train_full[:n_valid]
X_train, y_train = X_train_full[n_valid:], y_train_full[n_valid:]

In [6]:
def scale_pixels(images):
    """Return `images` as a float32 array divided by 255.

    Arrays that already have a floating-point dtype are returned unchanged,
    so re-running this cell does not divide the data by 255 a second time.
    Casting to float32 before dividing avoids the float64 array that
    `uint8_array / 255` would otherwise allocate.

    NOTE(review): assumes the raw arrays hold integer pixel intensities in
    0-255, as loaded from fashion_mnist above -- confirm if the data source
    changes.
    """
    if images.dtype.kind == "f":
        # Already scaled by a previous run of this cell.
        return images
    return images.astype("float32") / 255


X_train = scale_pixels(X_train)
X_valid = scale_pixels(X_valid)
X_test = scale_pixels(X_test)

### Keep previous checkpoints along with the latest one
- **Pros:** It preserves training info such as epoch, val_acc, etc. in the file name.
- **Cons:** Multiple files are created, taking up storage space.

In [27]:
# Strategy 1: keep every improved checkpoint.
# The file name embeds the epoch number and validation accuracy, so each
# improvement is written to a new file and earlier files are left in place.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='checkpoints/best_model_epoch_{epoch:03d}_val_acc_{val_accuracy:.4f}.keras',
    monitor="val_accuracy",
    mode="max",  # higher accuracy is better; stated explicitly rather than left to "auto"
    save_best_only=True,  # skip epochs that do not improve on the best val_accuracy so far
)

### Overwrite previous checkpoint with the latest one
- **Pros:** Only a single file is created, saving storage space.
- **Cons:** Doesn't preserve training info such as epoch, val_acc, etc. in the file name.

In [None]:
# Strategy 2: overwrite a single checkpoint file.
# The path is fixed, so every improvement replaces the previous best model
# on disk and only one file ever exists.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='checkpoints/best_model.keras',
    monitor="val_accuracy",
    mode="max",  # higher accuracy is better; stated explicitly rather than left to "auto"
    save_best_only=True,  # skip epochs that do not improve on the best val_accuracy so far
)

### Delete previous checkpoint after saving the latest one
- **Pros:** It preserves training info such as epoch, val_acc, etc. in the file name, and saves storage space by deleting old checkpoints.

In [None]:
# Strategy 3: project-local callback from utils.custom_checkpoints.
# NOTE(review): per the heading above, it is expected to delete the previous
# checkpoint after saving a better one -- its implementation is not shown
# in this notebook, so confirm against the helper module.
checkpoint_name_template = 'checkpoints/best_model_epoch_{epoch:03d}_val_acc_{val_accuracy:.4f}.keras'
custom_checkpoint = CustomModelCheckpoint(
    filepath=checkpoint_name_template,
    monitor='val_accuracy',
    mode='max',
)

In [28]:
# Seed Python, NumPy and TensorFlow so weight initialisation (and the
# training that follows) is repeatable each time this cell is re-run.
tf.keras.utils.set_random_seed(42)

# Small fully connected classifier: 28x28 image -> flat vector -> two
# 30-unit ReLU layers -> 10-way softmax (one output per class label).
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(30, activation="relu"),
    tf.keras.layers.Dense(30, activation="relu"),
    tf.keras.layers.Dense(10, activation="softmax"),
])

In [29]:
# Print the layer-by-layer overview of the model defined above.
model.summary()

In [30]:
# Labels are integer class ids (not one-hot), hence the sparse variant of
# categorical cross-entropy. Accuracy is tracked so that `val_accuracy`
# is available to the checkpoint callbacks.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [31]:
# Train for 10 epochs, evaluating on the held-out validation split after
# each epoch. Only `model_checkpoint` is registered here: whichever
# checkpoint cell ran last defines it, and `custom_checkpoint` is not used
# unless it is passed in the callbacks list instead.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
    callbacks=[model_checkpoint],
)

Epoch 1/10


2024-12-12 21:55:01.485428: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 156800000 exceeds 10% of free system memory.


[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.7265 - loss: 0.7985 - val_accuracy: 0.8506 - val_loss: 0.4304
Epoch 2/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8462 - loss: 0.4305 - val_accuracy: 0.8611 - val_loss: 0.3844
Epoch 3/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8615 - loss: 0.3888 - val_accuracy: 0.8581 - val_loss: 0.3924
Epoch 4/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8685 - loss: 0.3605 - val_accuracy: 0.8696 - val_loss: 0.3642
Epoch 5/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8767 - loss: 0.3375 - val_accuracy: 0.8709 - val_loss: 0.3611
Epoch 6/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8795 - loss: 0.3294 - val_accuracy: 0.8658 - val_loss: 0.3711
Epoch 7/10
[1m1563/1563[0